{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:SMEZMH2XHN3UOKTAYVYDTMSQTG","short_pith_number":"pith:SMEZMH2X","canonical_record":{"source":{"id":"2505.14479","kind":"arxiv","version":9},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-20T15:13:32Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"0efb8506f134bc83a6392e5fb9e845ddcb611ecaccbc3ebe2dcbc28c234978af","abstract_canon_sha256":"6e8a0a6c6a58a36468481d6796d1d1b33d52c5b4fbb664e0ada189d181f95f47"},"schema_version":"1.0"},"canonical_sha256":"9309961f573b77472a60c57039b25099848211aeae4c99de92d92b2ca284b783","source":{"kind":"arxiv","id":"2505.14479","version":9},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.14479","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"arxiv_version","alias_value":"2505.14479v9","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.14479","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_12","alias_value":"SMEZMH2XHN3U","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_16","alias_value":"SMEZMH2XHN3UOKTA","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_8","alias_value":"SMEZMH2X","created_at":"2026-05-26T01:03:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:SMEZMH2XHN3UOKTAYVYDTMSQTG","target":"record","payload":{"canonical_record":{"source":{"id":"2505.14479","kind":"arxiv","version":9},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-20T15:13:32Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"0efb8506f134bc83a6392e5fb9e845ddcb611ecaccbc3ebe2dcbc28c234978af","abstract_canon_sha256":"6e8a0a6c6a58a36468481d6796d1d1b33d52c5b4fbb664e0ada189d181f95f47"},"schema_version":"1.0"},"canonical_sha256":"9309961f573b77472a60c57039b25099848211aeae4c99de92d92b2ca284b783","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:12.739904Z","signature_b64":"qTXVm0y4l8+Wq5jUL+plarE5okzxXmK9/0PEGwIIp4I82s6vKO2KruZh+TNRSo1Fqr+BdzuUxcVRXRvVZ38JCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9309961f573b77472a60c57039b25099848211aeae4c99de92d92b2ca284b783","last_reissued_at":"2026-05-26T01:03:12.739303Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:12.739303Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2505.14479","source_version":9,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"88IsGB3HLhbpk4en4SDorVqqpLb4nF7k+eGRqdnI29y7C4Yfv5j0rGFPaP8kwXYJWibz4iFWOsG9Y+tqNZEhBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:10:12.655305Z"},"content_sha256":"1d9b2c75bdabc134cd3d084c2bd5b7591d830ec002de14a3375748c0a302076a","schema_version":"1.0","event_id":"sha256:1d9b2c75bdabc134cd3d084c2bd5b7591d830ec002de14a3375748c0a302076a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:SMEZMH2XHN3UOKTAYVYDTMSQTG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Neuro-Symbolic Approach for Reliable Proof Generation with LLMs: A Case Study in Euclidean Geometry","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent.","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Dafna Shahaf, Eitan Stern, Oren Sultan","submitted_at":"2025-05-20T15:13:32Z","abstract_excerpt":"Large language models (LLMs) struggle with formal domains that require rigorous logical deduction and symbolic reasoning, such as mathematical proof generation. We propose a neuro-symbolic approach that combines LLMs' generative strengths with structured components to overcome this challenge. As a proof of concept, we focus on SAT-level geometry problems. Our approach is two-fold: (1) We retrieve analogous problems and use their proofs to guide the LLM, and (2) a formal verifier evaluates the generated proofs and provides feedback, helping the model fix incorrect proofs. Our method significant"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We demonstrate that our method significantly improves proof accuracy for OpenAI's o1 model (58%-70% improvement); both analogous problems and the verifier's feedback contribute to these gains.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The formal verifier can accurately evaluate generated proofs and provide feedback that helps the LLM fix errors, assuming the verifier is complete for the problem domain and the feedback is effectively usable by the model.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A neuro-symbolic approach using analogous problem retrieval and formal verification feedback improves LLM proof generation accuracy on Euclidean geometry problems by 58-70% for the o1 model.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"75a148ef883419b710599cf734fe411a7318eb1bd5353732edc238d8d2de7ed9"},"source":{"id":"2505.14479","kind":"arxiv","version":9},"verdict":{"id":"4262254c-7422-4f62-82b6-8918e6127419","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-22T14:04:04.868793Z","strongest_claim":"We demonstrate that our method significantly improves proof accuracy for OpenAI's o1 model (58%-70% improvement); both analogous problems and the verifier's feedback contribute to these gains.","one_line_summary":"A neuro-symbolic approach using analogous problem retrieval and formal verification feedback improves LLM proof generation accuracy on Euclidean geometry problems by 58-70% for the o1 model.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The formal verifier can accurately evaluate generated proofs and provide feedback that helps the LLM fix errors, assuming the verifier is complete for the problem domain and the feedback is effectively usable by the model.","pith_extraction_headline":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2505.14479/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4e2a6a098e61ac106a1349577375879fa3db69d447bd404a81527da1f84b53f8"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"4262254c-7422-4f62-82b6-8918e6127419"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mrWg9G6yaLp1ZngjarqTi7gcttFjDpU3s3PvpTT5pLgrlnccohgcOk/2ligASh6oasksvrGo+duXgSM8KrT8CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T12:10:12.655791Z"},"content_sha256":"2c09747dce8d9298554cf5ff66cedd85a49afe7b48d5a9568b5c265d4f3c3225","schema_version":"1.0","event_id":"sha256:2c09747dce8d9298554cf5ff66cedd85a49afe7b48d5a9568b5c265d4f3c3225"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/bundle.json","state_url":"https://pith.science/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T12:10:12Z","links":{"resolver":"https://pith.science/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG","bundle":"https://pith.science/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/bundle.json","state":"https://pith.science/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SMEZMH2XHN3UOKTAYVYDTMSQTG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:SMEZMH2XHN3UOKTAYVYDTMSQTG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6e8a0a6c6a58a36468481d6796d1d1b33d52c5b4fbb664e0ada189d181f95f47","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-20T15:13:32Z","title_canon_sha256":"0efb8506f134bc83a6392e5fb9e845ddcb611ecaccbc3ebe2dcbc28c234978af"},"schema_version":"1.0","source":{"id":"2505.14479","kind":"arxiv","version":9}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.14479","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"arxiv_version","alias_value":"2505.14479v9","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.14479","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_12","alias_value":"SMEZMH2XHN3U","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_16","alias_value":"SMEZMH2XHN3UOKTA","created_at":"2026-05-26T01:03:12Z"},{"alias_kind":"pith_short_8","alias_value":"SMEZMH2X","created_at":"2026-05-26T01:03:12Z"}],"graph_snapshots":[{"event_id":"sha256:2c09747dce8d9298554cf5ff66cedd85a49afe7b48d5a9568b5c265d4f3c3225","target":"graph","created_at":"2026-05-26T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We demonstrate that our method significantly improves proof accuracy for OpenAI's o1 model (58%-70% improvement); both analogous problems and the verifier's feedback contribute to these gains."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The formal verifier can accurately evaluate generated proofs and provide feedback that helps the LLM fix errors, assuming the verifier is complete for the problem domain and the feedback is effectively usable by the model."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A neuro-symbolic approach using analogous problem retrieval and formal verification feedback improves LLM proof generation accuracy on Euclidean geometry problems by 58-70% for the o1 model."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent."}],"snapshot_sha256":"75a148ef883419b710599cf734fe411a7318eb1bd5353732edc238d8d2de7ed9"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"4e2a6a098e61ac106a1349577375879fa3db69d447bd404a81527da1f84b53f8"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2505.14479/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models (LLMs) struggle with formal domains that require rigorous logical deduction and symbolic reasoning, such as mathematical proof generation. We propose a neuro-symbolic approach that combines LLMs' generative strengths with structured components to overcome this challenge. As a proof of concept, we focus on SAT-level geometry problems. Our approach is two-fold: (1) We retrieve analogous problems and use their proofs to guide the LLM, and (2) a formal verifier evaluates the generated proofs and provides feedback, helping the model fix incorrect proofs. Our method significant","authors_text":"Dafna Shahaf, Eitan Stern, Oren Sultan","cross_cats":["cs.CL"],"headline":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-20T15:13:32Z","title":"A Neuro-Symbolic Approach for Reliable Proof Generation with LLMs: A Case Study in Euclidean Geometry"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.14479","kind":"arxiv","version":9},"verdict":{"created_at":"2026-05-22T14:04:04.868793Z","id":"4262254c-7422-4f62-82b6-8918e6127419","model_set":{"reader":"grok-4.3"},"one_line_summary":"A neuro-symbolic approach using analogous problem retrieval and formal verification feedback improves LLM proof generation accuracy on Euclidean geometry problems by 58-70% for the o1 model.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Retrieving similar proofs and verifier feedback boosts an LLM's geometry proof accuracy by 58 to 70 percent.","strongest_claim":"We demonstrate that our method significantly improves proof accuracy for OpenAI's o1 model (58%-70% improvement); both analogous problems and the verifier's feedback contribute to these gains.","weakest_assumption":"The formal verifier can accurately evaluate generated proofs and provide feedback that helps the LLM fix errors, assuming the verifier is complete for the problem domain and the feedback is effectively usable by the model."}},"verdict_id":"4262254c-7422-4f62-82b6-8918e6127419"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1d9b2c75bdabc134cd3d084c2bd5b7591d830ec002de14a3375748c0a302076a","target":"record","created_at":"2026-05-26T01:03:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6e8a0a6c6a58a36468481d6796d1d1b33d52c5b4fbb664e0ada189d181f95f47","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-05-20T15:13:32Z","title_canon_sha256":"0efb8506f134bc83a6392e5fb9e845ddcb611ecaccbc3ebe2dcbc28c234978af"},"schema_version":"1.0","source":{"id":"2505.14479","kind":"arxiv","version":9}},"canonical_sha256":"9309961f573b77472a60c57039b25099848211aeae4c99de92d92b2ca284b783","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9309961f573b77472a60c57039b25099848211aeae4c99de92d92b2ca284b783","first_computed_at":"2026-05-26T01:03:12.739303Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:12.739303Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qTXVm0y4l8+Wq5jUL+plarE5okzxXmK9/0PEGwIIp4I82s6vKO2KruZh+TNRSo1Fqr+BdzuUxcVRXRvVZ38JCA==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:12.739904Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.14479","source_kind":"arxiv","source_version":9}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1d9b2c75bdabc134cd3d084c2bd5b7591d830ec002de14a3375748c0a302076a","sha256:2c09747dce8d9298554cf5ff66cedd85a49afe7b48d5a9568b5c265d4f3c3225"],"state_sha256":"00095abc73a913cfbc17b58b76da86706891e8dfd9ff087566d312645c825a3c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"G7P7xaVGWGGzaOtMCh1qmCbr2WsBS6dzzuEIQlZ3nQV8j5rg/OaMgZSkCY0i3iDiMSuJtqP03A4vDmkqzyYsAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T12:10:12.657941Z","bundle_sha256":"37bfbf75dfb4edc8f01561bf7e535b33a98a5acd0299caa5f5620ed8039c07d6"}}