{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:R4DJ6UDXJUDPEV5PGLYY2QMS75","short_pith_number":"pith:R4DJ6UDX","canonical_record":{"source":{"id":"2604.23135","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-04-25T04:26:19Z","cross_cats_sorted":[],"title_canon_sha256":"5a1ec79842ce630dccbb6a9b5fb2f9ee1f67a5a59f99870d906fe929ca50e67f","abstract_canon_sha256":"f1489ff677e938eaaa6e21dfcd7f5935da64f11fde2eeae444e05f3469082b37"},"schema_version":"1.0"},"canonical_sha256":"8f069f50774d06f257af32f18d4192ff4001533b0b7dc409ee51f8b01abf6079","source":{"kind":"arxiv","id":"2604.23135","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.23135","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"arxiv_version","alias_value":"2604.23135v2","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.23135","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_12","alias_value":"R4DJ6UDXJUDP","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_16","alias_value":"R4DJ6UDXJUDPEV5P","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_8","alias_value":"R4DJ6UDX","created_at":"2026-05-20T00:04:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:R4DJ6UDXJUDPEV5PGLYY2QMS75","target":"record","payload":{"canonical_record":{"source":{"id":"2604.23135","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-04-25T04:26:19Z","cross_cats_sorted":[],"title_canon_sha256":"5a1ec79842ce630dccbb6a9b5fb2f9ee1f67a5a59f99870d906fe929ca50e67f","abstract_canon_sha256":"f1489ff677e938eaaa6e21dfcd7f5935da64f11fde2eeae444e05f3469082b37"},"schema_version":"1.0"},"canonical_sha256":"8f069f50774d06f257af32f18d4192ff4001533b0b7dc409ee51f8b01abf6079","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:32.668622Z","signature_b64":"DubwLYyviNpvi6iqcRXm5piyy2PzqW/AnU4wnKPBc4T9Cag+KWiiJdfMYN+tVpCYCyvgu2epzG+7qIxtS4x9DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8f069f50774d06f257af32f18d4192ff4001533b0b7dc409ee51f8b01abf6079","last_reissued_at":"2026-05-20T00:04:32.667757Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:32.667757Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.23135","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W9jj++TZEhEOWaJhm9QcQfI35/jaq5Xr/ZoIaQUYT7r9SdNo7CWNbdQy0PXSwJCLGF53rjodoPUX5awwU4htDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T10:29:12.227570Z"},"content_sha256":"548066c9c2f866c048103d0b611562c9b95275313671f192d29eac484a07aa92","schema_version":"1.0","event_id":"sha256:548066c9c2f866c048103d0b611562c9b95275313671f192d29eac484a07aa92"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:R4DJ6UDXJUDPEV5PGLYY2QMS75","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Characterizing Paraphrase-Induced Failures in Lean 4 Autoformalization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aryan Sharma, Ethan Lou, William Feng","submitted_at":"2026-04-25T04:26:19Z","abstract_excerpt":"Lean 4 autoformalization has become increasingly popular in recent years, with frontier language models and open-weight autoformalizers now producing valid formalizations of mathematical theorems. However, these evaluations often rely on single canonical phrasings of theorems and rarely probe whether outputs are robust to natural variation in inputs, while prior work has shown that semantically equivalent paraphrases often induce divergent formal outputs. We study the structure of these divergences in Lean 4 by applying deterministic paraphrase rules to datasets of undergraduate and Olympiad-l"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"the observed paraphrase sensitivity reflects compilation-boundary failures rather than semantic divergence among successful formalizations. In particular, when both baseline and perturbed outputs compile, paired predictions are semantically equivalent under BEq+ and structurally near-identical under GTED.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The 60 deterministic paraphrase rules preserve semantic equivalence of the original theorems, and the BEq+ and GTED metrics fully capture any semantic or structural differences in the formal outputs.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Paraphrase sensitivity in Lean 4 autoformalization arises from compilation failures rather than semantic divergence among successful formalizations.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8b0bb5749d2b1042342d1cb02864190b642ee75e8e7f1efa2329f3630b5072e6"},"source":{"id":"2604.23135","kind":"arxiv","version":2},"verdict":{"id":"e4f0a2f7-2d1c-4718-a50c-6778acf66b27","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-08T08:30:06.087700Z","strongest_claim":"the observed paraphrase sensitivity reflects compilation-boundary failures rather than semantic divergence among successful formalizations. In particular, when both baseline and perturbed outputs compile, paired predictions are semantically equivalent under BEq+ and structurally near-identical under GTED.","one_line_summary":"Paraphrase sensitivity in Lean 4 autoformalization arises from compilation failures rather than semantic divergence among successful formalizations.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The 60 deterministic paraphrase rules preserve semantic equivalence of the original theorems, and the BEq+ and GTED metrics fully capture any semantic or structural differences in the formal outputs.","pith_extraction_headline":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.23135/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-19T23:25:11.415140Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"990593442741c2e0f4ff63b67a2f0f68c6a343e9ee8dadc74313089e1057befe"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"e4f0a2f7-2d1c-4718-a50c-6778acf66b27"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ITnqQ67xSHQNv+Imt6LyH/3U3Xt12Pdi/n/Nv8yhHX9C9Iq5wU4u1xz7AfxxwAUPbzFfzLGBqTkK2YUm+2TdAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T10:29:12.228074Z"},"content_sha256":"f31e259b5ed3166b8ebdfb68146d5be7c7ffba0e9299c70a108a326ccca5b718","schema_version":"1.0","event_id":"sha256:f31e259b5ed3166b8ebdfb68146d5be7c7ffba0e9299c70a108a326ccca5b718"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/bundle.json","state_url":"https://pith.science/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T10:29:12Z","links":{"resolver":"https://pith.science/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75","bundle":"https://pith.science/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/bundle.json","state":"https://pith.science/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/state.json","well_known_bundle":"https://pith.science/.well-known/pith/R4DJ6UDXJUDPEV5PGLYY2QMS75/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:R4DJ6UDXJUDPEV5PGLYY2QMS75","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f1489ff677e938eaaa6e21dfcd7f5935da64f11fde2eeae444e05f3469082b37","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-04-25T04:26:19Z","title_canon_sha256":"5a1ec79842ce630dccbb6a9b5fb2f9ee1f67a5a59f99870d906fe929ca50e67f"},"schema_version":"1.0","source":{"id":"2604.23135","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.23135","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"arxiv_version","alias_value":"2604.23135v2","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.23135","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_12","alias_value":"R4DJ6UDXJUDP","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_16","alias_value":"R4DJ6UDXJUDPEV5P","created_at":"2026-05-20T00:04:32Z"},{"alias_kind":"pith_short_8","alias_value":"R4DJ6UDX","created_at":"2026-05-20T00:04:32Z"}],"graph_snapshots":[{"event_id":"sha256:f31e259b5ed3166b8ebdfb68146d5be7c7ffba0e9299c70a108a326ccca5b718","target":"graph","created_at":"2026-05-20T00:04:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"the observed paraphrase sensitivity reflects compilation-boundary failures rather than semantic divergence among successful formalizations. In particular, when both baseline and perturbed outputs compile, paired predictions are semantically equivalent under BEq+ and structurally near-identical under GTED."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The 60 deterministic paraphrase rules preserve semantic equivalence of the original theorems, and the BEq+ and GTED metrics fully capture any semantic or structural differences in the formal outputs."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Paraphrase sensitivity in Lean 4 autoformalization arises from compilation failures rather than semantic divergence among successful formalizations."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations."}],"snapshot_sha256":"8b0bb5749d2b1042342d1cb02864190b642ee75e8e7f1efa2329f3630b5072e6"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T23:25:11.415140Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2604.23135/integrity.json","findings":[],"snapshot_sha256":"990593442741c2e0f4ff63b67a2f0f68c6a343e9ee8dadc74313089e1057befe","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Lean 4 autoformalization has become increasingly popular in recent years, with frontier language models and open-weight autoformalizers now producing valid formalizations of mathematical theorems. However, these evaluations often rely on single canonical phrasings of theorems and rarely probe whether outputs are robust to natural variation in inputs, while prior work has shown that semantically equivalent paraphrases often induce divergent formal outputs. We study the structure of these divergences in Lean 4 by applying deterministic paraphrase rules to datasets of undergraduate and Olympiad-l","authors_text":"Aryan Sharma, Ethan Lou, William Feng","cross_cats":[],"headline":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-04-25T04:26:19Z","title":"Characterizing Paraphrase-Induced Failures in Lean 4 Autoformalization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.23135","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-08T08:30:06.087700Z","id":"e4f0a2f7-2d1c-4718-a50c-6778acf66b27","model_set":{"reader":"grok-4.3"},"one_line_summary":"Paraphrase sensitivity in Lean 4 autoformalization arises from compilation failures rather than semantic divergence among successful formalizations.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Paraphrase sensitivity in Lean autoformalization stems from compilation-boundary failures rather than semantic divergence among successful formalizations.","strongest_claim":"the observed paraphrase sensitivity reflects compilation-boundary failures rather than semantic divergence among successful formalizations. In particular, when both baseline and perturbed outputs compile, paired predictions are semantically equivalent under BEq+ and structurally near-identical under GTED.","weakest_assumption":"The 60 deterministic paraphrase rules preserve semantic equivalence of the original theorems, and the BEq+ and GTED metrics fully capture any semantic or structural differences in the formal outputs."}},"verdict_id":"e4f0a2f7-2d1c-4718-a50c-6778acf66b27"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:548066c9c2f866c048103d0b611562c9b95275313671f192d29eac484a07aa92","target":"record","created_at":"2026-05-20T00:04:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f1489ff677e938eaaa6e21dfcd7f5935da64f11fde2eeae444e05f3469082b37","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-04-25T04:26:19Z","title_canon_sha256":"5a1ec79842ce630dccbb6a9b5fb2f9ee1f67a5a59f99870d906fe929ca50e67f"},"schema_version":"1.0","source":{"id":"2604.23135","kind":"arxiv","version":2}},"canonical_sha256":"8f069f50774d06f257af32f18d4192ff4001533b0b7dc409ee51f8b01abf6079","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8f069f50774d06f257af32f18d4192ff4001533b0b7dc409ee51f8b01abf6079","first_computed_at":"2026-05-20T00:04:32.667757Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:32.667757Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DubwLYyviNpvi6iqcRXm5piyy2PzqW/AnU4wnKPBc4T9Cag+KWiiJdfMYN+tVpCYCyvgu2epzG+7qIxtS4x9DA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:32.668622Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.23135","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:548066c9c2f866c048103d0b611562c9b95275313671f192d29eac484a07aa92","sha256:f31e259b5ed3166b8ebdfb68146d5be7c7ffba0e9299c70a108a326ccca5b718"],"state_sha256":"e249d97bd881fcf70c9b5b60a112d0e258760c41a7de17631bbac39e8120f6ea"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Y5782jaLbdEndzmFi6w2yhrn9KBlPJZUKKY8FTLpTy7wqZJXVyveI9+SZ7lASN2xNNnqGpt1Usss7amBZ0T5BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T10:29:12.230397Z","bundle_sha256":"218b8da1c163b897790667974318c84c11c1def51e5d5b72f658cc1691837418"}}