{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ADY4ZJ3TVYZOAB5TR3BMSLP6WN","short_pith_number":"pith:ADY4ZJ3T","schema_version":"1.0","canonical_sha256":"00f1cca773ae32e007b38ec2c92dfeb35e52423aacd82369380ce0b23c46e014","source":{"kind":"arxiv","id":"2606.01160","version":1},"attestation_state":"computed","paper":{"title":"Expected Value Alignment for Generative Reward Modeling in Formal Mathematics Verification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Haotao Tan, Mingyu Li, Shihao Ji, Zihui Song","submitted_at":"2026-05-31T11:06:48Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly used with formal interactive theorem provers such as Lean 4. Scaling these systems with reinforcement learning or search methods requires process reward models (PRMs) that can evaluate intermediate reasoning steps. Existing reward-model designs expose a practical trade-off. Value-head models provide continuous scores but modify the generative model interface, while generative reward models preserve textual rationales but are poorly matched to continuous floating-point regression because numeric values are split across tokens. We introduce Expected "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.01160","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-31T11:06:48Z","cross_cats_sorted":[],"title_canon_sha256":"cb49f2d586edbb25c3e07dfd4dda36a6667682c2f692bebd157d65165f19816c","abstract_canon_sha256":"66517b534962b3d210f41a8760cdf1f83adcb5f921105bd04b86ac8ede2ea8cb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:25.381642Z","signature_b64":"KWBDNuc6fR00OWqGitG2PYiiJ/kwpmYv9L6ARLKhHmsQ/iKv7Re1KDSdkb8RxJorBvV9UBMbyKucas22EhsrCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"00f1cca773ae32e007b38ec2c92dfeb35e52423aacd82369380ce0b23c46e014","last_reissued_at":"2026-06-02T02:04:25.381194Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:25.381194Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Expected Value Alignment for Generative Reward Modeling in Formal Mathematics Verification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Haotao Tan, Mingyu Li, Shihao Ji, Zihui Song","submitted_at":"2026-05-31T11:06:48Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly used with formal interactive theorem provers such as Lean 4. Scaling these systems with reinforcement learning or search methods requires process reward models (PRMs) that can evaluate intermediate reasoning steps. Existing reward-model designs expose a practical trade-off. Value-head models provide continuous scores but modify the generative model interface, while generative reward models preserve textual rationales but are poorly matched to continuous floating-point regression because numeric values are split across tokens. We introduce Expected "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.01160","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.01160/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.01160","created_at":"2026-06-02T02:04:25.381250+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.01160v1","created_at":"2026-06-02T02:04:25.381250+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.01160","created_at":"2026-06-02T02:04:25.381250+00:00"},{"alias_kind":"pith_short_12","alias_value":"ADY4ZJ3TVYZO","created_at":"2026-06-02T02:04:25.381250+00:00"},{"alias_kind":"pith_short_16","alias_value":"ADY4ZJ3TVYZOAB5T","created_at":"2026-06-02T02:04:25.381250+00:00"},{"alias_kind":"pith_short_8","alias_value":"ADY4ZJ3T","created_at":"2026-06-02T02:04:25.381250+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN","json":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN.json","graph_json":"https://pith.science/api/pith-number/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/graph.json","events_json":"https://pith.science/api/pith-number/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/events.json","paper":"https://pith.science/paper/ADY4ZJ3T"},"agent_actions":{"view_html":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN","download_json":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN.json","view_paper":"https://pith.science/paper/ADY4ZJ3T","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.01160&json=true","fetch_graph":"https://pith.science/api/pith-number/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/graph.json","fetch_events":"https://pith.science/api/pith-number/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/action/storage_attestation","attest_author":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/action/author_attestation","sign_citation":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/action/citation_signature","submit_replication":"https://pith.science/pith/ADY4ZJ3TVYZOAB5TR3BMSLP6WN/action/replication_record"}},"created_at":"2026-06-02T02:04:25.381250+00:00","updated_at":"2026-06-02T02:04:25.381250+00:00"}