{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:XRC2M3DMBNRNQLHUOCM66SZSDE","short_pith_number":"pith:XRC2M3DM","canonical_record":{"source":{"id":"2507.16806","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-07-22T17:56:01Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"68d141714159478bdb20aabdcdfe4f788198b8c7659ae00f2a48e36f3ca05749","abstract_canon_sha256":"7858d66cac17213bf2fb66ea392a6b71b0f0b00e1f4b2a19d7548d5f2281bc28"},"schema_version":"1.0"},"canonical_sha256":"bc45a66c6c0b62d82cf47099ef4b32191daee357b1472fb40a589f5b8a32c930","source":{"kind":"arxiv","id":"2507.16806","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.16806","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"arxiv_version","alias_value":"2507.16806v2","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.16806","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_12","alias_value":"XRC2M3DMBNRN","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_16","alias_value":"XRC2M3DMBNRNQLHU","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_8","alias_value":"XRC2M3DM","created_at":"2026-05-20T00:00:22Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:XRC2M3DMBNRNQLHUOCM66SZSDE","target":"record","payload":{"canonical_record":{"source":{"id":"2507.16806","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-07-22T17:56:01Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"68d141714159478bdb20aabdcdfe4f788198b8c7659ae00f2a48e36f3ca05749","abstract_canon_sha256":"7858d66cac17213bf2fb66ea392a6b71b0f0b00e1f4b2a19d7548d5f2281bc28"},"schema_version":"1.0"},"canonical_sha256":"bc45a66c6c0b62d82cf47099ef4b32191daee357b1472fb40a589f5b8a32c930","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:22.345593Z","signature_b64":"QDHO3gSOzBSSL80lLZeg1bSZ+ANYmgIfQTrTKoGcAbxIHDSPFScv6meqRo0gKxNtGtpAoPzio38mp7InNJgMBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bc45a66c6c0b62d82cf47099ef4b32191daee357b1472fb40a589f5b8a32c930","last_reissued_at":"2026-05-20T00:00:22.344833Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:22.344833Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2507.16806","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aFpZKKJ1ggC5z72rs2iE0w3x+0nKCI5q5asYweHfd5LWD6RGcmZC29i3NHiXDtlkqp5bL9VVGSR38FBHaJmWAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T11:41:16.336287Z"},"content_sha256":"67685170c632b3e804ed06d423bedbee3d06d30a7e957102779c5c06234e98c8","schema_version":"1.0","event_id":"sha256:67685170c632b3e804ed06d423bedbee3d06d30a7e957102779c5c06234e98c8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:XRC2M3DMBNRNQLHUOCM66SZSDE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond Binary Rewards: Training LMs to Reason About Their Uncertainty","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Idan Shenfeld, Isha Puri, Jacob Andreas, Leshem Choshen, Mehul Damani, Stewart Slocum, Yoon Kim","submitted_at":"2025-07-22T17:56:01Z","abstract_excerpt":"When language models (LMs) are trained via reinforcement learning (RL) to generate natural language \"reasoning chains\", their performance improves on a variety of difficult question answering tasks. Today, almost all successful applications of RL for reasoning use binary reward functions that evaluate the correctness of LM outputs. Because such reward functions do not penalize guessing or low-confidence outputs, they often have the unintended side-effect of degrading calibration and increasing the rate at which LMs generate incorrect responses (or \"hallucinate\") in other problem domains. This "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.16806","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.16806/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"oI1Lr7d0mLIBzAUsk6ByI3Y2HzQAazmgCCzP+/4iwy9AYlOlrQIFXVKonC+DZLf0ThiOK4uJOn6UYvbKTEfWCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T11:41:16.337043Z"},"content_sha256":"d983d97050bec4ad5877afab2680edc098cc8ae99f6e7d93c3dff16af88c2116","schema_version":"1.0","event_id":"sha256:d983d97050bec4ad5877afab2680edc098cc8ae99f6e7d93c3dff16af88c2116"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/bundle.json","state_url":"https://pith.science/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T11:41:16Z","links":{"resolver":"https://pith.science/pith/XRC2M3DMBNRNQLHUOCM66SZSDE","bundle":"https://pith.science/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/bundle.json","state":"https://pith.science/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XRC2M3DMBNRNQLHUOCM66SZSDE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:XRC2M3DMBNRNQLHUOCM66SZSDE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7858d66cac17213bf2fb66ea392a6b71b0f0b00e1f4b2a19d7548d5f2281bc28","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-07-22T17:56:01Z","title_canon_sha256":"68d141714159478bdb20aabdcdfe4f788198b8c7659ae00f2a48e36f3ca05749"},"schema_version":"1.0","source":{"id":"2507.16806","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.16806","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"arxiv_version","alias_value":"2507.16806v2","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.16806","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_12","alias_value":"XRC2M3DMBNRN","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_16","alias_value":"XRC2M3DMBNRNQLHU","created_at":"2026-05-20T00:00:22Z"},{"alias_kind":"pith_short_8","alias_value":"XRC2M3DM","created_at":"2026-05-20T00:00:22Z"}],"graph_snapshots":[{"event_id":"sha256:d983d97050bec4ad5877afab2680edc098cc8ae99f6e7d93c3dff16af88c2116","target":"graph","created_at":"2026-05-20T00:00:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2507.16806/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"When language models (LMs) are trained via reinforcement learning (RL) to generate natural language \"reasoning chains\", their performance improves on a variety of difficult question answering tasks. Today, almost all successful applications of RL for reasoning use binary reward functions that evaluate the correctness of LM outputs. Because such reward functions do not penalize guessing or low-confidence outputs, they often have the unintended side-effect of degrading calibration and increasing the rate at which LMs generate incorrect responses (or \"hallucinate\") in other problem domains. This ","authors_text":"Idan Shenfeld, Isha Puri, Jacob Andreas, Leshem Choshen, Mehul Damani, Stewart Slocum, Yoon Kim","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-07-22T17:56:01Z","title":"Beyond Binary Rewards: Training LMs to Reason About Their Uncertainty"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.16806","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:67685170c632b3e804ed06d423bedbee3d06d30a7e957102779c5c06234e98c8","target":"record","created_at":"2026-05-20T00:00:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7858d66cac17213bf2fb66ea392a6b71b0f0b00e1f4b2a19d7548d5f2281bc28","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-07-22T17:56:01Z","title_canon_sha256":"68d141714159478bdb20aabdcdfe4f788198b8c7659ae00f2a48e36f3ca05749"},"schema_version":"1.0","source":{"id":"2507.16806","kind":"arxiv","version":2}},"canonical_sha256":"bc45a66c6c0b62d82cf47099ef4b32191daee357b1472fb40a589f5b8a32c930","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bc45a66c6c0b62d82cf47099ef4b32191daee357b1472fb40a589f5b8a32c930","first_computed_at":"2026-05-20T00:00:22.344833Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:22.344833Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QDHO3gSOzBSSL80lLZeg1bSZ+ANYmgIfQTrTKoGcAbxIHDSPFScv6meqRo0gKxNtGtpAoPzio38mp7InNJgMBA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:22.345593Z","signed_message":"canonical_sha256_bytes"},"source_id":"2507.16806","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:67685170c632b3e804ed06d423bedbee3d06d30a7e957102779c5c06234e98c8","sha256:d983d97050bec4ad5877afab2680edc098cc8ae99f6e7d93c3dff16af88c2116"],"state_sha256":"459ffb12fac0c404268176dc582d27c45845c3cffb2c764d1143b0a8e462befc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BijzY+yNgebaNP4QU6EKFaIaDsTa2HpvqLSNXVYYfPZ1AYAqymZ4+QSkvmfsbiakzrKspvp0EjiFuhz27eWDBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T11:41:16.340507Z","bundle_sha256":"7a6511beec74b0b639378b5d0aa9dbf84826a60cca2158fd563bbde9b5db444f"}}