{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:ZIFSRQIG6DU6TB4HHJ6IBPX4ON","short_pith_number":"pith:ZIFSRQIG","schema_version":"1.0","canonical_sha256":"ca0b28c106f0e9e987873a7c80befc7354a3231f77704ebb0e1a749e4f426042","source":{"kind":"arxiv","id":"2509.25760","version":2},"attestation_state":"computed","paper":{"title":"TruthRL: Incentivizing Truthful LLMs via Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Anuj Kumar, Jiaqi Wang, Jingxiang Chen, Kai Sun, Mohammad Kachuee, Nicolas Scheffer, Rakesh Wanga, Rulin Shao, Teja Gollapudi, Wen-tau Yih, Xiao Yang, Xin Luna Dong, Yiwei Liao, Yu Meng, Zhepei Wei","submitted_at":"2025-09-30T04:25:17Z","abstract_excerpt":"While large language models (LLMs) have demonstrated strong performance on factoid question answering, they are still prone to hallucination and untruthful responses, particularly when tasks demand information outside their parametric knowledge. Indeed, truthfulness requires more than accuracy -- models must also recognize uncertainty and abstain when unsure to avoid hallucinations. This presents a fundamental challenge for existing methods: approaches that optimize for accuracy often amplify hallucinations, while those that encourage abstention can become overly conservative, sacrificing corr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.25760","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-09-30T04:25:17Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"e1c3c3c631683f6e3fb25e4a819449fd968b63fc1d68a984c4f6ae92b47504f5","abstract_canon_sha256":"1c2c436911ae9aa689884bc07c28cf03a67e5bfba21e49b5a220226458ab093e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:08:30.249265Z","signature_b64":"iAL8OIWxMAHilIWuVrqXjo/zs2c4WdyN17PZy73xPKdXVmfFv7v/rngP1okKJ6X/vpNNBXaczWYHvR7RFDsiAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ca0b28c106f0e9e987873a7c80befc7354a3231f77704ebb0e1a749e4f426042","last_reissued_at":"2026-06-10T01:08:30.248163Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:08:30.248163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TruthRL: Incentivizing Truthful LLMs via Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Anuj Kumar, Jiaqi Wang, Jingxiang Chen, Kai Sun, Mohammad Kachuee, Nicolas Scheffer, Rakesh Wanga, Rulin Shao, Teja Gollapudi, Wen-tau Yih, Xiao Yang, Xin Luna Dong, Yiwei Liao, Yu Meng, Zhepei Wei","submitted_at":"2025-09-30T04:25:17Z","abstract_excerpt":"While large language models (LLMs) have demonstrated strong performance on factoid question answering, they are still prone to hallucination and untruthful responses, particularly when tasks demand information outside their parametric knowledge. Indeed, truthfulness requires more than accuracy -- models must also recognize uncertainty and abstain when unsure to avoid hallucinations. This presents a fundamental challenge for existing methods: approaches that optimize for accuracy often amplify hallucinations, while those that encourage abstention can become overly conservative, sacrificing corr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.25760","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.25760/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.25760","created_at":"2026-06-10T01:08:30.248307+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.25760v2","created_at":"2026-06-10T01:08:30.248307+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.25760","created_at":"2026-06-10T01:08:30.248307+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZIFSRQIG6DU6","created_at":"2026-06-10T01:08:30.248307+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZIFSRQIG6DU6TB4H","created_at":"2026-06-10T01:08:30.248307+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZIFSRQIG","created_at":"2026-06-10T01:08:30.248307+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":5,"sample":[{"citing_arxiv_id":"2605.08401","citing_title":"AIPO: Learning to Reason from Active Interaction","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2602.03452","citing_title":"Beyond Variance: Prompt-Efficient RLVR via Rare-Event Amplification and Bidirectional Pairing","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03216","citing_title":"BAS: A Decision-Theoretic Approach to Evaluating Large Language Model Confidence","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2605.08401","citing_title":"AIPO: Learning to Reason from Active Interaction","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07153","citing_title":"Beyond Reasoning: Reinforcement Learning Unlocks Parametric Knowledge in LLMs","ref_index":48,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON","json":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON.json","graph_json":"https://pith.science/api/pith-number/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/graph.json","events_json":"https://pith.science/api/pith-number/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/events.json","paper":"https://pith.science/paper/ZIFSRQIG"},"agent_actions":{"view_html":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON","download_json":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON.json","view_paper":"https://pith.science/paper/ZIFSRQIG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.25760&json=true","fetch_graph":"https://pith.science/api/pith-number/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/graph.json","fetch_events":"https://pith.science/api/pith-number/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/action/storage_attestation","attest_author":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/action/author_attestation","sign_citation":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/action/citation_signature","submit_replication":"https://pith.science/pith/ZIFSRQIG6DU6TB4HHJ6IBPX4ON/action/replication_record"}},"created_at":"2026-06-10T01:08:30.248307+00:00","updated_at":"2026-06-10T01:08:30.248307+00:00"}