{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HX4AWQBYYEMWKSWKNMFMG5NHSL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0d76e7e8c7a0812841ef516fe52ef8c312a570dbfc63a74fb88c723f5c8ae12a","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-25T13:42:37Z","title_canon_sha256":"0a80eed5893be77620d459fcd67fac1c0d59fd868de592f7cf375bc4384e0caf"},"schema_version":"1.0","source":{"id":"2605.25850","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.25850","created_at":"2026-05-26T02:05:15Z"},{"alias_kind":"arxiv_version","alias_value":"2605.25850v1","created_at":"2026-05-26T02:05:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25850","created_at":"2026-05-26T02:05:15Z"},{"alias_kind":"pith_short_12","alias_value":"HX4AWQBYYEMW","created_at":"2026-05-26T02:05:15Z"},{"alias_kind":"pith_short_16","alias_value":"HX4AWQBYYEMWKSWK","created_at":"2026-05-26T02:05:15Z"},{"alias_kind":"pith_short_8","alias_value":"HX4AWQBY","created_at":"2026-05-26T02:05:15Z"}],"graph_snapshots":[{"event_id":"sha256:3b9d1703c9929734340dcf545c1cdd3805dba170c901d08660ffd5b06f114ab7","target":"graph","created_at":"2026-05-26T02:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.25850/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"This paper investigates large language model (LLM) abstention learning, specifically using ternary reward, which incentivize truthfulness in large language models. This paper extends that idea by moving from a ternary reward to a Trajectory-Informed advantage reweighting, dynamically re-weights the abstention reward during Group Relative Policy Optimization (GRPO) training. The objective of this work focuses on abstention learning instead of improving truthfulness, serving as an exploration into hallucination reduction. The novelty of this paper lies in methodological innovation, advantage re-","authors_text":"Muyu Pan, Nan Zhang, Philip Shin, Rui Zhang, Shu Zhao, Varun Parekh, Vijaykrishnan Narayanan","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-25T13:42:37Z","title":"TIAR: Trajectory-Informed Advantage Reweighting for LLM Abstention Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25850","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:11d54d4e2717c80e1f49edafbbf93e96ce8a53813982b2ec7c4e9cf3af862826","target":"record","created_at":"2026-05-26T02:05:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0d76e7e8c7a0812841ef516fe52ef8c312a570dbfc63a74fb88c723f5c8ae12a","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-25T13:42:37Z","title_canon_sha256":"0a80eed5893be77620d459fcd67fac1c0d59fd868de592f7cf375bc4384e0caf"},"schema_version":"1.0","source":{"id":"2605.25850","kind":"arxiv","version":1}},"canonical_sha256":"3df80b4038c119654aca6b0ac375a792ee2e382de1be1629067926a60a76caac","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3df80b4038c119654aca6b0ac375a792ee2e382de1be1629067926a60a76caac","first_computed_at":"2026-05-26T02:05:15.227032Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:05:15.227032Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"ibB5GZQxY9R65Kiw66YR8lZ+x56rodDS//dVAlx0xLd83POD4deiJnhe3pC+rmWThpHMwBPnYnRMWO0Epm/1Dw==","signature_status":"signed_v1","signed_at":"2026-05-26T02:05:15.227798Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.25850","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:11d54d4e2717c80e1f49edafbbf93e96ce8a53813982b2ec7c4e9cf3af862826","sha256:3b9d1703c9929734340dcf545c1cdd3805dba170c901d08660ffd5b06f114ab7"],"state_sha256":"0167e7ea763477fbb45d865dd7e9e122e6b10f1f56a98cbdd274a0316df8ef86"}