{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:FP4HUMRTCMPIXGA277QHKAJE2I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4e28e56913322b5b6cdf8833390d306df2c82a9c40990a93ea8b69ef94b8ed63","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T12:59:30Z","title_canon_sha256":"05a3e1cc8ec5632bc59ec087b24e0058034fa1f14ebb35d698457c12a9c4c456"},"schema_version":"1.0","source":{"id":"2604.01993","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.01993","created_at":"2026-06-10T01:11:00Z"},{"alias_kind":"arxiv_version","alias_value":"2604.01993v2","created_at":"2026-06-10T01:11:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.01993","created_at":"2026-06-10T01:11:00Z"},{"alias_kind":"pith_short_12","alias_value":"FP4HUMRTCMPI","created_at":"2026-06-10T01:11:00Z"},{"alias_kind":"pith_short_16","alias_value":"FP4HUMRTCMPIXGA2","created_at":"2026-06-10T01:11:00Z"},{"alias_kind":"pith_short_8","alias_value":"FP4HUMRT","created_at":"2026-06-10T01:11:00Z"}],"graph_snapshots":[{"event_id":"sha256:0a149c7f12206f747b3501cd1c8d0b48e8cf84448b2f8e6452b5b9b2747bebae","target":"graph","created_at":"2026-06-10T01:11:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.01993/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multi-hop QA benchmarks often reward Large Language Models (LLMs) for spurious correctness, where models reach correct answers through invalid intermediate reasoning. We propose SAFE, an LLM-as-verifier framework for evidence-grounded multi-hop QA. Rather than judging only the final answer after generation, SAFE verifies reasoning during generation by checking intermediate steps against the provided passages and previous reasoning trajectory. To make this process checkable, SAFE decomposes reasoning into atomic, evidence-grounded units represented with Knowledge Graph (KG) triples. At train-ti","authors_text":"Daeyong Kwon, Seung-won Hwang, Soyoung Yoon","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T12:59:30Z","title":"SAFE: An LLM-as-Verifier Framework for Evidence-Grounded Multi-Hop Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.01993","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1a6959776c001458ae066cd1f831a14b61ffdd08a0507894a418b06e382b775a","target":"record","created_at":"2026-06-10T01:11:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4e28e56913322b5b6cdf8833390d306df2c82a9c40990a93ea8b69ef94b8ed63","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T12:59:30Z","title_canon_sha256":"05a3e1cc8ec5632bc59ec087b24e0058034fa1f14ebb35d698457c12a9c4c456"},"schema_version":"1.0","source":{"id":"2604.01993","kind":"arxiv","version":2}},"canonical_sha256":"2bf87a3233131e8b981affe0750124d23c05b3040d11e65fbac2867229ce6a10","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2bf87a3233131e8b981affe0750124d23c05b3040d11e65fbac2867229ce6a10","first_computed_at":"2026-06-10T01:11:00.436426Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-10T01:11:00.436426Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"S3w+qIMOJOpgR14KvGr0sETg0k2Wx39QsmCgYPHfbt98tx8SQ3T2oG41KnHTLGwUrrc0vmwzmY0bX52iVKicAA==","signature_status":"signed_v1","signed_at":"2026-06-10T01:11:00.437382Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.01993","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1a6959776c001458ae066cd1f831a14b61ffdd08a0507894a418b06e382b775a","sha256:0a149c7f12206f747b3501cd1c8d0b48e8cf84448b2f8e6452b5b9b2747bebae"],"state_sha256":"98e9ef0f8d7c75d75ed13e96d8851f1abe91df682f1e291ca3fc42b3459876b3"}