{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:HPZPRGEPZ775VW5IHTVOHZDWQX","short_pith_number":"pith:HPZPRGEP","canonical_record":{"source":{"id":"2605.19341","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T04:29:03Z","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"title_canon_sha256":"cac7bd03597a8db66dc1a6af33459619ccfe3e4052a3b7c49ffd30dc2ebf6e3b","abstract_canon_sha256":"95a74d2698adb1c9ed977a4d4ae133e4881791153f56b6de9dfc83441d98a648"},"schema_version":"1.0"},"canonical_sha256":"3bf2f8988fcfffdadba83ceae3e47685fcda45c1915433f853930f03a44532c3","source":{"kind":"arxiv","id":"2605.19341","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19341","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19341v1","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19341","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_12","alias_value":"HPZPRGEPZ775","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_16","alias_value":"HPZPRGEPZ775VW5I","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_8","alias_value":"HPZPRGEP","created_at":"2026-05-20T01:05:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:HPZPRGEPZ775VW5IHTVOHZDWQX","target":"record","payload":{"canonical_record":{"source":{"id":"2605.19341","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T04:29:03Z","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"title_canon_sha256":"cac7bd03597a8db66dc1a6af33459619ccfe3e4052a3b7c49ffd30dc2ebf6e3b","abstract_canon_sha256":"95a74d2698adb1c9ed977a4d4ae133e4881791153f56b6de9dfc83441d98a648"},"schema_version":"1.0"},"canonical_sha256":"3bf2f8988fcfffdadba83ceae3e47685fcda45c1915433f853930f03a44532c3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:40.327583Z","signature_b64":"herzsf+OkgUJDatafQlLqwjue/5QGl42Xl95gRWEN57smUoCXggm2a1ZhAjjvhqqVUVfjxMiQdfYkW4jHyMfCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3bf2f8988fcfffdadba83ceae3e47685fcda45c1915433f853930f03a44532c3","last_reissued_at":"2026-05-20T01:05:40.326782Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:40.326782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.19341","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ap+MEPBpVf1imLe3noKx1wjzV9AW3yRPy2mJYd7XJLZt8uR2KdfX//uqPME7D2LruQPEPNRV0Wc4jl7vh4CqAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T08:06:15.118206Z"},"content_sha256":"a25aa212a497b61450fc1c30201a4f2ee7b8ea6caccb4e7685f5fbd416fa8136","schema_version":"1.0","event_id":"sha256:a25aa212a497b61450fc1c30201a4f2ee7b8ea6caccb4e7685f5fbd416fa8136"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:HPZPRGEPZ775VW5IHTVOHZDWQX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"HalluWorld: A Controlled Benchmark for Hallucination via Reference World Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Emmy Liu, Karan Singh, Michael Yu, Sachin Kumar, Steven Y. Feng, Varun Gangal, Zhuofu Tao","submitted_at":"2026-05-19T04:29:03Z","abstract_excerpt":"Hallucination remains a central failure mode of large language models, but existing benchmarks operationalize it inconsistently across summarization, question answering, retrieval-augmented generation, and agentic interaction. This fragmentation makes it unclear whether a mitigation that works in one setting reduces hallucinations across contexts. Current benchmarks either require human annotation and fixed references that may be memorized, or rely on observations in settings that are difficult to reproduce. To study root causes, we introduce HalluWorld, an extensible benchmark grounded in an "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19341","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19341/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cZ2PS188xzA6qrdlZ+ERdT+8MS08uDPaRtPBV8KAUjojwOEtq+oDD2agclR/aPmSMfTgIFhwL0svPwb160LaBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T08:06:15.118954Z"},"content_sha256":"3164f88e249f4a9fd186ca2457a11fc14407a1785bfac60fc1b530c902365673","schema_version":"1.0","event_id":"sha256:3164f88e249f4a9fd186ca2457a11fc14407a1785bfac60fc1b530c902365673"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/bundle.json","state_url":"https://pith.science/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T08:06:15Z","links":{"resolver":"https://pith.science/pith/HPZPRGEPZ775VW5IHTVOHZDWQX","bundle":"https://pith.science/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/bundle.json","state":"https://pith.science/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HPZPRGEPZ775VW5IHTVOHZDWQX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HPZPRGEPZ775VW5IHTVOHZDWQX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"95a74d2698adb1c9ed977a4d4ae133e4881791153f56b6de9dfc83441d98a648","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T04:29:03Z","title_canon_sha256":"cac7bd03597a8db66dc1a6af33459619ccfe3e4052a3b7c49ffd30dc2ebf6e3b"},"schema_version":"1.0","source":{"id":"2605.19341","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19341","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19341v1","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19341","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_12","alias_value":"HPZPRGEPZ775","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_16","alias_value":"HPZPRGEPZ775VW5I","created_at":"2026-05-20T01:05:40Z"},{"alias_kind":"pith_short_8","alias_value":"HPZPRGEP","created_at":"2026-05-20T01:05:40Z"}],"graph_snapshots":[{"event_id":"sha256:3164f88e249f4a9fd186ca2457a11fc14407a1785bfac60fc1b530c902365673","target":"graph","created_at":"2026-05-20T01:05:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.19341/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Hallucination remains a central failure mode of large language models, but existing benchmarks operationalize it inconsistently across summarization, question answering, retrieval-augmented generation, and agentic interaction. This fragmentation makes it unclear whether a mitigation that works in one setting reduces hallucinations across contexts. Current benchmarks either require human annotation and fixed references that may be memorized, or rely on observations in settings that are difficult to reproduce. To study root causes, we introduce HalluWorld, an extensible benchmark grounded in an ","authors_text":"Emmy Liu, Karan Singh, Michael Yu, Sachin Kumar, Steven Y. Feng, Varun Gangal, Zhuofu Tao","cross_cats":["cs.AI","cs.LG","stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T04:29:03Z","title":"HalluWorld: A Controlled Benchmark for Hallucination via Reference World Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19341","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a25aa212a497b61450fc1c30201a4f2ee7b8ea6caccb4e7685f5fbd416fa8136","target":"record","created_at":"2026-05-20T01:05:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"95a74d2698adb1c9ed977a4d4ae133e4881791153f56b6de9dfc83441d98a648","cross_cats_sorted":["cs.AI","cs.LG","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-19T04:29:03Z","title_canon_sha256":"cac7bd03597a8db66dc1a6af33459619ccfe3e4052a3b7c49ffd30dc2ebf6e3b"},"schema_version":"1.0","source":{"id":"2605.19341","kind":"arxiv","version":1}},"canonical_sha256":"3bf2f8988fcfffdadba83ceae3e47685fcda45c1915433f853930f03a44532c3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3bf2f8988fcfffdadba83ceae3e47685fcda45c1915433f853930f03a44532c3","first_computed_at":"2026-05-20T01:05:40.326782Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:40.326782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"herzsf+OkgUJDatafQlLqwjue/5QGl42Xl95gRWEN57smUoCXggm2a1ZhAjjvhqqVUVfjxMiQdfYkW4jHyMfCw==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:40.327583Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.19341","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a25aa212a497b61450fc1c30201a4f2ee7b8ea6caccb4e7685f5fbd416fa8136","sha256:3164f88e249f4a9fd186ca2457a11fc14407a1785bfac60fc1b530c902365673"],"state_sha256":"6c900cb322d84970b7de7010d30dc91c59c3a6b027cab6a077ce598c881a5f11"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3WbLzc2Lev1EGot/942lKH+oDVZMtzYbz1+nITc88xiG0jc2PECDKGTlQqGfoV27P0ab6NGcHFj/uJanQiAjDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T08:06:15.122839Z","bundle_sha256":"18beb175f7f1527788962a544f4c143c8bdca21dae9066c92221a05adcb0b56f"}}