{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:MMSWSUWTKYPYTKGKPY2EXP272I","short_pith_number":"pith:MMSWSUWT","canonical_record":{"source":{"id":"2606.11816","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f","abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1"},"schema_version":"1.0"},"canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","source":{"kind":"arxiv","id":"2606.11816","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11816v1","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_12","alias_value":"MMSWSUWTKYPY","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_16","alias_value":"MMSWSUWTKYPYTKGK","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_8","alias_value":"MMSWSUWT","created_at":"2026-06-11T01:10:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:MMSWSUWTKYPYTKGKPY2EXP272I","target":"record","payload":{"canonical_record":{"source":{"id":"2606.11816","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f","abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1"},"schema_version":"1.0"},"canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:10:09.619359Z","signature_b64":"JGnjhfGvMC+mitjXAKUsOx4OtPpnu+bRTaU8qqLcm0sqE5P7w+02oP32HyH0TFr559voteD5xaEHABPsU/77DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","last_reissued_at":"2026-06-11T01:10:09.618522Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:10:09.618522Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.11816","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:10:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yDBVUCV3sE+HgWEVH287MKbz4Yq3wjak/QmsaS9uTRDnqaN0YgMGUNCdmh+QHaEfDmjuznu8nFSs1Bagl97DAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T14:33:38.080276Z"},"content_sha256":"fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1","schema_version":"1.0","event_id":"sha256:fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:MMSWSUWTKYPYTKGKPY2EXP272I","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"WorldReasoner: Evaluating Whether Language Model Agents Forecast Events with Valid Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Andreas Vlachos, Eric Chamoun, Yizhou Chi, Zifeng Ding","submitted_at":"2026-06-10T08:50:29Z","abstract_excerpt":"Forecasting real-world events requires language-model agents to reason under uncertainty from incomplete, time-bounded information. Yet evaluating whether agents genuinely forecast requires more than final-answer accuracy: a model may be correct by recalling memorized training facts, citing fabricated evidence, or producing an unsupported causal story. We present WorldReasoner, an evaluation framework for temporally valid event forecasting. Each task gives an agent a resolved forecasting question, a simulated forecast date, and access only to evidence available before that date; after resoluti"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11816","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11816/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:10:09Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YpvJSz8Cefboba5bbz2RX5btw4NZIkqSyWtqYAZQ3q68bm0RTmp4l9pN6o9iBMqewonD1BSDJMFQ6Oxnfrs5BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-29T14:33:38.080653Z"},"content_sha256":"086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d","schema_version":"1.0","event_id":"sha256:086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MMSWSUWTKYPYTKGKPY2EXP272I/bundle.json","state_url":"https://pith.science/pith/MMSWSUWTKYPYTKGKPY2EXP272I/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MMSWSUWTKYPYTKGKPY2EXP272I/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-29T14:33:38Z","links":{"resolver":"https://pith.science/pith/MMSWSUWTKYPYTKGKPY2EXP272I","bundle":"https://pith.science/pith/MMSWSUWTKYPYTKGKPY2EXP272I/bundle.json","state":"https://pith.science/pith/MMSWSUWTKYPYTKGKPY2EXP272I/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MMSWSUWTKYPYTKGKPY2EXP272I/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:MMSWSUWTKYPYTKGKPY2EXP272I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f"},"schema_version":"1.0","source":{"id":"2606.11816","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11816v1","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11816","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_12","alias_value":"MMSWSUWTKYPY","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_16","alias_value":"MMSWSUWTKYPYTKGK","created_at":"2026-06-11T01:10:09Z"},{"alias_kind":"pith_short_8","alias_value":"MMSWSUWT","created_at":"2026-06-11T01:10:09Z"}],"graph_snapshots":[{"event_id":"sha256:086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d","target":"graph","created_at":"2026-06-11T01:10:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11816/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Forecasting real-world events requires language-model agents to reason under uncertainty from incomplete, time-bounded information. Yet evaluating whether agents genuinely forecast requires more than final-answer accuracy: a model may be correct by recalling memorized training facts, citing fabricated evidence, or producing an unsupported causal story. We present WorldReasoner, an evaluation framework for temporally valid event forecasting. Each task gives an agent a resolved forecasting question, a simulated forecast date, and access only to evidence available before that date; after resoluti","authors_text":"Andreas Vlachos, Eric Chamoun, Yizhou Chi, Zifeng Ding","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title":"WorldReasoner: Evaluating Whether Language Model Agents Forecast Events with Valid Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11816","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1","target":"record","created_at":"2026-06-11T01:10:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a912dbafa4ed922bfb5418b6a83e3672f2ce566dac99a2a91f034d3863a6fdf1","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29Z","title_canon_sha256":"adb989ba0b0e944dca90f3815d337664e06b8c4a176a9579a8a42bda4422ed0f"},"schema_version":"1.0","source":{"id":"2606.11816","kind":"arxiv","version":1}},"canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"63256952d3561f89a8ca7e344bbf5fd231d05f6b7093bedd4b42a2c1bb7a1190","first_computed_at":"2026-06-11T01:10:09.618522Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:10:09.618522Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JGnjhfGvMC+mitjXAKUsOx4OtPpnu+bRTaU8qqLcm0sqE5P7w+02oP32HyH0TFr559voteD5xaEHABPsU/77DQ==","signature_status":"signed_v1","signed_at":"2026-06-11T01:10:09.619359Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11816","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd4a8acdc036de3b071100dda223b809ea8ba6ce09f405f6935ed9c5a8914eb1","sha256:086f661a2aae5e25890c07a6af4445b4d049ac7b23b7daee1f8fa1217b2cb63d"],"state_sha256":"e073be1ac9020a16c24c5bfe365cd53c0c61264bfe2d0ea74e974e337e6cb2ec"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kH/9C6vUjL1gVnVj5mlX36c2XJ0xTC58fcSfcegWcHWfXh+z5rjbshHX/38a98fQmYg0LQr8X3mMfMhreVoaCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-29T14:33:38.082528Z","bundle_sha256":"c0239ac632f6acbb404c084e1ef732e668b26221b368cd83e66deafd15f3c03f"}}