{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LCSWSEXNAV5PBZBJYSL4EY77LT","short_pith_number":"pith:LCSWSEXN","schema_version":"1.0","canonical_sha256":"58a56912ed057af0e429c497c263ff5ce448a011e0a22e25208e0244645760a4","source":{"kind":"arxiv","id":"2605.16358","version":1},"attestation_state":"computed","paper":{"title":"LEAF: A Living Benchmark for Event-Augmented Forecasting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chun-Liang Li, Jinsung Yoon, Mihir Parmar, Mingtian Tan, Nanyun Peng, Palash Goyal, Thomas Hartvigsen, Tomas Pfister","submitted_at":"2026-05-09T03:17:59Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly applied to forecasting. To evaluate this capability while mitigating pre-training data contamination, several living benchmarks have been proposed. However, existing benchmarks either lack the multidimensional events essential for accurate forecasting due to data scarcity, or focus on relatively closed environments. To assess the predictive capabilities of LLMs in complex, real-world scenarios, we propose LEAF, the first living benchmark for event-augmented forecasting tasks, including future event probabilities, trend and time series forecasting. "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.16358","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-09T03:17:59Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3f347dc46c92997ca6137b008456a007e46a0f56d5eb7a31da28ebe96f1b3b46","abstract_canon_sha256":"87710a37b1066b2ecd68aa781ca8617f5902c84367060c25f9be94a0d4963c52"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:18.213448Z","signature_b64":"uW3I8s3w2CE9q6ks4670uEKsGxg1gA3XVaQfczwQZglZlqP1kGSvd+SfQU4FducdbyWZ8bCHqaBFdp7O77YaDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"58a56912ed057af0e429c497c263ff5ce448a011e0a22e25208e0244645760a4","last_reissued_at":"2026-05-20T00:02:18.212821Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:18.212821Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"LEAF: A Living Benchmark for Event-Augmented Forecasting","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Chun-Liang Li, Jinsung Yoon, Mihir Parmar, Mingtian Tan, Nanyun Peng, Palash Goyal, Thomas Hartvigsen, Tomas Pfister","submitted_at":"2026-05-09T03:17:59Z","abstract_excerpt":"Large Language Models (LLMs) are increasingly applied to forecasting. To evaluate this capability while mitigating pre-training data contamination, several living benchmarks have been proposed. However, existing benchmarks either lack the multidimensional events essential for accurate forecasting due to data scarcity, or focus on relatively closed environments. To assess the predictive capabilities of LLMs in complex, real-world scenarios, we propose LEAF, the first living benchmark for event-augmented forecasting tasks, including future event probabilities, trend and time series forecasting. "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16358","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16358/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T22:36:48.860347Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"0f1d3680801b2e9d019dfac2cac7d9bac35f98a3fd88e34c9db5f9b89c45725b"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16358","created_at":"2026-05-20T00:02:18.212938+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16358v1","created_at":"2026-05-20T00:02:18.212938+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16358","created_at":"2026-05-20T00:02:18.212938+00:00"},{"alias_kind":"pith_short_12","alias_value":"LCSWSEXNAV5P","created_at":"2026-05-20T00:02:18.212938+00:00"},{"alias_kind":"pith_short_16","alias_value":"LCSWSEXNAV5PBZBJ","created_at":"2026-05-20T00:02:18.212938+00:00"},{"alias_kind":"pith_short_8","alias_value":"LCSWSEXN","created_at":"2026-05-20T00:02:18.212938+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT","json":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT.json","graph_json":"https://pith.science/api/pith-number/LCSWSEXNAV5PBZBJYSL4EY77LT/graph.json","events_json":"https://pith.science/api/pith-number/LCSWSEXNAV5PBZBJYSL4EY77LT/events.json","paper":"https://pith.science/paper/LCSWSEXN"},"agent_actions":{"view_html":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT","download_json":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT.json","view_paper":"https://pith.science/paper/LCSWSEXN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16358&json=true","fetch_graph":"https://pith.science/api/pith-number/LCSWSEXNAV5PBZBJYSL4EY77LT/graph.json","fetch_events":"https://pith.science/api/pith-number/LCSWSEXNAV5PBZBJYSL4EY77LT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT/action/storage_attestation","attest_author":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT/action/author_attestation","sign_citation":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT/action/citation_signature","submit_replication":"https://pith.science/pith/LCSWSEXNAV5PBZBJYSL4EY77LT/action/replication_record"}},"created_at":"2026-05-20T00:02:18.212938+00:00","updated_at":"2026-05-20T00:02:18.212938+00:00"}