{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:PMHBABAV6C2EUGIS3CQ2LQ4QLK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"10797f5aa6dc067ce46ed52fe566d2f3c79765de28d2b54412e64a5cf6e9bcef","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-19T10:38:01Z","title_canon_sha256":"c9138c07a2c8c33e08f91b764a7eef6be78aabce610802805a266370c671a936"},"schema_version":"1.0","source":{"id":"2409.12640","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2409.12640","created_at":"2026-07-05T09:09:26Z"},{"alias_kind":"arxiv_version","alias_value":"2409.12640v2","created_at":"2026-07-05T09:09:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2409.12640","created_at":"2026-07-05T09:09:26Z"},{"alias_kind":"pith_short_12","alias_value":"PMHBABAV6C2E","created_at":"2026-07-05T09:09:26Z"},{"alias_kind":"pith_short_16","alias_value":"PMHBABAV6C2EUGIS","created_at":"2026-07-05T09:09:26Z"},{"alias_kind":"pith_short_8","alias_value":"PMHBABAV","created_at":"2026-07-05T09:09:26Z"}],"graph_snapshots":[{"event_id":"sha256:eab09d7eed59fedd1c01b741b805adc36fcc084ddbda3c7100d122c43fd37e64","target":"graph","created_at":"2026-07-05T09:09:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2409.12640/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce Michelangelo: a minimal, synthetic, and unleaked long-context reasoning evaluation for large language models which is also easy to automatically score. This evaluation is derived via a novel, unifying framework for evaluations over arbitrarily long contexts which measure the model's ability to do more than retrieve a single piece of information from its context. The central idea of the Latent Structure Queries framework (LSQ) is to construct tasks which require a model to ``chisel away'' the irrelevant information in the context, revealing a latent structure in the context. To ver","authors_text":"Angeliki Lazaridou, Bahare Fatemi, Ed Chi, Ethan Dyer, Harsh Mehta, Jean-Baptiste Lespiau, Jeffrey Hui, Kate Olszewska, Kelvin Xu, Kiran Vodrahalli, Mehran Kazemi, Nilesh Tripuraneni, Nishanth Dikkala, Nithya Attaluri, Orhan Firat, Quoc Le, Rakesh Shivanna, Rohan Anil, Roopali Vij, Sanil Jain, Santiago Ontanon, Siamak Shakeri, Vinay Ramasesh, Yifeng Lu","cross_cats":["cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-19T10:38:01Z","title":"Michelangelo: Long Context Evaluations Beyond Haystacks via Latent Structure Queries"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2409.12640","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6956f992851e304ef5f2cda877d4af4134778d0da6fd527daffec0079932f582","target":"record","created_at":"2026-07-05T09:09:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"10797f5aa6dc067ce46ed52fe566d2f3c79765de28d2b54412e64a5cf6e9bcef","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-09-19T10:38:01Z","title_canon_sha256":"c9138c07a2c8c33e08f91b764a7eef6be78aabce610802805a266370c671a936"},"schema_version":"1.0","source":{"id":"2409.12640","kind":"arxiv","version":2}},"canonical_sha256":"7b0e100415f0b44a1912d8a1a5c3905a891593ff4d6b22df10755148ec7dd634","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7b0e100415f0b44a1912d8a1a5c3905a891593ff4d6b22df10755148ec7dd634","first_computed_at":"2026-07-05T09:09:26.820564Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T09:09:26.820564Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0nBDvNgtj0XvlCZ2cn9l/XQCsnsTTwkkMYvg3eBnhXLQ+NECZ1VkdPKkOegfWeGapgsIQCuWhgORVG9cUMqCAA==","signature_status":"signed_v1","signed_at":"2026-07-05T09:09:26.821076Z","signed_message":"canonical_sha256_bytes"},"source_id":"2409.12640","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6956f992851e304ef5f2cda877d4af4134778d0da6fd527daffec0079932f582","sha256:eab09d7eed59fedd1c01b741b805adc36fcc084ddbda3c7100d122c43fd37e64"],"state_sha256":"c924a31693bd459eb4a488f0cf4aec11901c4a76401f6f848a3bd738e6b613ed"}