{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:ND2NF75GUDY6JST6IAF4VIN2KV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d4d53a4b872203f217ea0bfe53384663009baa5cf1fe61d21c475db395032681","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-01T18:28:55Z","title_canon_sha256":"d1edfe6bb041c3fcb3b826f1a0d3bb001721e5ea66f746a6fe7c393057a4ba82"},"schema_version":"1.0","source":{"id":"2402.00838","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.00838","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2402.00838v4","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.00838","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"ND2NF75GUDY6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"ND2NF75GUDY6JST6","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"ND2NF75G","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:0764477f021a60cba23f7e381866d92c5209e521b0977efc3499feb9932fa3a9","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we have built OLMo, a competitive, truly Open Language Model, to enable the scientific study of language models. Unlike most prior efforts that have only released model weights and inference code, we release OLMo alongside open training data and training and evaluation code."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the released OLMo is sufficiently competitive with closed models and that the research community will actively use the openness for rigorous scientific study rather than just inference."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"OLMo delivers a fully open competitive language model with training data, code, and evaluations to enable community-driven scientific research on LMs."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"OLMo is a competitive open language model released with its full training data, training code, and evaluation code to enable scientific study."}],"snapshot_sha256":"3c765d3213fb7bc2d59c01b9f788a5fdb86c64a40ed4ca11d1ec5d3bf0b8bffb"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"a3336b2bb76efcd20f7848600b07ab1b176e1f6647605ebf27c1bbf83a8fb10f"},"paper":{"abstract_excerpt":"Language models (LMs) have become ubiquitous in both NLP research and in commercial product offerings. As their commercial importance has surged, the most powerful models have become closed off, gated behind proprietary interfaces, with important details of their training data, architectures, and development undisclosed. Given the importance of these details in scientifically studying these models, including their biases and potential risks, we believe it is essential for the research community to have access to powerful, truly open LMs. To this end, we have built OLMo, a competitive, truly Op","authors_text":"Aakanksha Naik, Abhilasha Ravichander, Akshita Bhagia, Ananya Harsh Jha, Arman Cohan, Crystal Nam, David Atkinson, Dirk Groeneveld, Dustin Schwenk, Emma Strubell, Hamish Ivison, Hannaneh Hajishirzi, Ian Magnusson, Iz Beltagy, Jack Hessel, Jacob Morrison, Jennifer Dumas, Jesse Dodge, Khyathi Raghavi Chandu, Kyle Lo, Kyle Richardson, Luca Soldaini, Luke Zettlemoyer, Matthew E. Peters, Mitchell Wortsman, Nathan Lambert, Niklas Muennighoff, Nishant Subramani, Noah A. Smith, Oyvind Tafjord, Pete Walsh, Pradeep Dasigi, Rodney Kinney, Russell Authur, Saurabh Shah, Shane Arora, Tushar Khot, Valentina Pyatkin, William Merrill, Will Smith, Yanai Elazar, Yizhong Wang, Yuling Gu","cross_cats":[],"headline":"OLMo is a competitive open language model released with its full training data, training code, and evaluation code to enable scientific study.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-01T18:28:55Z","title":"OLMo: Accelerating the Science of Language Models"},"references":{"count":12,"internal_anchors":5,"resolved_work":12,"sample":[{"cited_arxiv_id":"1607.06450","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Layer Normalization","work_id":"20a2d720-0046-4c7c-bcd6-327ec8143f69","year":2022},{"cited_arxiv_id":"2005.14165","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","year":2016},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Sidney Greenbaum and Gerald Nelson","work_id":"c0308e11-2f83-46fe-ac86-c7882b2d7c60","year":1996},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"arXiv preprint arXiv:2312.10253","work_id":"f562b393-1e7c-4a73-971b-6ba9398a3229","year":2023},{"cited_arxiv_id":"2401.04088","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Mixtral of Experts","work_id":"0de8c352-9daa-4e1e-8c7b-3d0dec69f369","year":2022}],"snapshot_sha256":"41e0801a8a2f30c8543eecfc0b89bb2492fa1a414aaf48e8396f034e7dd6b921"},"source":{"id":"2402.00838","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-16T22:57:12.208514Z","id":"847cde5a-ec48-47f0-9f9e-28dc9a13c3ab","model_set":{"reader":"grok-4.3"},"one_line_summary":"OLMo delivers a fully open competitive language model with training data, code, and evaluations to enable community-driven scientific research on LMs.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"OLMo is a competitive open language model released with its full training data, training code, and evaluation code to enable scientific study.","strongest_claim":"we have built OLMo, a competitive, truly Open Language Model, to enable the scientific study of language models. Unlike most prior efforts that have only released model weights and inference code, we release OLMo alongside open training data and training and evaluation code.","weakest_assumption":"That the released OLMo is sufficiently competitive with closed models and that the research community will actively use the openness for rigorous scientific study rather than just inference."}},"verdict_id":"847cde5a-ec48-47f0-9f9e-28dc9a13c3ab"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5c49910e9074f758f43c36cc41bd4774f49d57771738bbd10f019dd7510bb233","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d4d53a4b872203f217ea0bfe53384663009baa5cf1fe61d21c475db395032681","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-01T18:28:55Z","title_canon_sha256":"d1edfe6bb041c3fcb3b826f1a0d3bb001721e5ea66f746a6fe7c393057a4ba82"},"schema_version":"1.0","source":{"id":"2402.00838","kind":"arxiv","version":4}},"canonical_sha256":"68f4d2ffa6a0f1e4ca7e400bcaa1ba556b7033f8df5509364d01ad4917a1d67a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"68f4d2ffa6a0f1e4ca7e400bcaa1ba556b7033f8df5509364d01ad4917a1d67a","first_computed_at":"2026-05-17T23:38:46.313699Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.313699Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RouIsjsPMan7iIboNwCno0iReikyPmJq8yb1imC0R4w/pJy2JLvTs9sq9UyM0VO4u/TNNTngjNAUwj0MgiIcCw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.314233Z","signed_message":"canonical_sha256_bytes"},"source_id":"2402.00838","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5c49910e9074f758f43c36cc41bd4774f49d57771738bbd10f019dd7510bb233","sha256:0764477f021a60cba23f7e381866d92c5209e521b0977efc3499feb9932fa3a9"],"state_sha256":"96c3d22a2d814006df3238f9955a27b9ee96c87fa70d8787cb3ddf446eff78e0"}