{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:PJBYY7AJFOWFXGVRP7ROR7TO5U","short_pith_number":"pith:PJBYY7AJ","schema_version":"1.0","canonical_sha256":"7a438c7c092bac5b9ab17fe2e8fe6eed2e12ae3e627ddce1188bc27987b2b2f4","source":{"kind":"arxiv","id":"2606.20936","version":1},"attestation_state":"computed","paper":{"title":"Comparing Transformers and Hybrid Models at the Token Level","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"William Merrill, Yanhong Li","submitted_at":"2026-06-18T20:57:46Z","abstract_excerpt":"Hybrid language models that mix attention and recurrent layers have shown promise: theoretically, recurrent layers ameliorate the limitations of pure transformers on state tracking, and empirically, hybrids can outperform pure transformers in loss and downstream evaluations \\citep{waleffe2024empirical,merrill2026olmohybrid}. Yet it remains unclear which data or capabilities drive these gains, and to what degree they reflect the theoretical advantages motivating hybrid models. We address this question using the open weights from Olmo 3 \\citep{olmo2025olmo3} and Olmo Hybrid \\citep{merrill2026olm"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.20936","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-18T20:57:46Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"0ab34a9b2d1933879e087e544d7031f005f9158da5eb04fc3e94bf146b7c20f5","abstract_canon_sha256":"5b9e94b98a4e06623da46d1e17205aac635035cd27eb37be957b88130fa6be78"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:12:22.542741Z","signature_b64":"4Ro8fRIAHrcqf2EA9CqEbL8/traTTLmUndlfFjXaJL2atdJHTHArmnuXXMNikjatIhjb5jbI9U7skxf8biM4AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7a438c7c092bac5b9ab17fe2e8fe6eed2e12ae3e627ddce1188bc27987b2b2f4","last_reissued_at":"2026-06-23T01:12:22.542255Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:12:22.542255Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Comparing Transformers and Hybrid Models at the Token Level","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"William Merrill, Yanhong Li","submitted_at":"2026-06-18T20:57:46Z","abstract_excerpt":"Hybrid language models that mix attention and recurrent layers have shown promise: theoretically, recurrent layers ameliorate the limitations of pure transformers on state tracking, and empirically, hybrids can outperform pure transformers in loss and downstream evaluations \\citep{waleffe2024empirical,merrill2026olmohybrid}. Yet it remains unclear which data or capabilities drive these gains, and to what degree they reflect the theoretical advantages motivating hybrid models. We address this question using the open weights from Olmo 3 \\citep{olmo2025olmo3} and Olmo Hybrid \\citep{merrill2026olm"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.20936","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.20936/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.20936","created_at":"2026-06-23T01:12:22.542350+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.20936v1","created_at":"2026-06-23T01:12:22.542350+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.20936","created_at":"2026-06-23T01:12:22.542350+00:00"},{"alias_kind":"pith_short_12","alias_value":"PJBYY7AJFOWF","created_at":"2026-06-23T01:12:22.542350+00:00"},{"alias_kind":"pith_short_16","alias_value":"PJBYY7AJFOWFXGVR","created_at":"2026-06-23T01:12:22.542350+00:00"},{"alias_kind":"pith_short_8","alias_value":"PJBYY7AJ","created_at":"2026-06-23T01:12:22.542350+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U","json":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U.json","graph_json":"https://pith.science/api/pith-number/PJBYY7AJFOWFXGVRP7ROR7TO5U/graph.json","events_json":"https://pith.science/api/pith-number/PJBYY7AJFOWFXGVRP7ROR7TO5U/events.json","paper":"https://pith.science/paper/PJBYY7AJ"},"agent_actions":{"view_html":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U","download_json":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U.json","view_paper":"https://pith.science/paper/PJBYY7AJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.20936&json=true","fetch_graph":"https://pith.science/api/pith-number/PJBYY7AJFOWFXGVRP7ROR7TO5U/graph.json","fetch_events":"https://pith.science/api/pith-number/PJBYY7AJFOWFXGVRP7ROR7TO5U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U/action/storage_attestation","attest_author":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U/action/author_attestation","sign_citation":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U/action/citation_signature","submit_replication":"https://pith.science/pith/PJBYY7AJFOWFXGVRP7ROR7TO5U/action/replication_record"}},"created_at":"2026-06-23T01:12:22.542350+00:00","updated_at":"2026-06-23T01:12:22.542350+00:00"}