{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2020:L4DYAMP4CO2UVHDPGSULJ2LCZG","short_pith_number":"pith:L4DYAMP4","canonical_record":{"source":{"id":"2007.14062","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-07-28T08:34:04Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"fe93aca2c56e0c722572fa55194f449de0bdf4351c41094903d025e445c17514","abstract_canon_sha256":"715aa306ef7fc43efc29c6b8f31619884065434b7f7310d8bce8aba1b16c8446"},"schema_version":"1.0"},"canonical_sha256":"5f078031fc13b54a9c6f34a8b4e962c9ba684e49e63756b2309e9e3feca98726","source":{"kind":"arxiv","id":"2007.14062","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2007.14062","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"arxiv_version","alias_value":"2007.14062v2","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2007.14062","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"pith_short_12","alias_value":"L4DYAMP4CO2U","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"L4DYAMP4CO2UVHDP","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"L4DYAMP4","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2020:L4DYAMP4CO2UVHDPGSULJ2LCZG","target":"record","payload":{"canonical_record":{"source":{"id":"2007.14062","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-07-28T08:34:04Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"fe93aca2c56e0c722572fa55194f449de0bdf4351c41094903d025e445c17514","abstract_canon_sha256":"715aa306ef7fc43efc29c6b8f31619884065434b7f7310d8bce8aba1b16c8446"},"schema_version":"1.0"},"canonical_sha256":"5f078031fc13b54a9c6f34a8b4e962c9ba684e49e63756b2309e9e3feca98726","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:45.914063Z","signature_b64":"AnL1cMm6eigkbbnK5WhK7aW29OXyAWv+IgiH2+XS0Qkm8+H+noUAHEF7Qaw50sQFN+rDJaPYAkkWZMl/17KWAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5f078031fc13b54a9c6f34a8b4e962c9ba684e49e63756b2309e9e3feca98726","last_reissued_at":"2026-05-17T23:38:45.913475Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:45.913475Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2007.14062","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lMpoG9nR33WVTxjMNZG8lRWKyBwzlKF+oVziiGma4vdHMXjDW3aQcfjeYspsU1AVvRYbpMR8h3iFilqLDplUAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T10:16:59.359135Z"},"content_sha256":"53f07e8de89b747a7bbe0f18882f1d1c5345cd05d3031b4c65dc1b902c744f3a","schema_version":"1.0","event_id":"sha256:53f07e8de89b747a7bbe0f18882f1d1c5345cd05d3031b4c65dc1b902c744f3a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2020:L4DYAMP4CO2UVHDPGSULJ2LCZG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Big Bird: Transformers for Longer Sequences","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences.","cross_cats":["cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Amr Ahmed, Anirudh Ravula, Avinava Dubey, Chris Alberti, Guru Guruganesh, Joshua Ainslie, Li Yang, Manzil Zaheer, Philip Pham, Qifan Wang, Santiago Ontanon","submitted_at":"2020-07-28T08:34:04Z","abstract_excerpt":"Transformers-based models, such as BERT, have been one of the most successful deep learning models for NLP. Unfortunately, one of their core limitations is the quadratic dependency (mainly in terms of memory) on the sequence length due to their full attention mechanism. To remedy this, we propose, BigBird, a sparse attention mechanism that reduces this quadratic dependency to linear. We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model. Along the way, our theoretical analysis re"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The chosen combination of global, local, and random attention tokens is sufficient to retain the expressive power of full attention for the tasks considered.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"BigBird replaces full attention in Transformers with a sparse pattern that achieves linear complexity while remaining a universal approximator and Turing complete.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"a43d586c044c5b0640e66c8f8ff2fc137ca2ae8d106081e8e9b85beb0f6edd1e"},"source":{"id":"2007.14062","kind":"arxiv","version":2},"verdict":{"id":"f3c96626-36fe-4079-9630-4fb0f76dd032","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T01:49:52.952447Z","strongest_claim":"We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model.","one_line_summary":"BigBird replaces full attention in Transformers with a sparse pattern that achieves linear complexity while remaining a universal approximator and Turing complete.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The chosen combination of global, local, and random attention tokens is sufficient to retain the expressive power of full attention for the tasks considered.","pith_extraction_headline":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"c27924985aba7b2fe8a9a869dcd1a43127386e0ddea32c1434d04c96b90a4484"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f3c96626-36fe-4079-9630-4fb0f76dd032"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:45Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3kXqOQnyKerpa5UPQLO7OUztcqb+FwN6O5wR2zxm289kSGefgaM91sMedQfT5B+QT9RJjMFWjjRy0DyGFkSsCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T10:16:59.359951Z"},"content_sha256":"964a5f91753dadd7c2fbb50c06a44fce8c9c2930b1df794ace64927238b4495b","schema_version":"1.0","event_id":"sha256:964a5f91753dadd7c2fbb50c06a44fce8c9c2930b1df794ace64927238b4495b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/bundle.json","state_url":"https://pith.science/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T10:16:59Z","links":{"resolver":"https://pith.science/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG","bundle":"https://pith.science/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/bundle.json","state":"https://pith.science/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/L4DYAMP4CO2UVHDPGSULJ2LCZG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:L4DYAMP4CO2UVHDPGSULJ2LCZG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"715aa306ef7fc43efc29c6b8f31619884065434b7f7310d8bce8aba1b16c8446","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-07-28T08:34:04Z","title_canon_sha256":"fe93aca2c56e0c722572fa55194f449de0bdf4351c41094903d025e445c17514"},"schema_version":"1.0","source":{"id":"2007.14062","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2007.14062","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"arxiv_version","alias_value":"2007.14062v2","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2007.14062","created_at":"2026-05-17T23:38:45Z"},{"alias_kind":"pith_short_12","alias_value":"L4DYAMP4CO2U","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"L4DYAMP4CO2UVHDP","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"L4DYAMP4","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:964a5f91753dadd7c2fbb50c06a44fce8c9c2930b1df794ace64927238b4495b","target":"graph","created_at":"2026-05-17T23:38:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The chosen combination of global, local, and random attention tokens is sufficient to retain the expressive power of full attention for the tasks considered."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"BigBird replaces full attention in Transformers with a sparse pattern that achieves linear complexity while remaining a universal approximator and Turing complete."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences."}],"snapshot_sha256":"a43d586c044c5b0640e66c8f8ff2fc137ca2ae8d106081e8e9b85beb0f6edd1e"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"c27924985aba7b2fe8a9a869dcd1a43127386e0ddea32c1434d04c96b90a4484"},"paper":{"abstract_excerpt":"Transformers-based models, such as BERT, have been one of the most successful deep learning models for NLP. Unfortunately, one of their core limitations is the quadratic dependency (mainly in terms of memory) on the sequence length due to their full attention mechanism. To remedy this, we propose, BigBird, a sparse attention mechanism that reduces this quadratic dependency to linear. We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model. Along the way, our theoretical analysis re","authors_text":"Amr Ahmed, Anirudh Ravula, Avinava Dubey, Chris Alberti, Guru Guruganesh, Joshua Ainslie, Li Yang, Manzil Zaheer, Philip Pham, Qifan Wang, Santiago Ontanon","cross_cats":["cs.CL","stat.ML"],"headline":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-07-28T08:34:04Z","title":"Big Bird: Transformers for Longer Sequences"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2007.14062","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T01:49:52.952447Z","id":"f3c96626-36fe-4079-9630-4fb0f76dd032","model_set":{"reader":"grok-4.3"},"one_line_summary":"BigBird replaces full attention in Transformers with a sparse pattern that achieves linear complexity while remaining a universal approximator and Turing complete.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"BigBird's sparse attention preserves universal approximation and Turing completeness while scaling transformers to much longer sequences.","strongest_claim":"We show that BigBird is a universal approximator of sequence functions and is Turing complete, thereby preserving these properties of the quadratic, full attention model.","weakest_assumption":"The chosen combination of global, local, and random attention tokens is sufficient to retain the expressive power of full attention for the tasks considered."}},"verdict_id":"f3c96626-36fe-4079-9630-4fb0f76dd032"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:53f07e8de89b747a7bbe0f18882f1d1c5345cd05d3031b4c65dc1b902c744f3a","target":"record","created_at":"2026-05-17T23:38:45Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"715aa306ef7fc43efc29c6b8f31619884065434b7f7310d8bce8aba1b16c8446","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2020-07-28T08:34:04Z","title_canon_sha256":"fe93aca2c56e0c722572fa55194f449de0bdf4351c41094903d025e445c17514"},"schema_version":"1.0","source":{"id":"2007.14062","kind":"arxiv","version":2}},"canonical_sha256":"5f078031fc13b54a9c6f34a8b4e962c9ba684e49e63756b2309e9e3feca98726","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5f078031fc13b54a9c6f34a8b4e962c9ba684e49e63756b2309e9e3feca98726","first_computed_at":"2026-05-17T23:38:45.913475Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:45.913475Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AnL1cMm6eigkbbnK5WhK7aW29OXyAWv+IgiH2+XS0Qkm8+H+noUAHEF7Qaw50sQFN+rDJaPYAkkWZMl/17KWAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:45.914063Z","signed_message":"canonical_sha256_bytes"},"source_id":"2007.14062","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:53f07e8de89b747a7bbe0f18882f1d1c5345cd05d3031b4c65dc1b902c744f3a","sha256:964a5f91753dadd7c2fbb50c06a44fce8c9c2930b1df794ace64927238b4495b"],"state_sha256":"54854f1338a016212c916446baa2377648558e278d735b5f0b9cb36a7ae65a36"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OkNaAMbQHYKo/TecHJQrx055Ub4UfttCKufq1+9v4npn6CMf4JYt2RkbKXsxHQIhZNVYbm2HeBYaWrsJ9VtMAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T10:16:59.363729Z","bundle_sha256":"b1bf9906359d08ef2b251cae2fb49099040321327f446ab6e7dc65336a489d0e"}}