{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:OLTV2PMKJUAUDNK3TKNQ56HWK4","short_pith_number":"pith:OLTV2PMK","schema_version":"1.0","canonical_sha256":"72e75d3d8a4d0141b55b9a9b0ef8f6572c775cf5f4e597534579fc364ce403bb","source":{"kind":"arxiv","id":"1804.00857","version":1},"attestation_state":"computed","paper":{"title":"Bi-Directional Block Self-Attention for Fast and Memory-Efficient Sequence Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Chengqi Zhang, Guodong Long, Jing Jiang, Tao Shen, Tianyi Zhou","submitted_at":"2018-04-03T07:41:10Z","abstract_excerpt":"Recurrent neural networks (RNN), convolutional neural networks (CNN) and self-attention networks (SAN) are commonly used to produce context-aware representations. RNN can capture long-range dependency but is hard to parallelize and not time-efficient. CNN focuses on local dependency but does not perform well on some tasks. SAN can model both such dependencies via highly parallelizable computation, but memory requirement grows rapidly in line with sequence length. In this paper, we propose a model, called \"bi-directional block self-attention network (Bi-BloSAN)\", for RNN/CNN-free sequence encod"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.00857","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-03T07:41:10Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"2739fe244635584d81e5eda2b44ea1aff5e2de7d1eb21a488f76c76f4b8bdf86","abstract_canon_sha256":"eef30ba983d4616a1bd60e0bc74bbe9d4cba49d63cb8c6850f4c8f35940d344a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:19:31.948047Z","signature_b64":"fKzGZdLn03ET+JIkYQwMf0IUj6YwJNZYr67HYa4Inz/1Y9HgPXKJONyP1o7849vYZysWB4i2FScw8tdpe0oiCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"72e75d3d8a4d0141b55b9a9b0ef8f6572c775cf5f4e597534579fc364ce403bb","last_reissued_at":"2026-05-18T00:19:31.947488Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:19:31.947488Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Bi-Directional Block Self-Attention for Fast and Memory-Efficient Sequence Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Chengqi Zhang, Guodong Long, Jing Jiang, Tao Shen, Tianyi Zhou","submitted_at":"2018-04-03T07:41:10Z","abstract_excerpt":"Recurrent neural networks (RNN), convolutional neural networks (CNN) and self-attention networks (SAN) are commonly used to produce context-aware representations. RNN can capture long-range dependency but is hard to parallelize and not time-efficient. CNN focuses on local dependency but does not perform well on some tasks. SAN can model both such dependencies via highly parallelizable computation, but memory requirement grows rapidly in line with sequence length. In this paper, we propose a model, called \"bi-directional block self-attention network (Bi-BloSAN)\", for RNN/CNN-free sequence encod"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.00857","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.00857","created_at":"2026-05-18T00:19:31.947597+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.00857v1","created_at":"2026-05-18T00:19:31.947597+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.00857","created_at":"2026-05-18T00:19:31.947597+00:00"},{"alias_kind":"pith_short_12","alias_value":"OLTV2PMKJUAU","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"OLTV2PMKJUAUDNK3","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"OLTV2PMK","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"1909.11942","citing_title":"ALBERT: A Lite BERT for Self-supervised Learning of Language Representations","ref_index":31,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4","json":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4.json","graph_json":"https://pith.science/api/pith-number/OLTV2PMKJUAUDNK3TKNQ56HWK4/graph.json","events_json":"https://pith.science/api/pith-number/OLTV2PMKJUAUDNK3TKNQ56HWK4/events.json","paper":"https://pith.science/paper/OLTV2PMK"},"agent_actions":{"view_html":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4","download_json":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4.json","view_paper":"https://pith.science/paper/OLTV2PMK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.00857&json=true","fetch_graph":"https://pith.science/api/pith-number/OLTV2PMKJUAUDNK3TKNQ56HWK4/graph.json","fetch_events":"https://pith.science/api/pith-number/OLTV2PMKJUAUDNK3TKNQ56HWK4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4/action/storage_attestation","attest_author":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4/action/author_attestation","sign_citation":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4/action/citation_signature","submit_replication":"https://pith.science/pith/OLTV2PMKJUAUDNK3TKNQ56HWK4/action/replication_record"}},"created_at":"2026-05-18T00:19:31.947597+00:00","updated_at":"2026-05-18T00:19:31.947597+00:00"}