{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:JY4SI7Q6T7QG4GHPEII447XM2E","short_pith_number":"pith:JY4SI7Q6","schema_version":"1.0","canonical_sha256":"4e39247e1e9fe06e18ef2211ce7eecd1349d251e258a02126e14be42a688a3df","source":{"kind":"arxiv","id":"1611.01576","version":2},"attestation_state":"computed","paper":{"title":"Quasi-Recurrent Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.LG"],"primary_cat":"cs.NE","authors_text":"Caiming Xiong, James Bradbury, Richard Socher, Stephen Merity","submitted_at":"2016-11-05T00:31:25Z","abstract_excerpt":"Recurrent neural networks are a powerful tool for modeling sequential data, but the dependence of each timestep's computation on the previous timestep's output limits parallelism and makes RNNs unwieldy for very long sequences. We introduce quasi-recurrent neural networks (QRNNs), an approach to neural sequence modeling that alternates convolutional layers, which apply in parallel across timesteps, and a minimalist recurrent pooling function that applies in parallel across channels. Despite lacking trainable recurrent layers, stacked QRNNs have better predictive accuracy than stacked LSTMs of "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.01576","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.NE","submitted_at":"2016-11-05T00:31:25Z","cross_cats_sorted":["cs.AI","cs.CL","cs.LG"],"title_canon_sha256":"7f29f9e78fb6739e3ea025913ee0ab4fde2089acfd49ab0f175bfd9a7b395f6e","abstract_canon_sha256":"cbdb8842bdaa394272f57cc6da3d6b8359bdfce5ce1c0f5b22acafbd5964f015"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:57:37.114201Z","signature_b64":"N45GNaXc7Tr6yBXgKBkysSBzq6Go6MbitvEgW0b2+Z37bLo0H9L3N25tscRLt2UEoAhnDne6HUrAtSC0pJHrDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4e39247e1e9fe06e18ef2211ce7eecd1349d251e258a02126e14be42a688a3df","last_reissued_at":"2026-05-18T00:57:37.113602Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:57:37.113602Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Quasi-Recurrent Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.LG"],"primary_cat":"cs.NE","authors_text":"Caiming Xiong, James Bradbury, Richard Socher, Stephen Merity","submitted_at":"2016-11-05T00:31:25Z","abstract_excerpt":"Recurrent neural networks are a powerful tool for modeling sequential data, but the dependence of each timestep's computation on the previous timestep's output limits parallelism and makes RNNs unwieldy for very long sequences. We introduce quasi-recurrent neural networks (QRNNs), an approach to neural sequence modeling that alternates convolutional layers, which apply in parallel across timesteps, and a minimalist recurrent pooling function that applies in parallel across channels. Despite lacking trainable recurrent layers, stacked QRNNs have better predictive accuracy than stacked LSTMs of "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.01576","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.01576","created_at":"2026-05-18T00:57:37.113721+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.01576v2","created_at":"2026-05-18T00:57:37.113721+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.01576","created_at":"2026-05-18T00:57:37.113721+00:00"},{"alias_kind":"pith_short_12","alias_value":"JY4SI7Q6T7QG","created_at":"2026-05-18T12:30:25.849896+00:00"},{"alias_kind":"pith_short_16","alias_value":"JY4SI7Q6T7QG4GHP","created_at":"2026-05-18T12:30:25.849896+00:00"},{"alias_kind":"pith_short_8","alias_value":"JY4SI7Q6","created_at":"2026-05-18T12:30:25.849896+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":5,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1907.03187","citing_title":"Applying a Pre-trained Language Model to Spanish Twitter Humor Prediction","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1911.05507","citing_title":"Compressive Transformers for Long-Range Sequence Modelling","ref_index":119,"is_internal_anchor":true},{"citing_arxiv_id":"2402.19427","citing_title":"Griffin: Mixing Gated Linear Recurrences with Local Attention for Efficient Language Models","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2405.21060","citing_title":"Transformers are SSMs: Generalized Models and Efficient Algorithms Through Structured State Space Duality","ref_index":16,"is_internal_anchor":false},{"citing_arxiv_id":"2312.00752","citing_title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","ref_index":11,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E","json":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E.json","graph_json":"https://pith.science/api/pith-number/JY4SI7Q6T7QG4GHPEII447XM2E/graph.json","events_json":"https://pith.science/api/pith-number/JY4SI7Q6T7QG4GHPEII447XM2E/events.json","paper":"https://pith.science/paper/JY4SI7Q6"},"agent_actions":{"view_html":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E","download_json":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E.json","view_paper":"https://pith.science/paper/JY4SI7Q6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.01576&json=true","fetch_graph":"https://pith.science/api/pith-number/JY4SI7Q6T7QG4GHPEII447XM2E/graph.json","fetch_events":"https://pith.science/api/pith-number/JY4SI7Q6T7QG4GHPEII447XM2E/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E/action/timestamp_anchor","attest_storage":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E/action/storage_attestation","attest_author":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E/action/author_attestation","sign_citation":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E/action/citation_signature","submit_replication":"https://pith.science/pith/JY4SI7Q6T7QG4GHPEII447XM2E/action/replication_record"}},"created_at":"2026-05-18T00:57:37.113721+00:00","updated_at":"2026-05-18T00:57:37.113721+00:00"}