{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:FOP6ZOFQK6YP4NVQLOBS74QMD4","short_pith_number":"pith:FOP6ZOFQ","schema_version":"1.0","canonical_sha256":"2b9fecb8b057b0fe36b05b832ff20c1f04ea1adbdf363af5e140ce91a3e143b0","source":{"kind":"arxiv","id":"1704.01444","version":2},"attestation_state":"computed","paper":{"title":"Learning to Generate Reviews and Discovering Sentiment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.NE"],"primary_cat":"cs.LG","authors_text":"Alec Radford, Ilya Sutskever, Rafal Jozefowicz","submitted_at":"2017-04-05T14:20:28Z","abstract_excerpt":"We explore the properties of byte-level recurrent language models. When given sufficient amounts of capacity, training data, and compute time, the representations learned by these models include disentangled features corresponding to high-level concepts. Specifically, we find a single unit which performs sentiment analysis. These representations, learned in an unsupervised manner, achieve state of the art on the binary subset of the Stanford Sentiment Treebank. They are also very data efficient. When using only a handful of labeled examples, our approach matches the performance of strong basel"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1704.01444","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-04-05T14:20:28Z","cross_cats_sorted":["cs.CL","cs.NE"],"title_canon_sha256":"a145c0c52d7c8df5110416d353f22032676d470ae8d976ce8af55593cc4a5dd3","abstract_canon_sha256":"603c34142414d37913270dabf158a0063ee0e6845ba91e6dc7b984f6f6cc6bba"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:46:54.864243Z","signature_b64":"T+f8g+Mz3JpAFOtjnqZfEda/mKfgZqTb+KFNw3W1/1WhyCxQOt5pLthv8Hp+PYcFFL1Cry4fxv8UC5Aj34kjCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2b9fecb8b057b0fe36b05b832ff20c1f04ea1adbdf363af5e140ce91a3e143b0","last_reissued_at":"2026-05-18T00:46:54.863679Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:46:54.863679Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning to Generate Reviews and Discovering Sentiment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.NE"],"primary_cat":"cs.LG","authors_text":"Alec Radford, Ilya Sutskever, Rafal Jozefowicz","submitted_at":"2017-04-05T14:20:28Z","abstract_excerpt":"We explore the properties of byte-level recurrent language models. When given sufficient amounts of capacity, training data, and compute time, the representations learned by these models include disentangled features corresponding to high-level concepts. Specifically, we find a single unit which performs sentiment analysis. These representations, learned in an unsupervised manner, achieve state of the art on the binary subset of the Stanford Sentiment Treebank. They are also very data efficient. When using only a handful of labeled examples, our approach matches the performance of strong basel"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.01444","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1704.01444","created_at":"2026-05-18T00:46:54.863771+00:00"},{"alias_kind":"arxiv_version","alias_value":"1704.01444v2","created_at":"2026-05-18T00:46:54.863771+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.01444","created_at":"2026-05-18T00:46:54.863771+00:00"},{"alias_kind":"pith_short_12","alias_value":"FOP6ZOFQK6YP","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_16","alias_value":"FOP6ZOFQK6YP4NVQ","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_8","alias_value":"FOP6ZOFQ","created_at":"2026-05-18T12:31:15.632608+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":14,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"1907.00462","citing_title":"Inter and Intra Document Attention for Depression Risk Assessment","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"1907.00455","citing_title":"Multiplicative Models for Recurrent Language Modeling","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"1907.06690","citing_title":"A Scalable Framework for Multilevel Streaming Data Analytics using Deep Learning","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2405.07987","citing_title":"The Platonic Representation Hypothesis","ref_index":130,"is_internal_anchor":true},{"citing_arxiv_id":"2211.00593","citing_title":"Interpretability in the Wild: a Circuit for Indirect Object Identification in GPT-2 small","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2309.16588","citing_title":"Vision Transformers Need Registers","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2205.11487","citing_title":"Photorealistic Text-to-Image Diffusion Models with Deep Language Understanding","ref_index":46,"is_internal_anchor":false},{"citing_arxiv_id":"2605.09630","citing_title":"Scratchpad Patching: Decoupling Compute from Patch Size in Byte-Level Language Models","ref_index":75,"is_internal_anchor":false},{"citing_arxiv_id":"2209.10652","citing_title":"Toy Models of Superposition","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2309.07864","citing_title":"The Rise and Potential of Large Language Model Based Agents: A Survey","ref_index":49,"is_internal_anchor":false},{"citing_arxiv_id":"2303.18223","citing_title":"A Survey of Large Language Models","ref_index":123,"is_internal_anchor":false},{"citing_arxiv_id":"1909.08593","citing_title":"Fine-Tuning Language Models from Human Preferences","ref_index":22,"is_internal_anchor":false},{"citing_arxiv_id":"2604.14090","citing_title":"From Weights to Activations: Is Steering the Next Frontier of Adaptation?","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2304.07193","citing_title":"DINOv2: Learning Robust Visual Features without Supervision","ref_index":20,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4","json":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4.json","graph_json":"https://pith.science/api/pith-number/FOP6ZOFQK6YP4NVQLOBS74QMD4/graph.json","events_json":"https://pith.science/api/pith-number/FOP6ZOFQK6YP4NVQLOBS74QMD4/events.json","paper":"https://pith.science/paper/FOP6ZOFQ"},"agent_actions":{"view_html":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4","download_json":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4.json","view_paper":"https://pith.science/paper/FOP6ZOFQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1704.01444&json=true","fetch_graph":"https://pith.science/api/pith-number/FOP6ZOFQK6YP4NVQLOBS74QMD4/graph.json","fetch_events":"https://pith.science/api/pith-number/FOP6ZOFQK6YP4NVQLOBS74QMD4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4/action/storage_attestation","attest_author":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4/action/author_attestation","sign_citation":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4/action/citation_signature","submit_replication":"https://pith.science/pith/FOP6ZOFQK6YP4NVQLOBS74QMD4/action/replication_record"}},"created_at":"2026-05-18T00:46:54.863771+00:00","updated_at":"2026-05-18T00:46:54.863771+00:00"}