{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:I452AMLSZVTA4RBYR335OPRJRE","short_pith_number":"pith:I452AMLS","schema_version":"1.0","canonical_sha256":"473ba03172cd660e44388ef7d73e29892ff73fd0832dd623106b5271a5755b36","source":{"kind":"arxiv","id":"2310.15154","version":1},"attestation_state":"computed","paper":{"title":"Linear Representations of Sentiment in Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Sentiment in large language models is captured by one direction in activation space, with positive and negative at opposite poles.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Atticus Geiger, Curt Tigges, Neel Nanda, Oskar John Hollinsworth","submitted_at":"2023-10-23T17:55:31Z","abstract_excerpt":"Sentiment is a pervasive feature in natural language text, yet it is an open question how sentiment is represented within Large Language Models (LLMs). In this study, we reveal that across a range of models, sentiment is represented linearly: a single direction in activation space mostly captures the feature across a range of tasks with one extreme for positive and the other for negative. Through causal interventions, we isolate this direction and show it is causally relevant in both toy tasks and real world datasets such as Stanford Sentiment Treebank. Through this case study we model a thoro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2310.15154","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2023-10-23T17:55:31Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"021e9c596b91e444484c14a187a7d3dc84869cdd4b13f68ee4532b1dffe16db7","abstract_canon_sha256":"ad633a244a79a763073ea1df51bc22044cae72f9b828905346cab0a22cbf0868"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:52.529627Z","signature_b64":"0AxnbSg0saqboJbW1z/Qt0/IuJ/LT+ywshKVeQoRTVT69V6+UIUrOepHUeFp2eqkeaDfcZAIipAZDittSed/Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"473ba03172cd660e44388ef7d73e29892ff73fd0832dd623106b5271a5755b36","last_reissued_at":"2026-05-17T23:38:52.529178Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:52.529178Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Linear Representations of Sentiment in Large Language Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Sentiment in large language models is captured by one direction in activation space, with positive and negative at opposite poles.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Atticus Geiger, Curt Tigges, Neel Nanda, Oskar John Hollinsworth","submitted_at":"2023-10-23T17:55:31Z","abstract_excerpt":"Sentiment is a pervasive feature in natural language text, yet it is an open question how sentiment is represented within Large Language Models (LLMs). In this study, we reveal that across a range of models, sentiment is represented linearly: a single direction in activation space mostly captures the feature across a range of tasks with one extreme for positive and the other for negative. Through causal interventions, we isolate this direction and show it is causally relevant in both toy tasks and real world datasets such as Stanford Sentiment Treebank. Through this case study we model a thoro"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"sentiment is represented linearly: a single direction in activation space mostly captures the feature across a range of tasks with one extreme for positive and the other for negative.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the identified direction is the primary and stable representation of sentiment rather than one of several correlated directions that happen to align on the chosen datasets and models.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Sentiment is represented as a single linear direction in LLM activation space that is causally relevant across tasks and is summarized at punctuation and names in addition to charged words.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Sentiment in large language models is captured by one direction in activation space, with positive and negative at opposite poles.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"723ea1bd44ac9b4ad91a7f845cf064cb9e939837ad4f7cde5bc3caa21bd41d40"},"source":{"id":"2310.15154","kind":"arxiv","version":1},"verdict":{"id":"fa3b0018-a569-4c41-a321-403d043b76fd","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T12:37:30.988087Z","strongest_claim":"sentiment is represented linearly: a single direction in activation space mostly captures the feature across a range of tasks with one extreme for positive and the other for negative.","one_line_summary":"Sentiment is represented as a single linear direction in LLM activation space that is causally relevant across tasks and is summarized at punctuation and names in addition to charged words.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the identified direction is the primary and stable representation of sentiment rather than one of several correlated directions that happen to align on the chosen datasets and models.","pith_extraction_headline":"Sentiment in large language models is captured by one direction in activation space, with positive and negative at opposite poles."},"references":{"count":122,"sample":[{"doi":"","year":2021,"title":"Eliciting latent knowledge: How to tell if your eyes deceive you , author=. 2021 , month=","work_id":"97920acf-6383-4636-b93b-cf646b7b9e8f","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1901,"title":"Karl Pearson F.R.S. , title =. The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science , volume =. 1901 , publisher =","work_id":"b8ab6ba1-6f30-436e-8feb-8eaa39326473","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Information Theory, IEEE Transactions on , volume=","work_id":"87b0e008-4246-4fce-9c7c-fd4b36790357","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":1958,"title":"Journal of the Royal Statistical Society: Series B (Methodological) , volume=","work_id":"7e058df2-2b5c-46e8-b8f1-a584271c6e0b","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Finding Alignments Between Interpretable Causal Variables and Distributed Neural Representations , author=. 2023 , eprint=","work_id":"cbb90bdc-7161-4182-a54f-aa9b25aaf24a","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":122,"snapshot_sha256":"8808a5fa2619ec3cd2a54608ce5291a941fe210471ba586ef8acd348a1f0a3ad","internal_anchors":1},"formal_canon":{"evidence_count":2,"snapshot_sha256":"06ce9b182fafc8bb3d45d9a56cc920cce5d23b4fa4ded86a9e6959511795d9f9"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2310.15154","created_at":"2026-05-17T23:38:52.529246+00:00"},{"alias_kind":"arxiv_version","alias_value":"2310.15154v1","created_at":"2026-05-17T23:38:52.529246+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.15154","created_at":"2026-05-17T23:38:52.529246+00:00"},{"alias_kind":"pith_short_12","alias_value":"I452AMLSZVTA","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"I452AMLSZVTA4RBY","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"I452AMLS","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":23,"internal_anchor_count":23,"sample":[{"citing_arxiv_id":"2605.15455","citing_title":"Multi-Turn Neural Transparency: Surfacing Neural Activations Improves User Calibration to LLM Behavioral Drift","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2510.24941","citing_title":"Can Aha Moments Be Fake? Towards Quantifying Decorative and True Thinking in Chain-of-Thought","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2511.02135","citing_title":"Graph-Based Alternatives to LLMs for Human Simulation","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2404.15255","citing_title":"How to use and interpret activation patching","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14075","citing_title":"Rethinking Layer Relevance in Large Language Models Beyond Cosine Similarity","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12809","citing_title":"Correcting Influence: Unboxing LLM Outputs with Orthogonal Latent Spaces","ref_index":84,"is_internal_anchor":true},{"citing_arxiv_id":"2604.02608","citing_title":"Steerable but Not Decodable: Function Vectors Operate Beyond the Logit Lens","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2406.11717","citing_title":"Refusal in Language Models Is Mediated by a Single Direction","ref_index":190,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27169","citing_title":"Semantic Structure of Feature Space in Large Language Models","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09967","citing_title":"Tensor Product Representation Probes Reveal Shared Structure Across Linear Directions","ref_index":24,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09314","citing_title":"How LLMs Are Persuaded: A Few Attention Heads, Rerouted","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09239","citing_title":"Repeated-Token Counting Reveals a Dissociation Between Representations and Outputs","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2312.06681","citing_title":"Steering Llama 2 via Contrastive Activation Addition","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05653","citing_title":"Negative Before Positive: Asymmetric Valence Processing in Large Language Models","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19052","citing_title":"Cell-Based Representation of Relational Binding in Language Models","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07729","citing_title":"Emotion Concepts and their Function in a Large Language Model","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08846","citing_title":"Dictionary-Aligned Concept Control for Safeguarding Multimodal LLMs","ref_index":98,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07148","citing_title":"Uncovering and Shaping the Latent Representation of 3D Scene Topology in Vision-Language Models","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07990","citing_title":"Tool Calling is Linearly Readable and Steerable in Language Models","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06979","citing_title":"PLOT: Progressive Localization via Optimal Transport in Neural Causal Abstraction","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07886","citing_title":"Linear Representations of Hierarchical Concepts in Language Models","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18519","citing_title":"LLM Safety From Within: Detecting Harmful Content with Internal Representations","ref_index":75,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19678","citing_title":"Exploring Language-Agnosticity in Function Vectors: A Case Study in Machine Translation","ref_index":19,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE","json":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE.json","graph_json":"https://pith.science/api/pith-number/I452AMLSZVTA4RBYR335OPRJRE/graph.json","events_json":"https://pith.science/api/pith-number/I452AMLSZVTA4RBYR335OPRJRE/events.json","paper":"https://pith.science/paper/I452AMLS"},"agent_actions":{"view_html":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE","download_json":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE.json","view_paper":"https://pith.science/paper/I452AMLS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2310.15154&json=true","fetch_graph":"https://pith.science/api/pith-number/I452AMLSZVTA4RBYR335OPRJRE/graph.json","fetch_events":"https://pith.science/api/pith-number/I452AMLSZVTA4RBYR335OPRJRE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE/action/storage_attestation","attest_author":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE/action/author_attestation","sign_citation":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE/action/citation_signature","submit_replication":"https://pith.science/pith/I452AMLSZVTA4RBYR335OPRJRE/action/replication_record"}},"created_at":"2026-05-17T23:38:52.529246+00:00","updated_at":"2026-05-17T23:38:52.529246+00:00"}