{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:HCXI3JNJYMNAWEAN43BEVXF4LD","short_pith_number":"pith:HCXI3JNJ","canonical_record":{"source":{"id":"1611.06933","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-21T18:30:17Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"7bea0aaf3585d6f0d4812c49e45bd7c10f086162b359b1e9a776fe15976ae810","abstract_canon_sha256":"bbebc9d673e3bfa6eb8f2efe5759c31637f2c2e589a4899ed2a09d1282dfb027"},"schema_version":"1.0"},"canonical_sha256":"38ae8da5a9c31a0b100de6c24adcbc58e93ced1a5537a062cf2cafed7fbb994e","source":{"kind":"arxiv","id":"1611.06933","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.06933","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"arxiv_version","alias_value":"1611.06933v1","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.06933","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"pith_short_12","alias_value":"HCXI3JNJYMNA","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HCXI3JNJYMNAWEAN","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HCXI3JNJ","created_at":"2026-05-18T12:30:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:HCXI3JNJYMNAWEAN43BEVXF4LD","target":"record","payload":{"canonical_record":{"source":{"id":"1611.06933","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-21T18:30:17Z","cross_cats_sorted":["cs.CL","stat.ML"],"title_canon_sha256":"7bea0aaf3585d6f0d4812c49e45bd7c10f086162b359b1e9a776fe15976ae810","abstract_canon_sha256":"bbebc9d673e3bfa6eb8f2efe5759c31637f2c2e589a4899ed2a09d1282dfb027"},"schema_version":"1.0"},"canonical_sha256":"38ae8da5a9c31a0b100de6c24adcbc58e93ced1a5537a062cf2cafed7fbb994e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:57:32.489493Z","signature_b64":"BhvHsZXqCDGb0YXP9X3MCUyDz2/k0WZp1eoU1mtaqJNBxP5lp5b39VsF/EyRXt+yRO/G2nm5qi6TSyxJ/4RNAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"38ae8da5a9c31a0b100de6c24adcbc58e93ced1a5537a062cf2cafed7fbb994e","last_reissued_at":"2026-05-18T00:57:32.488800Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:57:32.488800Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1611.06933","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:57:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WXD64gvJRnD8XALPXscFFWlhL1Uxq4V5QnLxODxKRQ0L6uRhROiKJU6MT830Y9YyxT6vj4uNbTkiG8M05797Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T19:56:43.108631Z"},"content_sha256":"76ffd52cdb328da7090533108f38e463f514f0825ff6cf9936e2c59a32d923df","schema_version":"1.0","event_id":"sha256:76ffd52cdb328da7090533108f38e463f514f0825ff6cf9936e2c59a32d923df"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:HCXI3JNJYMNAWEAN43BEVXF4LD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Unsupervised Learning for Lexicon-Based Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","stat.ML"],"primary_cat":"cs.LG","authors_text":"Jacob Eisenstein","submitted_at":"2016-11-21T18:30:17Z","abstract_excerpt":"In lexicon-based classification, documents are assigned labels by comparing the number of words that appear from two opposed lexicons, such as positive and negative sentiment. Creating such words lists is often easier than labeling instances, and they can be debugged by non-experts if classification performance is unsatisfactory. However, there is little analysis or justification of this classification heuristic. This paper describes a set of assumptions that can be used to derive a probabilistic justification for lexicon-based classification, as well as an analysis of its expected accuracy. O"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.06933","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:57:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"w48LnKGTcxwrtKgwryyIjEU6v06B12+lj5Bj50VcUyNyHwV8zXKNfXqKuixDQZO26JawL3MHfICfy/QrjJkFCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T19:56:43.109272Z"},"content_sha256":"61183de667b933136a6f3c53799eb0863519ad6efcdb8196ca9f51d3d175f737","schema_version":"1.0","event_id":"sha256:61183de667b933136a6f3c53799eb0863519ad6efcdb8196ca9f51d3d175f737"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/bundle.json","state_url":"https://pith.science/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T19:56:43Z","links":{"resolver":"https://pith.science/pith/HCXI3JNJYMNAWEAN43BEVXF4LD","bundle":"https://pith.science/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/bundle.json","state":"https://pith.science/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HCXI3JNJYMNAWEAN43BEVXF4LD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:HCXI3JNJYMNAWEAN43BEVXF4LD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bbebc9d673e3bfa6eb8f2efe5759c31637f2c2e589a4899ed2a09d1282dfb027","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-21T18:30:17Z","title_canon_sha256":"7bea0aaf3585d6f0d4812c49e45bd7c10f086162b359b1e9a776fe15976ae810"},"schema_version":"1.0","source":{"id":"1611.06933","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1611.06933","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"arxiv_version","alias_value":"1611.06933v1","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.06933","created_at":"2026-05-18T00:57:32Z"},{"alias_kind":"pith_short_12","alias_value":"HCXI3JNJYMNA","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"HCXI3JNJYMNAWEAN","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"HCXI3JNJ","created_at":"2026-05-18T12:30:19Z"}],"graph_snapshots":[{"event_id":"sha256:61183de667b933136a6f3c53799eb0863519ad6efcdb8196ca9f51d3d175f737","target":"graph","created_at":"2026-05-18T00:57:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In lexicon-based classification, documents are assigned labels by comparing the number of words that appear from two opposed lexicons, such as positive and negative sentiment. Creating such words lists is often easier than labeling instances, and they can be debugged by non-experts if classification performance is unsatisfactory. However, there is little analysis or justification of this classification heuristic. This paper describes a set of assumptions that can be used to derive a probabilistic justification for lexicon-based classification, as well as an analysis of its expected accuracy. O","authors_text":"Jacob Eisenstein","cross_cats":["cs.CL","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-21T18:30:17Z","title":"Unsupervised Learning for Lexicon-Based Classification"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.06933","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:76ffd52cdb328da7090533108f38e463f514f0825ff6cf9936e2c59a32d923df","target":"record","created_at":"2026-05-18T00:57:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bbebc9d673e3bfa6eb8f2efe5759c31637f2c2e589a4899ed2a09d1282dfb027","cross_cats_sorted":["cs.CL","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-21T18:30:17Z","title_canon_sha256":"7bea0aaf3585d6f0d4812c49e45bd7c10f086162b359b1e9a776fe15976ae810"},"schema_version":"1.0","source":{"id":"1611.06933","kind":"arxiv","version":1}},"canonical_sha256":"38ae8da5a9c31a0b100de6c24adcbc58e93ced1a5537a062cf2cafed7fbb994e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"38ae8da5a9c31a0b100de6c24adcbc58e93ced1a5537a062cf2cafed7fbb994e","first_computed_at":"2026-05-18T00:57:32.488800Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:57:32.488800Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"BhvHsZXqCDGb0YXP9X3MCUyDz2/k0WZp1eoU1mtaqJNBxP5lp5b39VsF/EyRXt+yRO/G2nm5qi6TSyxJ/4RNAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:57:32.489493Z","signed_message":"canonical_sha256_bytes"},"source_id":"1611.06933","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:76ffd52cdb328da7090533108f38e463f514f0825ff6cf9936e2c59a32d923df","sha256:61183de667b933136a6f3c53799eb0863519ad6efcdb8196ca9f51d3d175f737"],"state_sha256":"4e81950038e8b89bb024fc982ac7f6e4b3210eb94e2a4b93835d55a0dbc02f44"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2ZcVdQ5OcqYHrmEzK7p8y3l/gCSvSo/IGIz85MSjdRde81qvAWrdkWx572dsJIA0tqrGc8dl9LrYtKdQq/vEDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T19:56:43.113472Z","bundle_sha256":"ef771d6ff5e1fc2d3602013ca32115339eeb8aa67965934531744da0e683c747"}}