{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2012:FCDXOZSFFOQ5DIE2GQ3HZOFMR5","short_pith_number":"pith:FCDXOZSF","schema_version":"1.0","canonical_sha256":"28877766452ba1d1a09a34367cb8ac8f7e33a003621f3a77eedc14f7bbae6d86","source":{"kind":"arxiv","id":"1212.3228","version":1},"attestation_state":"computed","paper":{"title":"Language Without Words: A Pointillist Model for Natural Language Processing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.SI"],"primary_cat":"cs.CL","authors_text":"Anhei Shu, Dan Wallach, David Phipps, George Luger, Jedidiah Crandall, Mohit Tiwari, Peiyou Song","submitted_at":"2012-12-11T20:19:58Z","abstract_excerpt":"This paper explores two separate questions: Can we perform natural language processing tasks without a lexicon?; and, Should we? Existing natural language processing techniques are either based on words as units or use units such as grams only for basic classification tasks. How close can a machine come to reasoning about the meanings of words and phrases in a corpus without using any lexicon, based only on grams?\n  Our own motivation for posing this question is based on our efforts to find popular trends in words and phrases from online Chinese social media. This form of written Chinese uses "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1212.3228","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2012-12-11T20:19:58Z","cross_cats_sorted":["cs.IR","cs.SI"],"title_canon_sha256":"5b8de82280f6c6b6a6fc6b4eeb0c74a44b4f7f0a3a8fb0b0fe78c8e9be704874","abstract_canon_sha256":"c3b417b55e4a6527a8ac29fa973df75f27546c121719ec87b2d56743df506e97"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:38:33.492039Z","signature_b64":"zHcabUCGZp/1/9S5JA8AFyUsV2qf1/lgYpJhZXu62RkxpDBpYQFcMbimQ9nH3celjUdnVmVncvs7A9+72Di9BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"28877766452ba1d1a09a34367cb8ac8f7e33a003621f3a77eedc14f7bbae6d86","last_reissued_at":"2026-05-18T03:38:33.491347Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:38:33.491347Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Language Without Words: A Pointillist Model for Natural Language Processing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.SI"],"primary_cat":"cs.CL","authors_text":"Anhei Shu, Dan Wallach, David Phipps, George Luger, Jedidiah Crandall, Mohit Tiwari, Peiyou Song","submitted_at":"2012-12-11T20:19:58Z","abstract_excerpt":"This paper explores two separate questions: Can we perform natural language processing tasks without a lexicon?; and, Should we? Existing natural language processing techniques are either based on words as units or use units such as grams only for basic classification tasks. How close can a machine come to reasoning about the meanings of words and phrases in a corpus without using any lexicon, based only on grams?\n  Our own motivation for posing this question is based on our efforts to find popular trends in words and phrases from online Chinese social media. This form of written Chinese uses "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1212.3228","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1212.3228","created_at":"2026-05-18T03:38:33.491464+00:00"},{"alias_kind":"arxiv_version","alias_value":"1212.3228v1","created_at":"2026-05-18T03:38:33.491464+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1212.3228","created_at":"2026-05-18T03:38:33.491464+00:00"},{"alias_kind":"pith_short_12","alias_value":"FCDXOZSFFOQ5","created_at":"2026-05-18T12:27:06.952714+00:00"},{"alias_kind":"pith_short_16","alias_value":"FCDXOZSFFOQ5DIE2","created_at":"2026-05-18T12:27:06.952714+00:00"},{"alias_kind":"pith_short_8","alias_value":"FCDXOZSF","created_at":"2026-05-18T12:27:06.952714+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5","json":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5.json","graph_json":"https://pith.science/api/pith-number/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/graph.json","events_json":"https://pith.science/api/pith-number/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/events.json","paper":"https://pith.science/paper/FCDXOZSF"},"agent_actions":{"view_html":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5","download_json":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5.json","view_paper":"https://pith.science/paper/FCDXOZSF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1212.3228&json=true","fetch_graph":"https://pith.science/api/pith-number/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/graph.json","fetch_events":"https://pith.science/api/pith-number/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/action/storage_attestation","attest_author":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/action/author_attestation","sign_citation":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/action/citation_signature","submit_replication":"https://pith.science/pith/FCDXOZSFFOQ5DIE2GQ3HZOFMR5/action/replication_record"}},"created_at":"2026-05-18T03:38:33.491464+00:00","updated_at":"2026-05-18T03:38:33.491464+00:00"}