{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:BUTDHUMEY2CQUQZQH6HWBCITID","short_pith_number":"pith:BUTDHUME","canonical_record":{"source":{"id":"1712.09518","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-12-27T08:02:26Z","cross_cats_sorted":[],"title_canon_sha256":"bfdd83f24fd66966c18410dfda9d987c3bdd10f1f1020e03614e7a23d8c7ae6d","abstract_canon_sha256":"f039fe794734552ee088d19a4066254d469e7237fe2e79194e309a5d1e7ace44"},"schema_version":"1.0"},"canonical_sha256":"0d2633d184c6850a43303f8f60891340f85da2e245f435ed419daf207cbc1278","source":{"kind":"arxiv","id":"1712.09518","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.09518","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"arxiv_version","alias_value":"1712.09518v1","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.09518","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"pith_short_12","alias_value":"BUTDHUMEY2CQ","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"BUTDHUMEY2CQUQZQ","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"BUTDHUME","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:BUTDHUMEY2CQUQZQH6HWBCITID","target":"record","payload":{"canonical_record":{"source":{"id":"1712.09518","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-12-27T08:02:26Z","cross_cats_sorted":[],"title_canon_sha256":"bfdd83f24fd66966c18410dfda9d987c3bdd10f1f1020e03614e7a23d8c7ae6d","abstract_canon_sha256":"f039fe794734552ee088d19a4066254d469e7237fe2e79194e309a5d1e7ace44"},"schema_version":"1.0"},"canonical_sha256":"0d2633d184c6850a43303f8f60891340f85da2e245f435ed419daf207cbc1278","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:27:08.102544Z","signature_b64":"YCDaLfnGR8N4+26LLpKgowt7iHaEUZLLe9o3VtP0S6vJPlhOyzXnElp0Cp1ehuicdXArsGLR7D87QxdBMJPPCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0d2633d184c6850a43303f8f60891340f85da2e245f435ed419daf207cbc1278","last_reissued_at":"2026-05-18T00:27:08.102064Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:27:08.102064Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.09518","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"i0OgU/P33JWvTrx8X0tWcRzhBJlrY5cIU+VpBQJD5KlXGmFWzN5SMj0BxZe2oDsAfiYizBVXMDjyGOWjZ27kCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:58:43.788270Z"},"content_sha256":"d225f197408fbc5fc31c83259e20e0a2c6cfc69926ebc164cc1ee683edf88402","schema_version":"1.0","event_id":"sha256:d225f197408fbc5fc31c83259e20e0a2c6cfc69926ebc164cc1ee683edf88402"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:BUTDHUMEY2CQUQZQH6HWBCITID","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improving Text Normalization by Optimizing Nearest Neighbor Matching","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Asim Karim, Salman Ahmad Ansari, Usman Zafar","submitted_at":"2017-12-27T08:02:26Z","abstract_excerpt":"Text normalization is an essential task in the processing and analysis of social media that is dominated with informal writing. It aims to map informal words to their intended standard forms. Previously proposed text normalization approaches typically require manual selection of parameters for improved performance. In this paper, we present an automatic optimizationbased nearest neighbor matching approach for text normalization. This approach is motivated by the observation that text normalization is essentially a matching problem and nearest neighbor matching with an adaptive similarity funct"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.09518","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:27:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mB1PY6H7T9W/ZVcsw1pdyOEzIUz+83w2m3BzmTRrDOHtUn3EofMvl0LHL3hAoKlPEJ234q2fPl9XDP1LX+UcCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:58:43.788654Z"},"content_sha256":"3eb3daaf905bd87e00a92a6ebba596fe001d902bf5ff27a75d03ad07d0d3de2c","schema_version":"1.0","event_id":"sha256:3eb3daaf905bd87e00a92a6ebba596fe001d902bf5ff27a75d03ad07d0d3de2c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BUTDHUMEY2CQUQZQH6HWBCITID/bundle.json","state_url":"https://pith.science/pith/BUTDHUMEY2CQUQZQH6HWBCITID/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BUTDHUMEY2CQUQZQH6HWBCITID/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T22:58:43Z","links":{"resolver":"https://pith.science/pith/BUTDHUMEY2CQUQZQH6HWBCITID","bundle":"https://pith.science/pith/BUTDHUMEY2CQUQZQH6HWBCITID/bundle.json","state":"https://pith.science/pith/BUTDHUMEY2CQUQZQH6HWBCITID/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BUTDHUMEY2CQUQZQH6HWBCITID/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:BUTDHUMEY2CQUQZQH6HWBCITID","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f039fe794734552ee088d19a4066254d469e7237fe2e79194e309a5d1e7ace44","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-12-27T08:02:26Z","title_canon_sha256":"bfdd83f24fd66966c18410dfda9d987c3bdd10f1f1020e03614e7a23d8c7ae6d"},"schema_version":"1.0","source":{"id":"1712.09518","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.09518","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"arxiv_version","alias_value":"1712.09518v1","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.09518","created_at":"2026-05-18T00:27:08Z"},{"alias_kind":"pith_short_12","alias_value":"BUTDHUMEY2CQ","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"BUTDHUMEY2CQUQZQ","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"BUTDHUME","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:3eb3daaf905bd87e00a92a6ebba596fe001d902bf5ff27a75d03ad07d0d3de2c","target":"graph","created_at":"2026-05-18T00:27:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Text normalization is an essential task in the processing and analysis of social media that is dominated with informal writing. It aims to map informal words to their intended standard forms. Previously proposed text normalization approaches typically require manual selection of parameters for improved performance. In this paper, we present an automatic optimizationbased nearest neighbor matching approach for text normalization. This approach is motivated by the observation that text normalization is essentially a matching problem and nearest neighbor matching with an adaptive similarity funct","authors_text":"Asim Karim, Salman Ahmad Ansari, Usman Zafar","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-12-27T08:02:26Z","title":"Improving Text Normalization by Optimizing Nearest Neighbor Matching"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.09518","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d225f197408fbc5fc31c83259e20e0a2c6cfc69926ebc164cc1ee683edf88402","target":"record","created_at":"2026-05-18T00:27:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f039fe794734552ee088d19a4066254d469e7237fe2e79194e309a5d1e7ace44","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2017-12-27T08:02:26Z","title_canon_sha256":"bfdd83f24fd66966c18410dfda9d987c3bdd10f1f1020e03614e7a23d8c7ae6d"},"schema_version":"1.0","source":{"id":"1712.09518","kind":"arxiv","version":1}},"canonical_sha256":"0d2633d184c6850a43303f8f60891340f85da2e245f435ed419daf207cbc1278","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0d2633d184c6850a43303f8f60891340f85da2e245f435ed419daf207cbc1278","first_computed_at":"2026-05-18T00:27:08.102064Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:27:08.102064Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"YCDaLfnGR8N4+26LLpKgowt7iHaEUZLLe9o3VtP0S6vJPlhOyzXnElp0Cp1ehuicdXArsGLR7D87QxdBMJPPCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:27:08.102544Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.09518","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d225f197408fbc5fc31c83259e20e0a2c6cfc69926ebc164cc1ee683edf88402","sha256:3eb3daaf905bd87e00a92a6ebba596fe001d902bf5ff27a75d03ad07d0d3de2c"],"state_sha256":"ac62bdbe0431fd4abc82a868e1b80d4dcd5193a92a2e88581eb2ef8cd509fcf9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/GBrjrkqFFC5dUvmXKQK9rPTW2B7gPawIwxyUygITkk1ZGFiSuI5FFnrVr3icQKbO3dHSaqw2iXsHPaucjXcAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T22:58:43.792220Z","bundle_sha256":"db30e03c16da4ab665a212fc0a98da6bcbb837d0b9d7bfc8b68e0f1d4cbbd383"}}