{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:3E3XUEQM2B6B76IA6SGCO76NCZ","short_pith_number":"pith:3E3XUEQM","canonical_record":{"source":{"id":"1804.08186","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-22T22:30:30Z","cross_cats_sorted":[],"title_canon_sha256":"365995498ff3f7250ed8930eaf3c747dbaae9d17dcf5307e3681f1f711895aae","abstract_canon_sha256":"259b8cb8e3ae813ebed80e4947c27ab8b3d8c923a9b411c53a15a0d0e10618a4"},"schema_version":"1.0"},"canonical_sha256":"d9377a120cd07c1ff900f48c277fcd1665d7fa9158ba87e170efa63c05c545ce","source":{"kind":"arxiv","id":"1804.08186","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.08186","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"arxiv_version","alias_value":"1804.08186v2","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.08186","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"pith_short_12","alias_value":"3E3XUEQM2B6B","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3E3XUEQM2B6B76IA","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3E3XUEQM","created_at":"2026-05-18T12:32:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:3E3XUEQM2B6B76IA6SGCO76NCZ","target":"record","payload":{"canonical_record":{"source":{"id":"1804.08186","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-22T22:30:30Z","cross_cats_sorted":[],"title_canon_sha256":"365995498ff3f7250ed8930eaf3c747dbaae9d17dcf5307e3681f1f711895aae","abstract_canon_sha256":"259b8cb8e3ae813ebed80e4947c27ab8b3d8c923a9b411c53a15a0d0e10618a4"},"schema_version":"1.0"},"canonical_sha256":"d9377a120cd07c1ff900f48c277fcd1665d7fa9158ba87e170efa63c05c545ce","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:13.261739Z","signature_b64":"xe46c8EyJHTvTH02zWfY6OPMLym9Y4sELiPB7nHE9ThuhD6EDemmwmyJKpV/8USu66LMd0izdhfE55xPQoClAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d9377a120cd07c1ff900f48c277fcd1665d7fa9158ba87e170efa63c05c545ce","last_reissued_at":"2026-05-18T00:00:13.261242Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:13.261242Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1804.08186","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"J3vV2zoBYdxQFt4aQDiZq+ZblcD5FJxsAyecJKO6Nil+TzC41/E18k5EjDEEDRqzEa0roz4+71Mhu+78rpwiDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T12:18:16.936375Z"},"content_sha256":"c6f72431b093aed76c7c83235813ff0bbc29f8f146aaa1d4138e0db7e27935d3","schema_version":"1.0","event_id":"sha256:c6f72431b093aed76c7c83235813ff0bbc29f8f146aaa1d4138e0db7e27935d3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:3E3XUEQM2B6B76IA6SGCO76NCZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Automatic Language Identification in Texts: A Survey","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Krister Lind\\'en, Marco Lui, Marcos Zampieri, Timothy Baldwin, Tommi Jauhiainen","submitted_at":"2018-04-22T22:30:30Z","abstract_excerpt":"Language identification (LI) is the problem of determining the natural language that a document or part thereof is written in. Automatic LI has been extensively researched for over fifty years. Today, LI is a key part of many text processing pipelines, as text processing techniques generally assume that the language of the input text is known. Research in this area has recently been especially active. This article provides a brief history of LI research, and an extensive survey of the features and methods used so far in the LI literature. For describing the features and methods we introduce a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.08186","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:00:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FrMIZMEdH4xeImNaiywY6HjBarfmA2uab/o7B2DRM/lc5ngO6VY+WvaK3YHvwD9cZPlrZB4EETc4ECt0uw7LDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T12:18:16.936722Z"},"content_sha256":"9708117546315fb4b95fccccf4a0681295d4ceee98ecad565fdefd22b7872999","schema_version":"1.0","event_id":"sha256:9708117546315fb4b95fccccf4a0681295d4ceee98ecad565fdefd22b7872999"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/bundle.json","state_url":"https://pith.science/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T12:18:16Z","links":{"resolver":"https://pith.science/pith/3E3XUEQM2B6B76IA6SGCO76NCZ","bundle":"https://pith.science/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/bundle.json","state":"https://pith.science/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3E3XUEQM2B6B76IA6SGCO76NCZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:3E3XUEQM2B6B76IA6SGCO76NCZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"259b8cb8e3ae813ebed80e4947c27ab8b3d8c923a9b411c53a15a0d0e10618a4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-22T22:30:30Z","title_canon_sha256":"365995498ff3f7250ed8930eaf3c747dbaae9d17dcf5307e3681f1f711895aae"},"schema_version":"1.0","source":{"id":"1804.08186","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1804.08186","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"arxiv_version","alias_value":"1804.08186v2","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.08186","created_at":"2026-05-18T00:00:13Z"},{"alias_kind":"pith_short_12","alias_value":"3E3XUEQM2B6B","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_16","alias_value":"3E3XUEQM2B6B76IA","created_at":"2026-05-18T12:32:02Z"},{"alias_kind":"pith_short_8","alias_value":"3E3XUEQM","created_at":"2026-05-18T12:32:02Z"}],"graph_snapshots":[{"event_id":"sha256:9708117546315fb4b95fccccf4a0681295d4ceee98ecad565fdefd22b7872999","target":"graph","created_at":"2026-05-18T00:00:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Language identification (LI) is the problem of determining the natural language that a document or part thereof is written in. Automatic LI has been extensively researched for over fifty years. Today, LI is a key part of many text processing pipelines, as text processing techniques generally assume that the language of the input text is known. Research in this area has recently been especially active. This article provides a brief history of LI research, and an extensive survey of the features and methods used so far in the LI literature. For describing the features and methods we introduce a ","authors_text":"Krister Lind\\'en, Marco Lui, Marcos Zampieri, Timothy Baldwin, Tommi Jauhiainen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-22T22:30:30Z","title":"Automatic Language Identification in Texts: A Survey"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.08186","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c6f72431b093aed76c7c83235813ff0bbc29f8f146aaa1d4138e0db7e27935d3","target":"record","created_at":"2026-05-18T00:00:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"259b8cb8e3ae813ebed80e4947c27ab8b3d8c923a9b411c53a15a0d0e10618a4","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-04-22T22:30:30Z","title_canon_sha256":"365995498ff3f7250ed8930eaf3c747dbaae9d17dcf5307e3681f1f711895aae"},"schema_version":"1.0","source":{"id":"1804.08186","kind":"arxiv","version":2}},"canonical_sha256":"d9377a120cd07c1ff900f48c277fcd1665d7fa9158ba87e170efa63c05c545ce","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d9377a120cd07c1ff900f48c277fcd1665d7fa9158ba87e170efa63c05c545ce","first_computed_at":"2026-05-18T00:00:13.261242Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:00:13.261242Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"xe46c8EyJHTvTH02zWfY6OPMLym9Y4sELiPB7nHE9ThuhD6EDemmwmyJKpV/8USu66LMd0izdhfE55xPQoClAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:00:13.261739Z","signed_message":"canonical_sha256_bytes"},"source_id":"1804.08186","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c6f72431b093aed76c7c83235813ff0bbc29f8f146aaa1d4138e0db7e27935d3","sha256:9708117546315fb4b95fccccf4a0681295d4ceee98ecad565fdefd22b7872999"],"state_sha256":"e1bf1164b5e68bf25de98e2828261d204188f212721b588ce6a46c14c2002cca"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IZ5n77ZGKC5FlIQP5Li72r9bZBsl5b1JV6AOblYYhOyGywJqnxNyU8YAVzoye7TzO/PQeGSyzDNrhCsXECU+Ag==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T12:18:16.938648Z","bundle_sha256":"11d9db7c02f1c0eba4b9e469547e20ef46a9ff951fa1b00b03e892ac8ad98224"}}