{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:ATCQSAPZ4ANKYFFHSI55W5TKUE","short_pith_number":"pith:ATCQSAPZ","canonical_record":{"source":{"id":"1701.01908","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-08T03:13:37Z","cross_cats_sorted":[],"title_canon_sha256":"a6777f18e4a9329a6384a5e69de6075052fd03c48695eb6b976f6a23b569b71b","abstract_canon_sha256":"87856e7e3a4996c780d80db3ef57764f43a287f0c39d617fde6947314dd60987"},"schema_version":"1.0"},"canonical_sha256":"04c50901f9e01aac14a7923bdb766aa109334a8d1984883e1e29de21a991546e","source":{"kind":"arxiv","id":"1701.01908","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.01908","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"arxiv_version","alias_value":"1701.01908v1","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.01908","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"pith_short_12","alias_value":"ATCQSAPZ4ANK","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"ATCQSAPZ4ANKYFFH","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"ATCQSAPZ","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:ATCQSAPZ4ANKYFFHSI55W5TKUE","target":"record","payload":{"canonical_record":{"source":{"id":"1701.01908","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-08T03:13:37Z","cross_cats_sorted":[],"title_canon_sha256":"a6777f18e4a9329a6384a5e69de6075052fd03c48695eb6b976f6a23b569b71b","abstract_canon_sha256":"87856e7e3a4996c780d80db3ef57764f43a287f0c39d617fde6947314dd60987"},"schema_version":"1.0"},"canonical_sha256":"04c50901f9e01aac14a7923bdb766aa109334a8d1984883e1e29de21a991546e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:53:13.129151Z","signature_b64":"qjCo4KJLySA1ARGDb7CTe+BpmqeDqPXKU6eFramra+cdKRA6YPcvCplXVrXHMnUf/eHUAjtDhsaoLcI8X3J+DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"04c50901f9e01aac14a7923bdb766aa109334a8d1984883e1e29de21a991546e","last_reissued_at":"2026-05-18T00:53:13.128663Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:53:13.128663Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1701.01908","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:53:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OOuE9VOgyCyGa9XCBGO/lugiq+BtEQ1sx6vNRjoNQc0ZQGIXHlfpEkaWbTZ5/Pb3+ZDiSdmPJcKJfp5xvkKpBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T13:48:17.742392Z"},"content_sha256":"01e11844e0e278184088858becfe01eedc71fd2d0ad67b8ac3c30646aa13c758","schema_version":"1.0","event_id":"sha256:01e11844e0e278184088858becfe01eedc71fd2d0ad67b8ac3c30646aa13c758"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:ATCQSAPZ4ANKYFFHSI55W5TKUE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Sentence-level dialects identification in the greater China region","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Fan Xu, Maoxi Li, Mingwen Wang","submitted_at":"2017-01-08T03:13:37Z","abstract_excerpt":"Identifying the different varieties of the same language is more challenging than unrelated languages identification. In this paper, we propose an approach to discriminate language varieties or dialects of Mandarin Chinese for the Mainland China, Hong Kong, Taiwan, Macao, Malaysia and Singapore, a.k.a., the Greater China Region (GCR). When applied to the dialects identification of the GCR, we find that the commonly used character-level or word-level uni-gram feature is not very efficient since there exist several specific problems such as the ambiguity and context-dependent characteristic of w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.01908","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:53:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VoG5mHQRBepLYh17iFXtVZ9OmxcXj0mmS7EZbN244bqB7C0qzoxXRpFJ7F3ci9fDw47+GkaLt1feqGobIZA7DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T13:48:17.742742Z"},"content_sha256":"fa06f83f8dc0f365034c24522cadd5cf8e9fba5a1288720312309e15193d3a21","schema_version":"1.0","event_id":"sha256:fa06f83f8dc0f365034c24522cadd5cf8e9fba5a1288720312309e15193d3a21"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/bundle.json","state_url":"https://pith.science/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T13:48:17Z","links":{"resolver":"https://pith.science/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE","bundle":"https://pith.science/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/bundle.json","state":"https://pith.science/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ATCQSAPZ4ANKYFFHSI55W5TKUE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:ATCQSAPZ4ANKYFFHSI55W5TKUE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"87856e7e3a4996c780d80db3ef57764f43a287f0c39d617fde6947314dd60987","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-08T03:13:37Z","title_canon_sha256":"a6777f18e4a9329a6384a5e69de6075052fd03c48695eb6b976f6a23b569b71b"},"schema_version":"1.0","source":{"id":"1701.01908","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1701.01908","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"arxiv_version","alias_value":"1701.01908v1","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1701.01908","created_at":"2026-05-18T00:53:13Z"},{"alias_kind":"pith_short_12","alias_value":"ATCQSAPZ4ANK","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"ATCQSAPZ4ANKYFFH","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"ATCQSAPZ","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:fa06f83f8dc0f365034c24522cadd5cf8e9fba5a1288720312309e15193d3a21","target":"graph","created_at":"2026-05-18T00:53:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Identifying the different varieties of the same language is more challenging than unrelated languages identification. In this paper, we propose an approach to discriminate language varieties or dialects of Mandarin Chinese for the Mainland China, Hong Kong, Taiwan, Macao, Malaysia and Singapore, a.k.a., the Greater China Region (GCR). When applied to the dialects identification of the GCR, we find that the commonly used character-level or word-level uni-gram feature is not very efficient since there exist several specific problems such as the ambiguity and context-dependent characteristic of w","authors_text":"Fan Xu, Maoxi Li, Mingwen Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-08T03:13:37Z","title":"Sentence-level dialects identification in the greater China region"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1701.01908","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:01e11844e0e278184088858becfe01eedc71fd2d0ad67b8ac3c30646aa13c758","target":"record","created_at":"2026-05-18T00:53:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"87856e7e3a4996c780d80db3ef57764f43a287f0c39d617fde6947314dd60987","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-01-08T03:13:37Z","title_canon_sha256":"a6777f18e4a9329a6384a5e69de6075052fd03c48695eb6b976f6a23b569b71b"},"schema_version":"1.0","source":{"id":"1701.01908","kind":"arxiv","version":1}},"canonical_sha256":"04c50901f9e01aac14a7923bdb766aa109334a8d1984883e1e29de21a991546e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"04c50901f9e01aac14a7923bdb766aa109334a8d1984883e1e29de21a991546e","first_computed_at":"2026-05-18T00:53:13.128663Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:53:13.128663Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qjCo4KJLySA1ARGDb7CTe+BpmqeDqPXKU6eFramra+cdKRA6YPcvCplXVrXHMnUf/eHUAjtDhsaoLcI8X3J+DA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:53:13.129151Z","signed_message":"canonical_sha256_bytes"},"source_id":"1701.01908","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:01e11844e0e278184088858becfe01eedc71fd2d0ad67b8ac3c30646aa13c758","sha256:fa06f83f8dc0f365034c24522cadd5cf8e9fba5a1288720312309e15193d3a21"],"state_sha256":"1cc0f9f260e879a06e47110dc52d3868c58d8ae2b4d44a6696632b53929e9d1c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Orucq4uAwR0GM+b5K38HN6gSqHSpA4L11bRmxpkmr+QEZkSdR/cdDdjmw+miBmp4/pNcs9LjwlIDdXOxC9FZDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T13:48:17.744738Z","bundle_sha256":"f290a2ed970abf819d244d6bb9a66ba937ead1bcab0824359c69edc207faedaf"}}