{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:7QTI55WIQBKGREEQRBXDHKCBQ7","short_pith_number":"pith:7QTI55WI","canonical_record":{"source":{"id":"1805.07231","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T14:17:07Z","cross_cats_sorted":[],"title_canon_sha256":"870a0177e55b38d502858dc717dd5c432f9e307e1b6ae6dcde507c9f3d5df91c","abstract_canon_sha256":"3ebdcda0f09e6e8850310c2b0ceceff9f813b5752d1f2db8f593aeb1b331555b"},"schema_version":"1.0"},"canonical_sha256":"fc268ef6c88054689090886e33a84187e84a68cd1259effba15b5a0af36340c2","source":{"kind":"arxiv","id":"1805.07231","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.07231","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"arxiv_version","alias_value":"1805.07231v2","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07231","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"pith_short_12","alias_value":"7QTI55WIQBKG","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"7QTI55WIQBKGREEQ","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"7QTI55WI","created_at":"2026-05-18T12:32:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:7QTI55WIQBKGREEQRBXDHKCBQ7","target":"record","payload":{"canonical_record":{"source":{"id":"1805.07231","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T14:17:07Z","cross_cats_sorted":[],"title_canon_sha256":"870a0177e55b38d502858dc717dd5c432f9e307e1b6ae6dcde507c9f3d5df91c","abstract_canon_sha256":"3ebdcda0f09e6e8850310c2b0ceceff9f813b5752d1f2db8f593aeb1b331555b"},"schema_version":"1.0"},"canonical_sha256":"fc268ef6c88054689090886e33a84187e84a68cd1259effba15b5a0af36340c2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:10:10.184918Z","signature_b64":"+HB31HSu/9DbYRM6f7VFb9R2htMN82RHTywKt8qDP6q65RFyAmji3g9gRM5C/w1XqMeDrd/ISDS3fCRsXzO2AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fc268ef6c88054689090886e33a84187e84a68cd1259effba15b5a0af36340c2","last_reissued_at":"2026-05-18T00:10:10.184289Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:10:10.184289Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.07231","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:10:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zTyORnA4bVsOjLB79xBAsqNCpw6i/C8O8C5hLE7IescwdYWh9a2by7Yuem6+f416u3qsbBSCfANHqqgVEFEKCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:41:13.396382Z"},"content_sha256":"7b5ab33c517c3c32b0ba708634d38a51396a9053519dac7e0351b79f3acbaca3","schema_version":"1.0","event_id":"sha256:7b5ab33c517c3c32b0ba708634d38a51396a9053519dac7e0351b79f3acbaca3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:7QTI55WIQBKGREEQRBXDHKCBQ7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Study on Dialog Act Recognition using Character-Level Tokenization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"David Martins de Matos, Eug\\'enio Ribeiro, Ricardo Ribeiro","submitted_at":"2018-05-18T14:17:07Z","abstract_excerpt":"Dialog act recognition is an important step for dialog systems since it reveals the intention behind the uttered words. Most approaches on the task use word-level tokenization. In contrast, this paper explores the use of character-level tokenization. This is relevant since there is information at the sub-word level that is related to the function of the words and, thus, their intention. We also explore the use of different context windows around each token, which are able to capture important elements, such as affixes. Furthermore, we assess the importance of punctuation and capitalization. We"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07231","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:10:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"y/stV2fS7dEIN4HM5Gi/d0VKRHb25WLzeduz2qNMDygVOkWeSZY4AECt8VVcGD0VT3djJrYcZbw+ltt4n3DpAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:41:13.397053Z"},"content_sha256":"9642ba09fdcc6c11bceefa2fbfa3aad976b4cefaa50d0b2ab13f6a93da8cb115","schema_version":"1.0","event_id":"sha256:9642ba09fdcc6c11bceefa2fbfa3aad976b4cefaa50d0b2ab13f6a93da8cb115"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/bundle.json","state_url":"https://pith.science/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T21:41:13Z","links":{"resolver":"https://pith.science/pith/7QTI55WIQBKGREEQRBXDHKCBQ7","bundle":"https://pith.science/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/bundle.json","state":"https://pith.science/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7QTI55WIQBKGREEQRBXDHKCBQ7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:7QTI55WIQBKGREEQRBXDHKCBQ7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3ebdcda0f09e6e8850310c2b0ceceff9f813b5752d1f2db8f593aeb1b331555b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T14:17:07Z","title_canon_sha256":"870a0177e55b38d502858dc717dd5c432f9e307e1b6ae6dcde507c9f3d5df91c"},"schema_version":"1.0","source":{"id":"1805.07231","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.07231","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"arxiv_version","alias_value":"1805.07231v2","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07231","created_at":"2026-05-18T00:10:10Z"},{"alias_kind":"pith_short_12","alias_value":"7QTI55WIQBKG","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_16","alias_value":"7QTI55WIQBKGREEQ","created_at":"2026-05-18T12:32:11Z"},{"alias_kind":"pith_short_8","alias_value":"7QTI55WI","created_at":"2026-05-18T12:32:11Z"}],"graph_snapshots":[{"event_id":"sha256:9642ba09fdcc6c11bceefa2fbfa3aad976b4cefaa50d0b2ab13f6a93da8cb115","target":"graph","created_at":"2026-05-18T00:10:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Dialog act recognition is an important step for dialog systems since it reveals the intention behind the uttered words. Most approaches on the task use word-level tokenization. In contrast, this paper explores the use of character-level tokenization. This is relevant since there is information at the sub-word level that is related to the function of the words and, thus, their intention. We also explore the use of different context windows around each token, which are able to capture important elements, such as affixes. Furthermore, we assess the importance of punctuation and capitalization. We","authors_text":"David Martins de Matos, Eug\\'enio Ribeiro, Ricardo Ribeiro","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T14:17:07Z","title":"A Study on Dialog Act Recognition using Character-Level Tokenization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07231","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7b5ab33c517c3c32b0ba708634d38a51396a9053519dac7e0351b79f3acbaca3","target":"record","created_at":"2026-05-18T00:10:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3ebdcda0f09e6e8850310c2b0ceceff9f813b5752d1f2db8f593aeb1b331555b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-18T14:17:07Z","title_canon_sha256":"870a0177e55b38d502858dc717dd5c432f9e307e1b6ae6dcde507c9f3d5df91c"},"schema_version":"1.0","source":{"id":"1805.07231","kind":"arxiv","version":2}},"canonical_sha256":"fc268ef6c88054689090886e33a84187e84a68cd1259effba15b5a0af36340c2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fc268ef6c88054689090886e33a84187e84a68cd1259effba15b5a0af36340c2","first_computed_at":"2026-05-18T00:10:10.184289Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:10:10.184289Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+HB31HSu/9DbYRM6f7VFb9R2htMN82RHTywKt8qDP6q65RFyAmji3g9gRM5C/w1XqMeDrd/ISDS3fCRsXzO2AQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:10:10.184918Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.07231","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7b5ab33c517c3c32b0ba708634d38a51396a9053519dac7e0351b79f3acbaca3","sha256:9642ba09fdcc6c11bceefa2fbfa3aad976b4cefaa50d0b2ab13f6a93da8cb115"],"state_sha256":"a56baabecadfd014f5209b35958f4cf73511a2928f5436123e77831a2121c33a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a/dvf439/OY1t0o4m6HkeYHTqeQfoR32Uk9GPOXgD7Dw8pqdoH6omhMrtR13f9dFqjzWE7BBZPZEvB6dA+L9BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T21:41:13.401398Z","bundle_sha256":"1397b1fecea4eddb3fadbc7ce13df0197c9d6679eafb7508c0ff0c1ec7df9035"}}