{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:NWZ7CNMQ3I24IUNPKOLVV5E5RD","short_pith_number":"pith:NWZ7CNMQ","canonical_record":{"source":{"id":"1504.02490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-04-09T20:21:32Z","cross_cats_sorted":[],"title_canon_sha256":"baf41595e8ece318f031f9fbb8f05257036eab0423ca15bf74768c7364f570b3","abstract_canon_sha256":"d6684d72e46e56c6828e88f79a6b8ad98020e9376213110f28d0e45c303f1920"},"schema_version":"1.0"},"canonical_sha256":"6db3f13590da35c451af53975af49d88deaed0839688c2b10bab42c78f3d849f","source":{"kind":"arxiv","id":"1504.02490","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1504.02490","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"arxiv_version","alias_value":"1504.02490v1","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1504.02490","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"pith_short_12","alias_value":"NWZ7CNMQ3I24","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_16","alias_value":"NWZ7CNMQ3I24IUNP","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_8","alias_value":"NWZ7CNMQ","created_at":"2026-05-18T12:29:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:NWZ7CNMQ3I24IUNPKOLVV5E5RD","target":"record","payload":{"canonical_record":{"source":{"id":"1504.02490","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-04-09T20:21:32Z","cross_cats_sorted":[],"title_canon_sha256":"baf41595e8ece318f031f9fbb8f05257036eab0423ca15bf74768c7364f570b3","abstract_canon_sha256":"d6684d72e46e56c6828e88f79a6b8ad98020e9376213110f28d0e45c303f1920"},"schema_version":"1.0"},"canonical_sha256":"6db3f13590da35c451af53975af49d88deaed0839688c2b10bab42c78f3d849f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:19:07.548379Z","signature_b64":"dM66GFTlt14UqJKyiCqh7HH1DLC3hQUxafJzmWo44TZ6XcaNuSNXQwQdiGp7fcRo4H3Uup3oe30hywNDHsXTDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6db3f13590da35c451af53975af49d88deaed0839688c2b10bab42c78f3d849f","last_reissued_at":"2026-05-18T02:19:07.547806Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:19:07.547806Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1504.02490","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:19:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Zcdpd4lokDSHFHFPxBNDK1JKldhOS4oOUjQD5+LlKK2/1eIeQRuPjD8rOqaV3WxBFYuw/N8PBTn5G6gGznZkAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T05:47:21.368017Z"},"content_sha256":"d2681dae9a2a489dadc24d83529d0011e69d508d228a4c6fde07843e5d2699ed","schema_version":"1.0","event_id":"sha256:d2681dae9a2a489dadc24d83529d0011e69d508d228a4c6fde07843e5d2699ed"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:NWZ7CNMQ3I24IUNPKOLVV5E5RD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Leveraging Twitter for Low-Resource Conversational Speech Language Modeling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Aaron Jaech, Mari Ostendorf","submitted_at":"2015-04-09T20:21:32Z","abstract_excerpt":"In applications involving conversational speech, data sparsity is a limiting factor in building a better language model. We propose a simple, language-independent method to quickly harvest large amounts of data from Twitter to supplement a smaller training set that is more closely matched to the domain. The techniques lead to a significant reduction in perplexity on four low-resource languages even though the presence on Twitter of these languages is relatively small. We also find that the Twitter text is more useful for learning word classes than the in-domain text and that use of these word "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1504.02490","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:19:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"m29BLzcdvPhPa35xO0LxDqv0H+nejppTtgdCPOJ3AKDoHWYE1JGl9gQhZ+Fd3EQvkwxs6iAKishArQBz5J9LBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T05:47:21.368698Z"},"content_sha256":"704ba5903d4ac4dae489739330f6f372bf7682000634bc62a45288385c282720","schema_version":"1.0","event_id":"sha256:704ba5903d4ac4dae489739330f6f372bf7682000634bc62a45288385c282720"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/bundle.json","state_url":"https://pith.science/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T05:47:21Z","links":{"resolver":"https://pith.science/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD","bundle":"https://pith.science/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/bundle.json","state":"https://pith.science/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NWZ7CNMQ3I24IUNPKOLVV5E5RD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:NWZ7CNMQ3I24IUNPKOLVV5E5RD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d6684d72e46e56c6828e88f79a6b8ad98020e9376213110f28d0e45c303f1920","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-04-09T20:21:32Z","title_canon_sha256":"baf41595e8ece318f031f9fbb8f05257036eab0423ca15bf74768c7364f570b3"},"schema_version":"1.0","source":{"id":"1504.02490","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1504.02490","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"arxiv_version","alias_value":"1504.02490v1","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1504.02490","created_at":"2026-05-18T02:19:07Z"},{"alias_kind":"pith_short_12","alias_value":"NWZ7CNMQ3I24","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_16","alias_value":"NWZ7CNMQ3I24IUNP","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_8","alias_value":"NWZ7CNMQ","created_at":"2026-05-18T12:29:34Z"}],"graph_snapshots":[{"event_id":"sha256:704ba5903d4ac4dae489739330f6f372bf7682000634bc62a45288385c282720","target":"graph","created_at":"2026-05-18T02:19:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In applications involving conversational speech, data sparsity is a limiting factor in building a better language model. We propose a simple, language-independent method to quickly harvest large amounts of data from Twitter to supplement a smaller training set that is more closely matched to the domain. The techniques lead to a significant reduction in perplexity on four low-resource languages even though the presence on Twitter of these languages is relatively small. We also find that the Twitter text is more useful for learning word classes than the in-domain text and that use of these word ","authors_text":"Aaron Jaech, Mari Ostendorf","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-04-09T20:21:32Z","title":"Leveraging Twitter for Low-Resource Conversational Speech Language Modeling"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1504.02490","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d2681dae9a2a489dadc24d83529d0011e69d508d228a4c6fde07843e5d2699ed","target":"record","created_at":"2026-05-18T02:19:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d6684d72e46e56c6828e88f79a6b8ad98020e9376213110f28d0e45c303f1920","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-04-09T20:21:32Z","title_canon_sha256":"baf41595e8ece318f031f9fbb8f05257036eab0423ca15bf74768c7364f570b3"},"schema_version":"1.0","source":{"id":"1504.02490","kind":"arxiv","version":1}},"canonical_sha256":"6db3f13590da35c451af53975af49d88deaed0839688c2b10bab42c78f3d849f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6db3f13590da35c451af53975af49d88deaed0839688c2b10bab42c78f3d849f","first_computed_at":"2026-05-18T02:19:07.547806Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:19:07.547806Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dM66GFTlt14UqJKyiCqh7HH1DLC3hQUxafJzmWo44TZ6XcaNuSNXQwQdiGp7fcRo4H3Uup3oe30hywNDHsXTDg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:19:07.548379Z","signed_message":"canonical_sha256_bytes"},"source_id":"1504.02490","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d2681dae9a2a489dadc24d83529d0011e69d508d228a4c6fde07843e5d2699ed","sha256:704ba5903d4ac4dae489739330f6f372bf7682000634bc62a45288385c282720"],"state_sha256":"9166dffe1ce29c9946921e2d390f02c4fd22739ebae5d8a4f571d95bc2920029"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bzRR/FAO3bYFVT5thfjqmctElM7L7Xj+sfsrus1VBN40zuDmInJtswEaFhC8mT54oX0nSpDEPoLxYnHVR3+bCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T05:47:21.371396Z","bundle_sha256":"69879849955a5dce1f3c291c02d12cb91278bb33ed504e8432cd5c23a0a3c9d8"}}