{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:LLWDBG7T3MGE5K4X62ENZYTTZ2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119"},"schema_version":"1.0","source":{"id":"1603.06785","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"arxiv_version","alias_value":"1603.06785v1","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"pith_short_12","alias_value":"LLWDBG7T3MGE","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_16","alias_value":"LLWDBG7T3MGE5K4X","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_8","alias_value":"LLWDBG7T","created_at":"2026-05-18T12:30:29Z"}],"graph_snapshots":[{"event_id":"sha256:a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1","target":"graph","created_at":"2026-05-18T01:18:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Parallel texts are a relatively rare language resource, however, they constitute a very useful research material with a wide range of applications. This study presents and analyses new methodologies we developed for obtaining such data from previously built comparable corpora. The methodologies are automatic and unsupervised which makes them good for large scale research. The task is highly practical as non-parallel multilingual data occur much more frequently than parallel corpora and accessing them is easy, although parallel sentences are a considerably more useful resource. In this study, w","authors_text":"Emilia Rejmund, Krzysztof Marasek, Krzysztof Wo{\\l}k","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title":"Multi-domain machine translation enhancements by parallel data extraction from comparable corpora"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.06785","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780","target":"record","created_at":"2026-05-18T01:18:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119"},"schema_version":"1.0","source":{"id":"1603.06785","kind":"arxiv","version":1}},"canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","first_computed_at":"2026-05-18T01:18:34.724684Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:18:34.724684Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"700WaruuVPBHFERi/xEUX2LoWXf4vd6mtNIiAtkTL6DUDIMDKFEyEDutBlOXbN+7+zOF1luwuL04vOvTb7UaCA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:18:34.725103Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.06785","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780","sha256:a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1"],"state_sha256":"874986d35a5b6bb1a85be3517445807491a823e5d3a271a516112ff75380fbaa"}