{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:LLWDBG7T3MGE5K4X62ENZYTTZ2","short_pith_number":"pith:LLWDBG7T","canonical_record":{"source":{"id":"1603.06785","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119","abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622"},"schema_version":"1.0"},"canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","source":{"kind":"arxiv","id":"1603.06785","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"arxiv_version","alias_value":"1603.06785v1","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"pith_short_12","alias_value":"LLWDBG7T3MGE","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_16","alias_value":"LLWDBG7T3MGE5K4X","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_8","alias_value":"LLWDBG7T","created_at":"2026-05-18T12:30:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:LLWDBG7T3MGE5K4X62ENZYTTZ2","target":"record","payload":{"canonical_record":{"source":{"id":"1603.06785","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119","abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622"},"schema_version":"1.0"},"canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:18:34.725103Z","signature_b64":"700WaruuVPBHFERi/xEUX2LoWXf4vd6mtNIiAtkTL6DUDIMDKFEyEDutBlOXbN+7+zOF1luwuL04vOvTb7UaCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","last_reissued_at":"2026-05-18T01:18:34.724684Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:18:34.724684Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1603.06785","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:18:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fLjvL5N6/K0TjFyzy6MjEh+kWtyiRYP1q91bMEE+oQC0H5xx5LeCKYSuPYhnG9VTxOP1GJFpTvr5R389Z65QCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:35:56.085357Z"},"content_sha256":"27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780","schema_version":"1.0","event_id":"sha256:27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:LLWDBG7T3MGE5K4X62ENZYTTZ2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-domain machine translation enhancements by parallel data extraction from comparable corpora","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.CL","authors_text":"Emilia Rejmund, Krzysztof Marasek, Krzysztof Wo{\\l}k","submitted_at":"2016-03-22T13:34:28Z","abstract_excerpt":"Parallel texts are a relatively rare language resource, however, they constitute a very useful research material with a wide range of applications. This study presents and analyses new methodologies we developed for obtaining such data from previously built comparable corpora. The methodologies are automatic and unsupervised which makes them good for large scale research. The task is highly practical as non-parallel multilingual data occur much more frequently than parallel corpora and accessing them is easy, although parallel sentences are a considerably more useful resource. In this study, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.06785","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:18:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bFwnTMeljMDeRu5zjJmYX7YhK/l2YCikrF5Z/3TZj/wYrRtAp7Fq74MpJ8SwFhQdueJJE6DKpTes5zbyONGZBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:35:56.086042Z"},"content_sha256":"a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1","schema_version":"1.0","event_id":"sha256:a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/bundle.json","state_url":"https://pith.science/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:35:56Z","links":{"resolver":"https://pith.science/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2","bundle":"https://pith.science/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/bundle.json","state":"https://pith.science/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LLWDBG7T3MGE5K4X62ENZYTTZ2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:LLWDBG7T3MGE5K4X62ENZYTTZ2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119"},"schema_version":"1.0","source":{"id":"1603.06785","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"arxiv_version","alias_value":"1603.06785v1","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.06785","created_at":"2026-05-18T01:18:34Z"},{"alias_kind":"pith_short_12","alias_value":"LLWDBG7T3MGE","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_16","alias_value":"LLWDBG7T3MGE5K4X","created_at":"2026-05-18T12:30:29Z"},{"alias_kind":"pith_short_8","alias_value":"LLWDBG7T","created_at":"2026-05-18T12:30:29Z"}],"graph_snapshots":[{"event_id":"sha256:a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1","target":"graph","created_at":"2026-05-18T01:18:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Parallel texts are a relatively rare language resource, however, they constitute a very useful research material with a wide range of applications. This study presents and analyses new methodologies we developed for obtaining such data from previously built comparable corpora. The methodologies are automatic and unsupervised which makes them good for large scale research. The task is highly practical as non-parallel multilingual data occur much more frequently than parallel corpora and accessing them is easy, although parallel sentences are a considerably more useful resource. In this study, w","authors_text":"Emilia Rejmund, Krzysztof Marasek, Krzysztof Wo{\\l}k","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title":"Multi-domain machine translation enhancements by parallel data extraction from comparable corpora"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.06785","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780","target":"record","created_at":"2026-05-18T01:18:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ee192fe6b20d045b9de3da854fee6dad64c6240bb1b11d7d076280f6be230622","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2016-03-22T13:34:28Z","title_canon_sha256":"0e5fe3178879ef1302395533212d7d5b3c6a6882e5176cea95a800ec07482119"},"schema_version":"1.0","source":{"id":"1603.06785","kind":"arxiv","version":1}},"canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5aec309bf3db0c4eab97f688dce273ce8a94381e404acb7574b2216f39f115c0","first_computed_at":"2026-05-18T01:18:34.724684Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:18:34.724684Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"700WaruuVPBHFERi/xEUX2LoWXf4vd6mtNIiAtkTL6DUDIMDKFEyEDutBlOXbN+7+zOF1luwuL04vOvTb7UaCA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:18:34.725103Z","signed_message":"canonical_sha256_bytes"},"source_id":"1603.06785","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:27583d9234561c8506a65051d0fd7d8c483a41a2b16d72b4d950a2cb5c4dd780","sha256:a533dec184ee335d9504c76f4b72c1d3c438385c4ce725c8eb01f3355d5ab0f1"],"state_sha256":"874986d35a5b6bb1a85be3517445807491a823e5d3a271a516112ff75380fbaa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"M4XJNF9aoccp+95zT2n5wU2aImbrTO2xGK7WO+CcLqkcJZhwfnfvxyk9W5CKkSQyW25qSqJlJ1/lrSj0cxf8CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:35:56.089727Z","bundle_sha256":"acd9533115cdb6ccdc7921bd687e9a45a6925d287ded2a9d3bcbf04d19617fac"}}