{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:2OQZ7IR74FUXYM3FZJUFIK5FVY","short_pith_number":"pith:2OQZ7IR7","canonical_record":{"source":{"id":"1509.09093","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-09-30T09:29:51Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"e597d6cf4dc773aa9f8e8d769098190673b749c12754cba8ddcb7a9084397f7a","abstract_canon_sha256":"6281e207d00f239e36b721deef75dae53527369a690500dc014d802ec9bad15d"},"schema_version":"1.0"},"canonical_sha256":"d3a19fa23fe1697c3365ca68542ba5ae36ef0f28e681db0b6825d9f93368c9d3","source":{"kind":"arxiv","id":"1509.09093","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1509.09093","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"arxiv_version","alias_value":"1509.09093v1","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1509.09093","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"pith_short_12","alias_value":"2OQZ7IR74FUX","created_at":"2026-05-18T12:29:02Z"},{"alias_kind":"pith_short_16","alias_value":"2OQZ7IR74FUXYM3F","created_at":"2026-05-18T12:29:02Z"},{"alias_kind":"pith_short_8","alias_value":"2OQZ7IR7","created_at":"2026-05-18T12:29:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:2OQZ7IR74FUXYM3FZJUFIK5FVY","target":"record","payload":{"canonical_record":{"source":{"id":"1509.09093","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-09-30T09:29:51Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"e597d6cf4dc773aa9f8e8d769098190673b749c12754cba8ddcb7a9084397f7a","abstract_canon_sha256":"6281e207d00f239e36b721deef75dae53527369a690500dc014d802ec9bad15d"},"schema_version":"1.0"},"canonical_sha256":"d3a19fa23fe1697c3365ca68542ba5ae36ef0f28e681db0b6825d9f93368c9d3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:31:29.065384Z","signature_b64":"BfLgnuFQUbtY5ZATM4HFJVVwf1r4GiWwdUkqX/FVv+7KmAmMO3EFrzREBB9amAIYV+xNVG2c4BU98yq46Z7uBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d3a19fa23fe1697c3365ca68542ba5ae36ef0f28e681db0b6825d9f93368c9d3","last_reissued_at":"2026-05-18T01:31:29.064521Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:31:29.064521Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1509.09093","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:31:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PtdeWo+xO0b6UcciAgSejQQdHNL5dc3RbYW7ND3HrFY7x5mvARCgysW/cEW/C1Kdd7LnJ7nbM+1IEpOVz0DbBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:48:26.514473Z"},"content_sha256":"25495d383460be626277187061f7b256471503bb4ce4d011a686a3df9e2d2dcd","schema_version":"1.0","event_id":"sha256:25495d383460be626277187061f7b256471503bb4ce4d011a686a3df9e2d2dcd"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:2OQZ7IR74FUXYM3FZJUFIK5FVY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Sentence Meaning Based Alignment Method for Parallel Text Corpora Preparation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CL","authors_text":"Krzysztof Marasek, Krzysztof Wo{\\l}k","submitted_at":"2015-09-30T09:29:51Z","abstract_excerpt":"Text alignment is crucial to the accuracy of Machine Translation (MT) systems, some NLP tools or any other text processing tasks requiring bilingual data. This research proposes a language independent sentence alignment approach based on Polish (not position-sensitive language) to English experiments. This alignment approach was developed on the TED Talks corpus, but can be used for any text domain or language pair. The proposed approach implements various heuristics for sentence recognition. Some of them value synonyms and semantic text structure analysis as a part of additional information. "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1509.09093","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:31:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3F0Pzl/OIA0EJ/tx/Jydwnx+DVphQfWWTXP6tv1GqoqDHrtJGeD9B/y+ZTxsocM9ZZck4pw7GoPSSrjpe7TTDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:48:26.514951Z"},"content_sha256":"48aaeefe15633bdf88920b94b093241477c841d62827a5ead39e1e56ccf6a0c5","schema_version":"1.0","event_id":"sha256:48aaeefe15633bdf88920b94b093241477c841d62827a5ead39e1e56ccf6a0c5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/bundle.json","state_url":"https://pith.science/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:48:26Z","links":{"resolver":"https://pith.science/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY","bundle":"https://pith.science/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/bundle.json","state":"https://pith.science/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/2OQZ7IR74FUXYM3FZJUFIK5FVY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:2OQZ7IR74FUXYM3FZJUFIK5FVY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6281e207d00f239e36b721deef75dae53527369a690500dc014d802ec9bad15d","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-09-30T09:29:51Z","title_canon_sha256":"e597d6cf4dc773aa9f8e8d769098190673b749c12754cba8ddcb7a9084397f7a"},"schema_version":"1.0","source":{"id":"1509.09093","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1509.09093","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"arxiv_version","alias_value":"1509.09093v1","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1509.09093","created_at":"2026-05-18T01:31:29Z"},{"alias_kind":"pith_short_12","alias_value":"2OQZ7IR74FUX","created_at":"2026-05-18T12:29:02Z"},{"alias_kind":"pith_short_16","alias_value":"2OQZ7IR74FUXYM3F","created_at":"2026-05-18T12:29:02Z"},{"alias_kind":"pith_short_8","alias_value":"2OQZ7IR7","created_at":"2026-05-18T12:29:02Z"}],"graph_snapshots":[{"event_id":"sha256:48aaeefe15633bdf88920b94b093241477c841d62827a5ead39e1e56ccf6a0c5","target":"graph","created_at":"2026-05-18T01:31:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Text alignment is crucial to the accuracy of Machine Translation (MT) systems, some NLP tools or any other text processing tasks requiring bilingual data. This research proposes a language independent sentence alignment approach based on Polish (not position-sensitive language) to English experiments. This alignment approach was developed on the TED Talks corpus, but can be used for any text domain or language pair. The proposed approach implements various heuristics for sentence recognition. Some of them value synonyms and semantic text structure analysis as a part of additional information. ","authors_text":"Krzysztof Marasek, Krzysztof Wo{\\l}k","cross_cats":["cs.IR"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-09-30T09:29:51Z","title":"A Sentence Meaning Based Alignment Method for Parallel Text Corpora Preparation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1509.09093","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:25495d383460be626277187061f7b256471503bb4ce4d011a686a3df9e2d2dcd","target":"record","created_at":"2026-05-18T01:31:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6281e207d00f239e36b721deef75dae53527369a690500dc014d802ec9bad15d","cross_cats_sorted":["cs.IR"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-09-30T09:29:51Z","title_canon_sha256":"e597d6cf4dc773aa9f8e8d769098190673b749c12754cba8ddcb7a9084397f7a"},"schema_version":"1.0","source":{"id":"1509.09093","kind":"arxiv","version":1}},"canonical_sha256":"d3a19fa23fe1697c3365ca68542ba5ae36ef0f28e681db0b6825d9f93368c9d3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d3a19fa23fe1697c3365ca68542ba5ae36ef0f28e681db0b6825d9f93368c9d3","first_computed_at":"2026-05-18T01:31:29.064521Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:31:29.064521Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"BfLgnuFQUbtY5ZATM4HFJVVwf1r4GiWwdUkqX/FVv+7KmAmMO3EFrzREBB9amAIYV+xNVG2c4BU98yq46Z7uBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T01:31:29.065384Z","signed_message":"canonical_sha256_bytes"},"source_id":"1509.09093","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:25495d383460be626277187061f7b256471503bb4ce4d011a686a3df9e2d2dcd","sha256:48aaeefe15633bdf88920b94b093241477c841d62827a5ead39e1e56ccf6a0c5"],"state_sha256":"0cec2496485df6309b7e8d3c6fb9c8867bef0d3aad5b8007a98342d185c0dc3d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mt8anxwiuwmhgT+pxPZjDfzFgXHSaIJ670z+fHRiUAjUqxTyDg9FdMnd/IsC22S2igqDz8TqW2kg6tE67gErBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:48:26.519129Z","bundle_sha256":"419d754a03c032ecf864a0cca433d352a6321a7bb8a52a2486517596cad6ee46"}}