{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:ODUR4DLJI655UDJLQUXPGTR2IR","short_pith_number":"pith:ODUR4DLJ","canonical_record":{"source":{"id":"1805.07731","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-20T08:40:06Z","cross_cats_sorted":[],"title_canon_sha256":"5dac958f1d48cd1cbafb07f19d125e952471177e45168d131565048c8315a098","abstract_canon_sha256":"6ee308952e43cb7ef39a1c606a6f1179f7ed38ba32da4b802cf0572dcfcdb9be"},"schema_version":"1.0"},"canonical_sha256":"70e91e0d6947bbda0d2b852ef34e3a44450338289fc12ce7f2b086ec68ae3cfb","source":{"kind":"arxiv","id":"1805.07731","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.07731","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"arxiv_version","alias_value":"1805.07731v1","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07731","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"pith_short_12","alias_value":"ODUR4DLJI655","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_16","alias_value":"ODUR4DLJI655UDJL","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_8","alias_value":"ODUR4DLJ","created_at":"2026-05-18T12:32:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:ODUR4DLJI655UDJLQUXPGTR2IR","target":"record","payload":{"canonical_record":{"source":{"id":"1805.07731","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-20T08:40:06Z","cross_cats_sorted":[],"title_canon_sha256":"5dac958f1d48cd1cbafb07f19d125e952471177e45168d131565048c8315a098","abstract_canon_sha256":"6ee308952e43cb7ef39a1c606a6f1179f7ed38ba32da4b802cf0572dcfcdb9be"},"schema_version":"1.0"},"canonical_sha256":"70e91e0d6947bbda0d2b852ef34e3a44450338289fc12ce7f2b086ec68ae3cfb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:15:32.688445Z","signature_b64":"EfEoyLTszhuLbEOmurcpuaTYzAKXJQjX3hSbixEtADYSQN88I153ZF+xZ50Kop3uymt4m3vfq5g9bA3HPaPaAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"70e91e0d6947bbda0d2b852ef34e3a44450338289fc12ce7f2b086ec68ae3cfb","last_reissued_at":"2026-05-18T00:15:32.687782Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:15:32.687782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.07731","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"x7uN2kmIZibQWYrFTqtmfo3MI2joWjivDWr5LJ08+O5iRGb1wFt0lxYudIqei7VgiYI0IkXcPXKj8+O4jAcFBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T18:51:49.659675Z"},"content_sha256":"7a75801c75fac6fbb937cb0d588ca9d111e7d8207a81341fc4a2779d0ee975db","schema_version":"1.0","event_id":"sha256:7a75801c75fac6fbb937cb0d588ca9d111e7d8207a81341fc4a2779d0ee975db"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:ODUR4DLJI655UDJLQUXPGTR2IR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Generating High-Quality Surface Realizations Using Data Augmentation and Factored Sequence Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chris Hokamp, Henry Elder","submitted_at":"2018-05-20T08:40:06Z","abstract_excerpt":"This work presents a new state of the art in reconstruction of surface realizations from obfuscated text. We identify the lack of sufficient training data as the major obstacle to training high-performing models, and solve this issue by generating large amounts of synthetic training data. We also propose preprocessing techniques which make the structure contained in the input features more accessible to sequence models. Our models were ranked first on all evaluation metrics in the English portion of the 2018 Surface Realization shared task."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07731","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:15:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"V39d2mAeq3BnfZoIBG7Z73hd9Lrfiz5BwTzZ6WQg+hZqNZNQOsE6rEbSk58Wae3O4AWMx7z8xmdHqMgwMQc0CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T18:51:49.660427Z"},"content_sha256":"1cbb4a8db1c4717d9961e2e416b5f4eb4013fb2b3e1bf59ff72bf22e56212b0a","schema_version":"1.0","event_id":"sha256:1cbb4a8db1c4717d9961e2e416b5f4eb4013fb2b3e1bf59ff72bf22e56212b0a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ODUR4DLJI655UDJLQUXPGTR2IR/bundle.json","state_url":"https://pith.science/pith/ODUR4DLJI655UDJLQUXPGTR2IR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ODUR4DLJI655UDJLQUXPGTR2IR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T18:51:49Z","links":{"resolver":"https://pith.science/pith/ODUR4DLJI655UDJLQUXPGTR2IR","bundle":"https://pith.science/pith/ODUR4DLJI655UDJLQUXPGTR2IR/bundle.json","state":"https://pith.science/pith/ODUR4DLJI655UDJLQUXPGTR2IR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ODUR4DLJI655UDJLQUXPGTR2IR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:ODUR4DLJI655UDJLQUXPGTR2IR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6ee308952e43cb7ef39a1c606a6f1179f7ed38ba32da4b802cf0572dcfcdb9be","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-20T08:40:06Z","title_canon_sha256":"5dac958f1d48cd1cbafb07f19d125e952471177e45168d131565048c8315a098"},"schema_version":"1.0","source":{"id":"1805.07731","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.07731","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"arxiv_version","alias_value":"1805.07731v1","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.07731","created_at":"2026-05-18T00:15:32Z"},{"alias_kind":"pith_short_12","alias_value":"ODUR4DLJI655","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_16","alias_value":"ODUR4DLJI655UDJL","created_at":"2026-05-18T12:32:43Z"},{"alias_kind":"pith_short_8","alias_value":"ODUR4DLJ","created_at":"2026-05-18T12:32:43Z"}],"graph_snapshots":[{"event_id":"sha256:1cbb4a8db1c4717d9961e2e416b5f4eb4013fb2b3e1bf59ff72bf22e56212b0a","target":"graph","created_at":"2026-05-18T00:15:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This work presents a new state of the art in reconstruction of surface realizations from obfuscated text. We identify the lack of sufficient training data as the major obstacle to training high-performing models, and solve this issue by generating large amounts of synthetic training data. We also propose preprocessing techniques which make the structure contained in the input features more accessible to sequence models. Our models were ranked first on all evaluation metrics in the English portion of the 2018 Surface Realization shared task.","authors_text":"Chris Hokamp, Henry Elder","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-20T08:40:06Z","title":"Generating High-Quality Surface Realizations Using Data Augmentation and Factored Sequence Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.07731","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7a75801c75fac6fbb937cb0d588ca9d111e7d8207a81341fc4a2779d0ee975db","target":"record","created_at":"2026-05-18T00:15:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6ee308952e43cb7ef39a1c606a6f1179f7ed38ba32da4b802cf0572dcfcdb9be","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-05-20T08:40:06Z","title_canon_sha256":"5dac958f1d48cd1cbafb07f19d125e952471177e45168d131565048c8315a098"},"schema_version":"1.0","source":{"id":"1805.07731","kind":"arxiv","version":1}},"canonical_sha256":"70e91e0d6947bbda0d2b852ef34e3a44450338289fc12ce7f2b086ec68ae3cfb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"70e91e0d6947bbda0d2b852ef34e3a44450338289fc12ce7f2b086ec68ae3cfb","first_computed_at":"2026-05-18T00:15:32.687782Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:15:32.687782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EfEoyLTszhuLbEOmurcpuaTYzAKXJQjX3hSbixEtADYSQN88I153ZF+xZ50Kop3uymt4m3vfq5g9bA3HPaPaAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:15:32.688445Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.07731","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7a75801c75fac6fbb937cb0d588ca9d111e7d8207a81341fc4a2779d0ee975db","sha256:1cbb4a8db1c4717d9961e2e416b5f4eb4013fb2b3e1bf59ff72bf22e56212b0a"],"state_sha256":"9a1b7370f61c2b58804754bc292a1e3db34f505f6b02a8a4064058054bbcf6b6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CLk7UtgThQeKs5nLwMhsNzv1HsdQacf+LHqb9tLLJSQ9IiCZhu/QHx9Xjjybnv8MVtimEQD0n3r7fTsBKtKRCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T18:51:49.664969Z","bundle_sha256":"19f42e8f730c0346b05a5350ba28207bc4206d788ce01cbf37bc2c08b2d85ac1"}}