{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:WQT7XCIIOK3KOVUNTQ4NMSE2L2","short_pith_number":"pith:WQT7XCII","schema_version":"1.0","canonical_sha256":"b427fb890872b6a7568d9c38d6489a5e8ae0e36ab0d6e23e3ae75c32c807f8c9","source":{"kind":"arxiv","id":"1906.03741","version":1},"attestation_state":"computed","paper":{"title":"BIGPATENT: A Large-Scale Dataset for Abstractive and Coherent Summarization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Chen Li, Eva Sharma, Lu Wang","submitted_at":"2019-06-10T00:24:26Z","abstract_excerpt":"Most existing text summarization datasets are compiled from the news domain, where summaries have a flattened discourse structure. In such datasets, summary-worthy content often appears in the beginning of input articles. Moreover, large segments from input articles are present verbatim in their respective summaries. These issues impede the learning and evaluation of systems that can understand an article's global content structure as well as produce abstractive summaries with high compression ratio. In this work, we present a novel dataset, BIGPATENT, consisting of 1.3 million records of U.S."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.03741","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2019-06-10T00:24:26Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"4da11bfc984441473b529645fd4078a7fff178020bce033b6cc0d02e2c880aa5","abstract_canon_sha256":"95a6ee8d420d30a27605c817e4c498fc90c32fd3bd213528aceec6cda79ba0c2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:45.094551Z","signature_b64":"NlvtYwSNbu82evPxGB8myGe866r+9WS0a/6YueUOEMXFtjtSvLJnxuJqoQ6uYrCGNX1mB1c6l38N2QsUZzpgAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b427fb890872b6a7568d9c38d6489a5e8ae0e36ab0d6e23e3ae75c32c807f8c9","last_reissued_at":"2026-05-17T23:43:45.093981Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:45.093981Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"BIGPATENT: A Large-Scale Dataset for Abstractive and Coherent Summarization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Chen Li, Eva Sharma, Lu Wang","submitted_at":"2019-06-10T00:24:26Z","abstract_excerpt":"Most existing text summarization datasets are compiled from the news domain, where summaries have a flattened discourse structure. In such datasets, summary-worthy content often appears in the beginning of input articles. Moreover, large segments from input articles are present verbatim in their respective summaries. These issues impede the learning and evaluation of systems that can understand an article's global content structure as well as produce abstractive summaries with high compression ratio. In this work, we present a novel dataset, BIGPATENT, consisting of 1.3 million records of U.S."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.03741","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.03741","created_at":"2026-05-17T23:43:45.094064+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.03741v1","created_at":"2026-05-17T23:43:45.094064+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.03741","created_at":"2026-05-17T23:43:45.094064+00:00"},{"alias_kind":"pith_short_12","alias_value":"WQT7XCIIOK3K","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"WQT7XCIIOK3KOVUN","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"WQT7XCII","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2402.01613","citing_title":"Nomic Embed: Training a Reproducible Long Context Text Embedder","ref_index":117,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02930","citing_title":"Analysis and Explainability of LLMs Via Evolutionary Methods","ref_index":35,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2","json":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2.json","graph_json":"https://pith.science/api/pith-number/WQT7XCIIOK3KOVUNTQ4NMSE2L2/graph.json","events_json":"https://pith.science/api/pith-number/WQT7XCIIOK3KOVUNTQ4NMSE2L2/events.json","paper":"https://pith.science/paper/WQT7XCII"},"agent_actions":{"view_html":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2","download_json":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2.json","view_paper":"https://pith.science/paper/WQT7XCII","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.03741&json=true","fetch_graph":"https://pith.science/api/pith-number/WQT7XCIIOK3KOVUNTQ4NMSE2L2/graph.json","fetch_events":"https://pith.science/api/pith-number/WQT7XCIIOK3KOVUNTQ4NMSE2L2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2/action/storage_attestation","attest_author":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2/action/author_attestation","sign_citation":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2/action/citation_signature","submit_replication":"https://pith.science/pith/WQT7XCIIOK3KOVUNTQ4NMSE2L2/action/replication_record"}},"created_at":"2026-05-17T23:43:45.094064+00:00","updated_at":"2026-05-17T23:43:45.094064+00:00"}