{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:LFGGTTFUCACJRN6OTZKAT36K2L","short_pith_number":"pith:LFGGTTFU","canonical_record":{"source":{"id":"2606.24828","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T17:12:06Z","cross_cats_sorted":[],"title_canon_sha256":"2cd9b62d187becdf54b0999181835c86d63513ff64acc583026efefba1e4d065","abstract_canon_sha256":"a677d6a2934886ff885b7a335880552851466037cf1c70ae64eba93cb43e100b"},"schema_version":"1.0"},"canonical_sha256":"594c69ccb4100498b7ce9e5409efcad2c9501f94444651d138a34d91d8457e49","source":{"kind":"arxiv","id":"2606.24828","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24828","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24828v1","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24828","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_12","alias_value":"LFGGTTFUCACJ","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_16","alias_value":"LFGGTTFUCACJRN6O","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_8","alias_value":"LFGGTTFU","created_at":"2026-06-24T01:15:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:LFGGTTFUCACJRN6OTZKAT36K2L","target":"record","payload":{"canonical_record":{"source":{"id":"2606.24828","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T17:12:06Z","cross_cats_sorted":[],"title_canon_sha256":"2cd9b62d187becdf54b0999181835c86d63513ff64acc583026efefba1e4d065","abstract_canon_sha256":"a677d6a2934886ff885b7a335880552851466037cf1c70ae64eba93cb43e100b"},"schema_version":"1.0"},"canonical_sha256":"594c69ccb4100498b7ce9e5409efcad2c9501f94444651d138a34d91d8457e49","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:43.403060Z","signature_b64":"zjZQZUuxpD572J2fLe0pfXDEMKq+dm5bPSzHTUr/QnegkuRZlmU2ILpg/JYCfqWdvlGvjT/yIo9WVPP6rwyPAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"594c69ccb4100498b7ce9e5409efcad2c9501f94444651d138a34d91d8457e49","last_reissued_at":"2026-06-24T01:15:43.402664Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:43.402664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.24828","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GZDqmTrhLPTI6JIFkvnPFxi02TiYJ3k7jp6j2sMxqTwCUk9hrGWUFB3M8zUWNQyBrAPoJEhjAkV0f7g/MjnrAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T00:07:04.220148Z"},"content_sha256":"c1122ffdc1a2169480c7dbe25071aaf71433adc0182f2533f1bb490a36df4bff","schema_version":"1.0","event_id":"sha256:c1122ffdc1a2169480c7dbe25071aaf71433adc0182f2533f1bb490a36df4bff"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:LFGGTTFUCACJRN6OTZKAT36K2L","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Less is More: Quality-Aware Training Data Selection for Scientific Summarization","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Grigorios Tsoumakas, Maria Nefeli Paraskevopoulou, Tatiana Passali","submitted_at":"2026-06-23T17:12:06Z","abstract_excerpt":"Scientific long-document summarization datasets commonly treat author-written abstracts as gold reference summaries, although their quality and alignment with the source article vary. At the same time, publicly available scientific summarization datasets remain limited in scale and structure for modern long-context models. In this work, we address both challenges by a) constructing and releasing one of the largest biomedical and life science datasets for long-document summarization, containing 1.88 million PMC articles, and b) analyzing the reference quality of author-written abstracts with so"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24828","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24828/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-24T01:15:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lBnuNyaGjcW/tCwYX+xOepqxpgJWyP+MqqblOHX6eXKF5FxuddtSuZu1SMxri+Wvt+w7gVvRhJbP9nbo0UwRAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T00:07:04.220532Z"},"content_sha256":"cf4438dddb228d300e651cb3d5e70909c1a3c8b928d1a3eb7ea25d4dda2c8890","schema_version":"1.0","event_id":"sha256:cf4438dddb228d300e651cb3d5e70909c1a3c8b928d1a3eb7ea25d4dda2c8890"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LFGGTTFUCACJRN6OTZKAT36K2L/bundle.json","state_url":"https://pith.science/pith/LFGGTTFUCACJRN6OTZKAT36K2L/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LFGGTTFUCACJRN6OTZKAT36K2L/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T00:07:04Z","links":{"resolver":"https://pith.science/pith/LFGGTTFUCACJRN6OTZKAT36K2L","bundle":"https://pith.science/pith/LFGGTTFUCACJRN6OTZKAT36K2L/bundle.json","state":"https://pith.science/pith/LFGGTTFUCACJRN6OTZKAT36K2L/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LFGGTTFUCACJRN6OTZKAT36K2L/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LFGGTTFUCACJRN6OTZKAT36K2L","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a677d6a2934886ff885b7a335880552851466037cf1c70ae64eba93cb43e100b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T17:12:06Z","title_canon_sha256":"2cd9b62d187becdf54b0999181835c86d63513ff64acc583026efefba1e4d065"},"schema_version":"1.0","source":{"id":"2606.24828","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.24828","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"arxiv_version","alias_value":"2606.24828v1","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24828","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_12","alias_value":"LFGGTTFUCACJ","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_16","alias_value":"LFGGTTFUCACJRN6O","created_at":"2026-06-24T01:15:43Z"},{"alias_kind":"pith_short_8","alias_value":"LFGGTTFU","created_at":"2026-06-24T01:15:43Z"}],"graph_snapshots":[{"event_id":"sha256:cf4438dddb228d300e651cb3d5e70909c1a3c8b928d1a3eb7ea25d4dda2c8890","target":"graph","created_at":"2026-06-24T01:15:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.24828/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Scientific long-document summarization datasets commonly treat author-written abstracts as gold reference summaries, although their quality and alignment with the source article vary. At the same time, publicly available scientific summarization datasets remain limited in scale and structure for modern long-context models. In this work, we address both challenges by a) constructing and releasing one of the largest biomedical and life science datasets for long-document summarization, containing 1.88 million PMC articles, and b) analyzing the reference quality of author-written abstracts with so","authors_text":"Grigorios Tsoumakas, Maria Nefeli Paraskevopoulou, Tatiana Passali","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T17:12:06Z","title":"Less is More: Quality-Aware Training Data Selection for Scientific Summarization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24828","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c1122ffdc1a2169480c7dbe25071aaf71433adc0182f2533f1bb490a36df4bff","target":"record","created_at":"2026-06-24T01:15:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a677d6a2934886ff885b7a335880552851466037cf1c70ae64eba93cb43e100b","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T17:12:06Z","title_canon_sha256":"2cd9b62d187becdf54b0999181835c86d63513ff64acc583026efefba1e4d065"},"schema_version":"1.0","source":{"id":"2606.24828","kind":"arxiv","version":1}},"canonical_sha256":"594c69ccb4100498b7ce9e5409efcad2c9501f94444651d138a34d91d8457e49","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"594c69ccb4100498b7ce9e5409efcad2c9501f94444651d138a34d91d8457e49","first_computed_at":"2026-06-24T01:15:43.402664Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-24T01:15:43.402664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zjZQZUuxpD572J2fLe0pfXDEMKq+dm5bPSzHTUr/QnegkuRZlmU2ILpg/JYCfqWdvlGvjT/yIo9WVPP6rwyPAQ==","signature_status":"signed_v1","signed_at":"2026-06-24T01:15:43.403060Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.24828","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c1122ffdc1a2169480c7dbe25071aaf71433adc0182f2533f1bb490a36df4bff","sha256:cf4438dddb228d300e651cb3d5e70909c1a3c8b928d1a3eb7ea25d4dda2c8890"],"state_sha256":"8af780407d71cdc296d5cb359586d54f75234b586cccaaf2684b4c06c5d4c452"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QdWlrMVzpAkd4ZcfDI+4V32WFox+oDYJ7uKaw7Poe2lCIpcTRh8L5iZBM1hXWqN9vPPcjlqVLfJfGA5jDZXXDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T00:07:04.222496Z","bundle_sha256":"84d810fd219a74f4c40055795528f8aaedf7fc461734a602961ffcaf6171f609"}}