{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:NQUAIQAFLUPCQ26TTT2VNWOCWX","short_pith_number":"pith:NQUAIQAF","canonical_record":{"source":{"id":"2605.17379","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-17T10:45:01Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"50fa848f2f1c4de9deb70bf5df073e3722fc7bdfbb83c46bc7dd5416e0defe12","abstract_canon_sha256":"fe8b12425a7e6180efd3d9e192be888370064fc73d8cd810b7d7982913ff7b15"},"schema_version":"1.0"},"canonical_sha256":"6c280440055d1e286bd39cf556d9c2b5db6621e7680d6ce35930cc8d8523f6a8","source":{"kind":"arxiv","id":"2605.17379","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17379","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17379v1","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17379","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"NQUAIQAFLUPC","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"NQUAIQAFLUPCQ26T","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"NQUAIQAF","created_at":"2026-05-20T00:03:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:NQUAIQAFLUPCQ26TTT2VNWOCWX","target":"record","payload":{"canonical_record":{"source":{"id":"2605.17379","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-17T10:45:01Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"50fa848f2f1c4de9deb70bf5df073e3722fc7bdfbb83c46bc7dd5416e0defe12","abstract_canon_sha256":"fe8b12425a7e6180efd3d9e192be888370064fc73d8cd810b7d7982913ff7b15"},"schema_version":"1.0"},"canonical_sha256":"6c280440055d1e286bd39cf556d9c2b5db6621e7680d6ce35930cc8d8523f6a8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:55.583282Z","signature_b64":"8TOzsmQDsJ4GMi4HNih/JIm9Ca1szCMdA/xXVwrxHvek8y9DKOeWwShu8gPIBJYC2m3xbghqpYThfuR7GgSrBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6c280440055d1e286bd39cf556d9c2b5db6621e7680d6ce35930cc8d8523f6a8","last_reissued_at":"2026-05-20T00:03:55.582504Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:55.582504Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.17379","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3gMiFHvj71EzCmcDVLcPKwDHq3qvcrWD/MGBtx6EqmO7Kbmn3WQudM9fGC5I9BGYoalSQPEgdD1c8dboxPxqCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T08:07:44.281048Z"},"content_sha256":"a006387fcd2261163af99b4a3017b93fbefc46f2faf244677952cdb04120b390","schema_version":"1.0","event_id":"sha256:a006387fcd2261163af99b4a3017b93fbefc46f2faf244677952cdb04120b390"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:NQUAIQAFLUPCQ26TTT2VNWOCWX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Faster with Better Tokens: Parameter-Efficient Vocabulary Adaptation for Specialized Text Summarization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Gunjan Balde, Mainack Mondal, Niloy Ganguly, Soumyadeep Roy","submitted_at":"2026-05-17T10:45:01Z","abstract_excerpt":"Large language models pretrained on general-domain corpora often exhibit tokenization inefficiencies when applied to specialized domains. Although continual pretraining for domain adaptation partially alleviate performance degradation, it does not resolve the fundamental vocabulary mismatch. To address this gap, we introduce a targeted parameter-efficient domain adaptation approach that combines vocabulary adaptation with pretraining for LLM-based text summarization. Our unified framework augments pretrained tokenizers with domain-specific tokens while selectively replacing under-trained and u"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17379","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17379/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T21:41:57.771398Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.709333Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"f2aca72447b7df6b4a5d590a02f5afd46778ec987198531ff21062006b485194"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nwhln5basooLi2CYZZXz3gxP/EHGPGp6kRRII47LrU6+s5XrxiX4fjkwRAJ3O+b2ppP228+wmawvz8IWhGQyAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T08:07:44.281847Z"},"content_sha256":"a908455d73c76ed1fa0ced0d8a903593163f9bac6a6f4f1af689397a0ad1c5ed","schema_version":"1.0","event_id":"sha256:a908455d73c76ed1fa0ced0d8a903593163f9bac6a6f4f1af689397a0ad1c5ed"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/bundle.json","state_url":"https://pith.science/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T08:07:44Z","links":{"resolver":"https://pith.science/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX","bundle":"https://pith.science/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/bundle.json","state":"https://pith.science/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NQUAIQAFLUPCQ26TTT2VNWOCWX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NQUAIQAFLUPCQ26TTT2VNWOCWX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fe8b12425a7e6180efd3d9e192be888370064fc73d8cd810b7d7982913ff7b15","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-17T10:45:01Z","title_canon_sha256":"50fa848f2f1c4de9deb70bf5df073e3722fc7bdfbb83c46bc7dd5416e0defe12"},"schema_version":"1.0","source":{"id":"2605.17379","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17379","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17379v1","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17379","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"NQUAIQAFLUPC","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"NQUAIQAFLUPCQ26T","created_at":"2026-05-20T00:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"NQUAIQAF","created_at":"2026-05-20T00:03:55Z"}],"graph_snapshots":[{"event_id":"sha256:a908455d73c76ed1fa0ced0d8a903593163f9bac6a6f4f1af689397a0ad1c5ed","target":"graph","created_at":"2026-05-20T00:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T21:41:57.771398Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.709333Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.17379/integrity.json","findings":[],"snapshot_sha256":"f2aca72447b7df6b4a5d590a02f5afd46778ec987198531ff21062006b485194","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models pretrained on general-domain corpora often exhibit tokenization inefficiencies when applied to specialized domains. Although continual pretraining for domain adaptation partially alleviate performance degradation, it does not resolve the fundamental vocabulary mismatch. To address this gap, we introduce a targeted parameter-efficient domain adaptation approach that combines vocabulary adaptation with pretraining for LLM-based text summarization. Our unified framework augments pretrained tokenizers with domain-specific tokens while selectively replacing under-trained and u","authors_text":"Gunjan Balde, Mainack Mondal, Niloy Ganguly, Soumyadeep Roy","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-17T10:45:01Z","title":"Learning Faster with Better Tokens: Parameter-Efficient Vocabulary Adaptation for Specialized Text Summarization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17379","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a006387fcd2261163af99b4a3017b93fbefc46f2faf244677952cdb04120b390","target":"record","created_at":"2026-05-20T00:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fe8b12425a7e6180efd3d9e192be888370064fc73d8cd810b7d7982913ff7b15","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-05-17T10:45:01Z","title_canon_sha256":"50fa848f2f1c4de9deb70bf5df073e3722fc7bdfbb83c46bc7dd5416e0defe12"},"schema_version":"1.0","source":{"id":"2605.17379","kind":"arxiv","version":1}},"canonical_sha256":"6c280440055d1e286bd39cf556d9c2b5db6621e7680d6ce35930cc8d8523f6a8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6c280440055d1e286bd39cf556d9c2b5db6621e7680d6ce35930cc8d8523f6a8","first_computed_at":"2026-05-20T00:03:55.582504Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:55.582504Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8TOzsmQDsJ4GMi4HNih/JIm9Ca1szCMdA/xXVwrxHvek8y9DKOeWwShu8gPIBJYC2m3xbghqpYThfuR7GgSrBg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:55.583282Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.17379","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a006387fcd2261163af99b4a3017b93fbefc46f2faf244677952cdb04120b390","sha256:a908455d73c76ed1fa0ced0d8a903593163f9bac6a6f4f1af689397a0ad1c5ed"],"state_sha256":"2bb09483826c1e56db73030c6bd3e9756ad8961f5f11b2287ac71797c093edec"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HLJUwcIe14jKEWJIqzMnqrZH5Jmuxyl/+NS9XepzbVlT8qiyJq6ZJqtvuIhiCR4MGAJcHRCZrTYZMbPBx79bAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T08:07:44.285707Z","bundle_sha256":"7843f9829db40e31e3acc7502e7dfb87d41015ce2fe072970cb1b8c50ad02aa0"}}