{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MJJ564QCD6BDWARD5RLMHPZIWT","short_pith_number":"pith:MJJ564QC","schema_version":"1.0","canonical_sha256":"6253df72021f823b0223ec56c3bf28b4d7416d8ebb6d1aaac86dd3c2cab02bd7","source":{"kind":"arxiv","id":"2605.20005","version":1},"attestation_state":"computed","paper":{"title":"Fine-Tuning Without Forgetting via Loss-Adaptive Learning Rates","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aldan Creo, Babak Salimi, Jiongli Zhu, Parjanya Prajakta Prashant","submitted_at":"2026-05-19T15:36:52Z","abstract_excerpt":"Fine-tuning large language models on new data improves task performance but degrades capabilities learned during pretraining, a phenomenon known as catastrophic forgetting. Existing methods mitigate this by modifying the fine-tuning objective to suppress high-loss tokens or sequences, but these tokens are essential for learning new tasks, especially those with poor pretraining coverage. In such settings, hard tokens should still contribute to learning, so forgetting must be controlled without suppressing them. We identify a simple mechanism for doing so: per-step forgetting is bounded by the p"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.20005","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-19T15:36:52Z","cross_cats_sorted":[],"title_canon_sha256":"e40ef6f2289684e3cec8fa69ed196f8b4b6c03276799a8ae2bcf0121a7e436b3","abstract_canon_sha256":"7e6c0d671d62b716e4a66385439cf430db0b86f86e27c3f54c18b054fba12728"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T02:05:58.708092Z","signature_b64":"UJ2sGMs4LzHG87h6e4oqtGKT0lISsdl9dJbsODY9Zn9SCLhlwAXYUH4ByJ0l083Z/wYqfwxE75j994D3wejXCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6253df72021f823b0223ec56c3bf28b4d7416d8ebb6d1aaac86dd3c2cab02bd7","last_reissued_at":"2026-05-20T02:05:58.707666Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T02:05:58.707666Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Fine-Tuning Without Forgetting via Loss-Adaptive Learning Rates","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Aldan Creo, Babak Salimi, Jiongli Zhu, Parjanya Prajakta Prashant","submitted_at":"2026-05-19T15:36:52Z","abstract_excerpt":"Fine-tuning large language models on new data improves task performance but degrades capabilities learned during pretraining, a phenomenon known as catastrophic forgetting. Existing methods mitigate this by modifying the fine-tuning objective to suppress high-loss tokens or sequences, but these tokens are essential for learning new tasks, especially those with poor pretraining coverage. In such settings, hard tokens should still contribute to learning, so forgetting must be controlled without suppressing them. We identify a simple mechanism for doing so: per-step forgetting is bounded by the p"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20005","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20005/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.20005","created_at":"2026-05-20T02:05:58.707727+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.20005v1","created_at":"2026-05-20T02:05:58.707727+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20005","created_at":"2026-05-20T02:05:58.707727+00:00"},{"alias_kind":"pith_short_12","alias_value":"MJJ564QCD6BD","created_at":"2026-05-20T02:05:58.707727+00:00"},{"alias_kind":"pith_short_16","alias_value":"MJJ564QCD6BDWARD","created_at":"2026-05-20T02:05:58.707727+00:00"},{"alias_kind":"pith_short_8","alias_value":"MJJ564QC","created_at":"2026-05-20T02:05:58.707727+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT","json":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT.json","graph_json":"https://pith.science/api/pith-number/MJJ564QCD6BDWARD5RLMHPZIWT/graph.json","events_json":"https://pith.science/api/pith-number/MJJ564QCD6BDWARD5RLMHPZIWT/events.json","paper":"https://pith.science/paper/MJJ564QC"},"agent_actions":{"view_html":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT","download_json":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT.json","view_paper":"https://pith.science/paper/MJJ564QC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.20005&json=true","fetch_graph":"https://pith.science/api/pith-number/MJJ564QCD6BDWARD5RLMHPZIWT/graph.json","fetch_events":"https://pith.science/api/pith-number/MJJ564QCD6BDWARD5RLMHPZIWT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT/action/storage_attestation","attest_author":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT/action/author_attestation","sign_citation":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT/action/citation_signature","submit_replication":"https://pith.science/pith/MJJ564QCD6BDWARD5RLMHPZIWT/action/replication_record"}},"created_at":"2026-05-20T02:05:58.707727+00:00","updated_at":"2026-05-20T02:05:58.707727+00:00"}