{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TQP7M23ZT253RPCH6ZLUFGE4TY","short_pith_number":"pith:TQP7M23Z","schema_version":"1.0","canonical_sha256":"9c1ff66b799ebbb8bc47f65742989c9e06b58db488887de733c2bf2ace8b85f5","source":{"kind":"arxiv","id":"2602.11137","version":2},"attestation_state":"computed","paper":{"title":"Weight Decay Improves Language Model Plasticity","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Hanlin Zhang, Sebastian Bordt, Sham Kakade, Tessa Han","submitted_at":"2026-02-11T18:49:26Z","abstract_excerpt":"Large language models are typically trained in two broad phases: pretraining to produce a base model, followed by further training to improve downstream performance. However, hyperparameter optimization and scaling laws are studied primarily from the perspective of the base model's validation loss, overlooking a crucial model property: downstream adaptability. In this work, we study pretraining from the perspective of model plasticity, that is, the ability of the base model to successfully adapt to downstream tasks upon additional training. We focus on the role of weight decay, a key regulariz"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.11137","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-11T18:49:26Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"ca1f355cad451bf1bf9a34f3f63f1e9d8db4a015e72088a89ea9ee88e529d82a","abstract_canon_sha256":"4955c7c7c4cd9118f639fb01924e875c002ab366addf56fc6668bd68e51ce200"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:02:33.926896Z","signature_b64":"Kv/lf1fI00nXp1gzyEDf6VYpdNVabGEUoKvE+0zpZJJ0DuwC5UEB7CMXXJlw4iAjocH+IZGaNxBC4/DD6Mb+Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9c1ff66b799ebbb8bc47f65742989c9e06b58db488887de733c2bf2ace8b85f5","last_reissued_at":"2026-06-01T01:02:33.926016Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:02:33.926016Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Weight Decay Improves Language Model Plasticity","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Hanlin Zhang, Sebastian Bordt, Sham Kakade, Tessa Han","submitted_at":"2026-02-11T18:49:26Z","abstract_excerpt":"Large language models are typically trained in two broad phases: pretraining to produce a base model, followed by further training to improve downstream performance. However, hyperparameter optimization and scaling laws are studied primarily from the perspective of the base model's validation loss, overlooking a crucial model property: downstream adaptability. In this work, we study pretraining from the perspective of model plasticity, that is, the ability of the base model to successfully adapt to downstream tasks upon additional training. We focus on the role of weight decay, a key regulariz"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.11137","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.11137/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.11137","created_at":"2026-06-01T01:02:33.926125+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.11137v2","created_at":"2026-06-01T01:02:33.926125+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.11137","created_at":"2026-06-01T01:02:33.926125+00:00"},{"alias_kind":"pith_short_12","alias_value":"TQP7M23ZT253","created_at":"2026-06-01T01:02:33.926125+00:00"},{"alias_kind":"pith_short_16","alias_value":"TQP7M23ZT253RPCH","created_at":"2026-06-01T01:02:33.926125+00:00"},{"alias_kind":"pith_short_8","alias_value":"TQP7M23Z","created_at":"2026-06-01T01:02:33.926125+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY","json":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY.json","graph_json":"https://pith.science/api/pith-number/TQP7M23ZT253RPCH6ZLUFGE4TY/graph.json","events_json":"https://pith.science/api/pith-number/TQP7M23ZT253RPCH6ZLUFGE4TY/events.json","paper":"https://pith.science/paper/TQP7M23Z"},"agent_actions":{"view_html":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY","download_json":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY.json","view_paper":"https://pith.science/paper/TQP7M23Z","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.11137&json=true","fetch_graph":"https://pith.science/api/pith-number/TQP7M23ZT253RPCH6ZLUFGE4TY/graph.json","fetch_events":"https://pith.science/api/pith-number/TQP7M23ZT253RPCH6ZLUFGE4TY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY/action/storage_attestation","attest_author":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY/action/author_attestation","sign_citation":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY/action/citation_signature","submit_replication":"https://pith.science/pith/TQP7M23ZT253RPCH6ZLUFGE4TY/action/replication_record"}},"created_at":"2026-06-01T01:02:33.926125+00:00","updated_at":"2026-06-01T01:02:33.926125+00:00"}