{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TLQKIUVSAQQJPV2TVGNJFQWRRH","short_pith_number":"pith:TLQKIUVS","schema_version":"1.0","canonical_sha256":"9ae0a452b2042097d753a99a92c2d189dc9e916be4a7f551ac43b7dd8f06f186","source":{"kind":"arxiv","id":"2605.14773","version":1},"attestation_state":"computed","paper":{"title":"Beyond What to Select: A Plug-and-play Oscillatory Data-Volume Scheduling for Efficient Model Training","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Fangjian Su, Furao Shen, Guang Li, Hai Gan, Hanqi Zhu, Soujanya Poria, Suorong Yang","submitted_at":"2026-05-14T12:37:11Z","abstract_excerpt":"Data selection accelerates training by identifying representative training data while preserving model performance. However, existing methods mainly focus on designing sample-importance criteria, i.e., deciding what to select, while typically fixing the selected data volume as the target ratio throughout training. Thus, they are often dynamic in sample identity but static in data volume. In this work, we revisit data selection from an optimization perspective and show that selected-data training induces an implicit regularization effect modulated by the instantaneous selection ratio. This reve"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.14773","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T12:37:11Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"210acbdf7df5cd3cbc12b248d54dbec552672d3afad7afd5259b88426ab6b8bf","abstract_canon_sha256":"e450a83e0bcc7cbce535122cc3153948e2a72a99c21849e3cd4c97b84df2c7cb"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:58.634438Z","signature_b64":"t26mPYuigAvBPnaxxDIw1ouxVIN33O01N6uVgdaEghIRq0WiN62eRa8GJMS4U3jq3HtPeVJCW0ho27CdeZoCBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9ae0a452b2042097d753a99a92c2d189dc9e916be4a7f551ac43b7dd8f06f186","last_reissued_at":"2026-05-17T23:38:58.633732Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:58.633732Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Beyond What to Select: A Plug-and-play Oscillatory Data-Volume Scheduling for Efficient Model Training","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Fangjian Su, Furao Shen, Guang Li, Hai Gan, Hanqi Zhu, Soujanya Poria, Suorong Yang","submitted_at":"2026-05-14T12:37:11Z","abstract_excerpt":"Data selection accelerates training by identifying representative training data while preserving model performance. However, existing methods mainly focus on designing sample-importance criteria, i.e., deciding what to select, while typically fixing the selected data volume as the target ratio throughout training. Thus, they are often dynamic in sample identity but static in data volume. In this work, we revisit data selection from an optimization perspective and show that selected-data training induces an implicit regularization effect modulated by the instantaneous selection ratio. This reve"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.14773","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.14773","created_at":"2026-05-17T23:38:58.633847+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.14773v1","created_at":"2026-05-17T23:38:58.633847+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14773","created_at":"2026-05-17T23:38:58.633847+00:00"},{"alias_kind":"pith_short_12","alias_value":"TLQKIUVSAQQJ","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"TLQKIUVSAQQJPV2T","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"TLQKIUVS","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH","json":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH.json","graph_json":"https://pith.science/api/pith-number/TLQKIUVSAQQJPV2TVGNJFQWRRH/graph.json","events_json":"https://pith.science/api/pith-number/TLQKIUVSAQQJPV2TVGNJFQWRRH/events.json","paper":"https://pith.science/paper/TLQKIUVS"},"agent_actions":{"view_html":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH","download_json":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH.json","view_paper":"https://pith.science/paper/TLQKIUVS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.14773&json=true","fetch_graph":"https://pith.science/api/pith-number/TLQKIUVSAQQJPV2TVGNJFQWRRH/graph.json","fetch_events":"https://pith.science/api/pith-number/TLQKIUVSAQQJPV2TVGNJFQWRRH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH/action/storage_attestation","attest_author":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH/action/author_attestation","sign_citation":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH/action/citation_signature","submit_replication":"https://pith.science/pith/TLQKIUVSAQQJPV2TVGNJFQWRRH/action/replication_record"}},"created_at":"2026-05-17T23:38:58.633847+00:00","updated_at":"2026-05-17T23:38:58.633847+00:00"}