{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:B64DL5LOC5HQCSQ27FL7DOO54H","short_pith_number":"pith:B64DL5LO","canonical_record":{"source":{"id":"2606.18663","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-17T04:02:38Z","cross_cats_sorted":[],"title_canon_sha256":"5648e385984ca021f4aac94256b712ce7e41264fd97c229248809c3bdbe2c8b9","abstract_canon_sha256":"c434a7e4b4f83f989f3397a3f7c3904f92d043b405ac704e4477c2706383ff01"},"schema_version":"1.0"},"canonical_sha256":"0fb835f56e174f014a1af957f1b9dde1fd5891f61e390bb0b66f3f7cd814f9ab","source":{"kind":"arxiv","id":"2606.18663","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.18663","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"arxiv_version","alias_value":"2606.18663v1","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18663","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_12","alias_value":"B64DL5LOC5HQ","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_16","alias_value":"B64DL5LOC5HQCSQ2","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_8","alias_value":"B64DL5LO","created_at":"2026-06-19T16:11:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:B64DL5LOC5HQCSQ27FL7DOO54H","target":"record","payload":{"canonical_record":{"source":{"id":"2606.18663","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-17T04:02:38Z","cross_cats_sorted":[],"title_canon_sha256":"5648e385984ca021f4aac94256b712ce7e41264fd97c229248809c3bdbe2c8b9","abstract_canon_sha256":"c434a7e4b4f83f989f3397a3f7c3904f92d043b405ac704e4477c2706383ff01"},"schema_version":"1.0"},"canonical_sha256":"0fb835f56e174f014a1af957f1b9dde1fd5891f61e390bb0b66f3f7cd814f9ab","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:11:43.979015Z","signature_b64":"zEiUnzQgmxAOLrHLqfEO69EEHcUwSgRUNYuFcG2Dr6ZaB7nLkpj0UHZJuf9IHs/VfG+4Kv/DT0qDajjzGNe5DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0fb835f56e174f014a1af957f1b9dde1fd5891f61e390bb0b66f3f7cd814f9ab","last_reissued_at":"2026-06-19T16:11:43.978675Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:11:43.978675Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.18663","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:11:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"brqr+PSMHnE1iCJOd+1t/YXJXoqt+huDmY+4E26jtb51k6T76r2Esb+qN0rYOsWWmUd1ZIHGkxkRZQs1Ok5eAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T07:18:57.352115Z"},"content_sha256":"c146a6f680a03c9071d5fcb336b641976de6d242f067872313028a8e95aa04de","schema_version":"1.0","event_id":"sha256:c146a6f680a03c9071d5fcb336b641976de6d242f067872313028a8e95aa04de"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:B64DL5LOC5HQCSQ27FL7DOO54H","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"RegMix-D: Dynamic Data Mixing via Proxy Training Trajectories","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Akiko Aizawa, Kaiyan Zhao, Yoshimasa Tsuruoka, Zhongtao Miao","submitted_at":"2026-06-17T04:02:38Z","abstract_excerpt":"Data mixture selection is critical for Large Language Model pretraining. Existing methods such as RegMix select a single static mixture by fitting a regression model on small-scale proxy runs. We propose RegMix-D, a simple extension of RegMix to dynamic mixing. Our key observation is that proxy runs produce not only endpoint losses, but also full loss trajectories, which can be used to further improve data mixture. By training regression model on these trajectories, we can predict optimal mixtures at multiple training stages. RegMix-D supports two deployment modes: an offline variant that gene"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18663","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.18663/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-19T16:11:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EXFSpXNFI/1Pk2cCeaEzGw5TtvneDfSqa7MxC14jVwIZykKmq8/KI4I6e++6oHyYzOhz7RTO10KP/J/qsSk3Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T07:18:57.352540Z"},"content_sha256":"3629952b8e17a8e38ce40b622b1f2792044b430fcb4fc76234ad7a68e48d308a","schema_version":"1.0","event_id":"sha256:3629952b8e17a8e38ce40b622b1f2792044b430fcb4fc76234ad7a68e48d308a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/B64DL5LOC5HQCSQ27FL7DOO54H/bundle.json","state_url":"https://pith.science/pith/B64DL5LOC5HQCSQ27FL7DOO54H/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/B64DL5LOC5HQCSQ27FL7DOO54H/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T07:18:57Z","links":{"resolver":"https://pith.science/pith/B64DL5LOC5HQCSQ27FL7DOO54H","bundle":"https://pith.science/pith/B64DL5LOC5HQCSQ27FL7DOO54H/bundle.json","state":"https://pith.science/pith/B64DL5LOC5HQCSQ27FL7DOO54H/state.json","well_known_bundle":"https://pith.science/.well-known/pith/B64DL5LOC5HQCSQ27FL7DOO54H/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:B64DL5LOC5HQCSQ27FL7DOO54H","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c434a7e4b4f83f989f3397a3f7c3904f92d043b405ac704e4477c2706383ff01","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-17T04:02:38Z","title_canon_sha256":"5648e385984ca021f4aac94256b712ce7e41264fd97c229248809c3bdbe2c8b9"},"schema_version":"1.0","source":{"id":"2606.18663","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.18663","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"arxiv_version","alias_value":"2606.18663v1","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18663","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_12","alias_value":"B64DL5LOC5HQ","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_16","alias_value":"B64DL5LOC5HQCSQ2","created_at":"2026-06-19T16:11:43Z"},{"alias_kind":"pith_short_8","alias_value":"B64DL5LO","created_at":"2026-06-19T16:11:43Z"}],"graph_snapshots":[{"event_id":"sha256:3629952b8e17a8e38ce40b622b1f2792044b430fcb4fc76234ad7a68e48d308a","target":"graph","created_at":"2026-06-19T16:11:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.18663/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Data mixture selection is critical for Large Language Model pretraining. Existing methods such as RegMix select a single static mixture by fitting a regression model on small-scale proxy runs. We propose RegMix-D, a simple extension of RegMix to dynamic mixing. Our key observation is that proxy runs produce not only endpoint losses, but also full loss trajectories, which can be used to further improve data mixture. By training regression model on these trajectories, we can predict optimal mixtures at multiple training stages. RegMix-D supports two deployment modes: an offline variant that gene","authors_text":"Akiko Aizawa, Kaiyan Zhao, Yoshimasa Tsuruoka, Zhongtao Miao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-17T04:02:38Z","title":"RegMix-D: Dynamic Data Mixing via Proxy Training Trajectories"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18663","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c146a6f680a03c9071d5fcb336b641976de6d242f067872313028a8e95aa04de","target":"record","created_at":"2026-06-19T16:11:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c434a7e4b4f83f989f3397a3f7c3904f92d043b405ac704e4477c2706383ff01","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-17T04:02:38Z","title_canon_sha256":"5648e385984ca021f4aac94256b712ce7e41264fd97c229248809c3bdbe2c8b9"},"schema_version":"1.0","source":{"id":"2606.18663","kind":"arxiv","version":1}},"canonical_sha256":"0fb835f56e174f014a1af957f1b9dde1fd5891f61e390bb0b66f3f7cd814f9ab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0fb835f56e174f014a1af957f1b9dde1fd5891f61e390bb0b66f3f7cd814f9ab","first_computed_at":"2026-06-19T16:11:43.978675Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:11:43.978675Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zEiUnzQgmxAOLrHLqfEO69EEHcUwSgRUNYuFcG2Dr6ZaB7nLkpj0UHZJuf9IHs/VfG+4Kv/DT0qDajjzGNe5DQ==","signature_status":"signed_v1","signed_at":"2026-06-19T16:11:43.979015Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.18663","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c146a6f680a03c9071d5fcb336b641976de6d242f067872313028a8e95aa04de","sha256:3629952b8e17a8e38ce40b622b1f2792044b430fcb4fc76234ad7a68e48d308a"],"state_sha256":"b5ef6e430ea5bdfaa934b1e126c897a2fc42233f48ddc1fc05df516f0b561b70"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9KYke0SJDv1Xf2AzhbaZdjO6J0Wd0zfTmslMAyN7ApzvhYUX2YMqx1boKYcAzs3AQAD9JMyLbG68n3WRgjRgDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T07:18:57.354622Z","bundle_sha256":"8b66dead9088174979cb646528516d84781d42a4baf7c904d618fbafcdab5c2f"}}