{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:JEXKL4CBATHAE3MKQGMZPTG5L2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1"},"schema_version":"1.0","source":{"id":"2606.18307","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"arxiv_version","alias_value":"2606.18307v1","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.18307","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_12","alias_value":"JEXKL4CBATHA","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_16","alias_value":"JEXKL4CBATHAE3MK","created_at":"2026-06-19T16:10:57Z"},{"alias_kind":"pith_short_8","alias_value":"JEXKL4CB","created_at":"2026-06-19T16:10:57Z"}],"graph_snapshots":[{"event_id":"sha256:45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d","target":"graph","created_at":"2026-06-19T16:10:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.18307/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Optimizing the training data distribution for Supervised Fine-Tuning (SFT) dictates the capability of Large Language Models (LLMs). While existing data curation methods excel at accelerating training under constrained budgets, they are less suited to elevating the capability upper bound. The challenge here is no longer to identify a smaller subset that preserves performance, but to refine the data distribution toward instances most capable of improving the final model. To address this problem, we explore instance-level data attribution using Influence Functions (IF). We identify that standard ","authors_text":"Lincheng Li, Tianyu Yu, Yuan Yao, Zefan Wang","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title":"DRIFT: Refining Instruction Data via On-Policy Data Attribution"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.18307","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f","target":"record","created_at":"2026-06-19T16:10:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c4c975b61043e6f110644f48b193b3c5ef88fe414b13a54ec62437619ae45571","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-16T07:21:49Z","title_canon_sha256":"a49c653bb18a6e2f7444259973289940c2eacd4379c4ee7786770f903af2a2f1"},"schema_version":"1.0","source":{"id":"2606.18307","kind":"arxiv","version":1}},"canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"492ea5f04104ce026d8a819997ccdd5e9b277d3a8003f3fc8c4343ab8a3bd482","first_computed_at":"2026-06-19T16:10:57.514511Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-19T16:10:57.514511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"jD0OwISYBLu7eSrcspJLAna21vuW+Pi4GbI1acYLHaUVu8v/AZFWV1F3wKD0hzkiJ7w5TFEIGYwPyslBs9UkBw==","signature_status":"signed_v1","signed_at":"2026-06-19T16:10:57.514846Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.18307","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:583903873dcafc610448fae148da31dcf3ddd528860721c36723dbeb9d7b8f3f","sha256:45efe505b3c9d249ca5e625f88fb3ae0a70d9d04aa45a3497ab7fdb979258a1d"],"state_sha256":"45f2de81c49b98dadfd10664f995005c80c128268dbc334822fa77ce10cf5656"}