{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2IXGW5PSRNZSLQOUYB4RHQHHBD","short_pith_number":"pith:2IXGW5PS","schema_version":"1.0","canonical_sha256":"d22e6b75f28b7325c1d4c07913c0e708eecf3128968e1c8e4a7ff2969a14b4ff","source":{"kind":"arxiv","id":"2602.18584","version":2},"attestation_state":"computed","paper":{"title":"GIST: Targeted Data Selection for Instruction Tuning via Coupled Optimization Geometry","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.LG","authors_text":"Chen Chen, Guanghui Min, Ke Wan, Tianhao Huang","submitted_at":"2026-02-20T19:44:24Z","abstract_excerpt":"Targeted data selection has emerged as a crucial paradigm for efficient instruction tuning, aiming to identify a small yet influential subset of training examples for a specific target task. In practice, influence is often measured through the effect of an example on parameter updates. To make selection scalable, many approaches leverage optimizer statistics (e.g., Adam states) as an axis-aligned surrogate for update geometry (i.e., diagonal precondition), implicitly treating parameters as coordinate-wise independent. We show that this assumption breaks down in parameter-efficient fine-tuning "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.18584","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T19:44:24Z","cross_cats_sorted":["cs.AI","cs.CV"],"title_canon_sha256":"a194d2d5b20ea76be0126f86ac7159a3f7aa5b578e9161348826d9b129524455","abstract_canon_sha256":"ef6a884339de03e10239f30bc95568d5fac0d989276acd0fd208b8ebbe920e00"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:10.050768Z","signature_b64":"0yuwPeSiAySBAQlZedUCherwQNBBoTx4zs0niS4JfHsPZq0lWAO1zoOb/gtqUPF7Z6VSiQJaj+XTRyGU8+Y3Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d22e6b75f28b7325c1d4c07913c0e708eecf3128968e1c8e4a7ff2969a14b4ff","last_reissued_at":"2026-05-20T00:02:10.049858Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:10.049858Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"GIST: Targeted Data Selection for Instruction Tuning via Coupled Optimization Geometry","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.LG","authors_text":"Chen Chen, Guanghui Min, Ke Wan, Tianhao Huang","submitted_at":"2026-02-20T19:44:24Z","abstract_excerpt":"Targeted data selection has emerged as a crucial paradigm for efficient instruction tuning, aiming to identify a small yet influential subset of training examples for a specific target task. In practice, influence is often measured through the effect of an example on parameter updates. To make selection scalable, many approaches leverage optimizer statistics (e.g., Adam states) as an axis-aligned surrogate for update geometry (i.e., diagonal precondition), implicitly treating parameters as coordinate-wise independent. We show that this assumption breaks down in parameter-efficient fine-tuning "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.18584","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.18584/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.18584","created_at":"2026-05-20T00:02:10.050011+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.18584v2","created_at":"2026-05-20T00:02:10.050011+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.18584","created_at":"2026-05-20T00:02:10.050011+00:00"},{"alias_kind":"pith_short_12","alias_value":"2IXGW5PSRNZS","created_at":"2026-05-20T00:02:10.050011+00:00"},{"alias_kind":"pith_short_16","alias_value":"2IXGW5PSRNZSLQOU","created_at":"2026-05-20T00:02:10.050011+00:00"},{"alias_kind":"pith_short_8","alias_value":"2IXGW5PS","created_at":"2026-05-20T00:02:10.050011+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.09404","citing_title":"Let the Target Select for Itself: Data Selection via Target-Aligned Paths","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06166","citing_title":"One Algorithm, Two Goals: Dual Scoring for Parameter and Data Selection in LLM Fine-Tuning","ref_index":34,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD","json":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD.json","graph_json":"https://pith.science/api/pith-number/2IXGW5PSRNZSLQOUYB4RHQHHBD/graph.json","events_json":"https://pith.science/api/pith-number/2IXGW5PSRNZSLQOUYB4RHQHHBD/events.json","paper":"https://pith.science/paper/2IXGW5PS"},"agent_actions":{"view_html":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD","download_json":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD.json","view_paper":"https://pith.science/paper/2IXGW5PS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.18584&json=true","fetch_graph":"https://pith.science/api/pith-number/2IXGW5PSRNZSLQOUYB4RHQHHBD/graph.json","fetch_events":"https://pith.science/api/pith-number/2IXGW5PSRNZSLQOUYB4RHQHHBD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD/action/storage_attestation","attest_author":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD/action/author_attestation","sign_citation":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD/action/citation_signature","submit_replication":"https://pith.science/pith/2IXGW5PSRNZSLQOUYB4RHQHHBD/action/replication_record"}},"created_at":"2026-05-20T00:02:10.050011+00:00","updated_at":"2026-05-20T00:02:10.050011+00:00"}