{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:WAF35MUYVSTIOTBBQOF54BZ7CB","short_pith_number":"pith:WAF35MUY","schema_version":"1.0","canonical_sha256":"b00bbeb298aca6874c21838bde073f10552d0c055582b256a59fc4ec3d1ff885","source":{"kind":"arxiv","id":"2606.21705","version":1},"attestation_state":"computed","paper":{"title":"Structural Assessment for Understanding and Guiding Dataset Distillation in Discrete Token Space","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jianyang Gu, Jozsef Hamari, Mohsen Zardadi, Vyacheslav Kungurtsev, Yue Cao, Yu Hu, Zheng Liu","submitted_at":"2026-06-19T19:33:15Z","abstract_excerpt":"Dataset distillation (DD) has proven to reduce training cost while preserving accuracy. While promising, the factors that make one distilled dataset more effective than another remain poorly understood. In this work, we investigate this question through the lens of discrete visual tokenizers. Whereas many prior DD efforts emphasize matching global data distributions, we suggest that the effectiveness depends on which semantic concepts are captured and how they are composed. Discrete visual tokenizers provide a finite vocabulary that enables direct statistical analysis of such compositional str"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.21705","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-06-19T19:33:15Z","cross_cats_sorted":[],"title_canon_sha256":"4e005b52b36d1e61aebf30c7880b8a09eef30d084b9c965058170d029306257b","abstract_canon_sha256":"8b479bce284603f538c4be6364760ffedb2190cc72814b8e64539e624c67750f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:13:20.198271Z","signature_b64":"PLHVYavNEBhr52u8O0VNV1PrEWQIuPUH8BTpw8yAB0/BOs4aN4d6JRuLWPZz+VAOz/6wURjIzzs7rW5OAnenBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b00bbeb298aca6874c21838bde073f10552d0c055582b256a59fc4ec3d1ff885","last_reissued_at":"2026-06-23T01:13:20.197773Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:13:20.197773Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Structural Assessment for Understanding and Guiding Dataset Distillation in Discrete Token Space","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jianyang Gu, Jozsef Hamari, Mohsen Zardadi, Vyacheslav Kungurtsev, Yue Cao, Yu Hu, Zheng Liu","submitted_at":"2026-06-19T19:33:15Z","abstract_excerpt":"Dataset distillation (DD) has proven to reduce training cost while preserving accuracy. While promising, the factors that make one distilled dataset more effective than another remain poorly understood. In this work, we investigate this question through the lens of discrete visual tokenizers. Whereas many prior DD efforts emphasize matching global data distributions, we suggest that the effectiveness depends on which semantic concepts are captured and how they are composed. Discrete visual tokenizers provide a finite vocabulary that enables direct statistical analysis of such compositional str"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21705","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21705/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.21705","created_at":"2026-06-23T01:13:20.197844+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.21705v1","created_at":"2026-06-23T01:13:20.197844+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21705","created_at":"2026-06-23T01:13:20.197844+00:00"},{"alias_kind":"pith_short_12","alias_value":"WAF35MUYVSTI","created_at":"2026-06-23T01:13:20.197844+00:00"},{"alias_kind":"pith_short_16","alias_value":"WAF35MUYVSTIOTBB","created_at":"2026-06-23T01:13:20.197844+00:00"},{"alias_kind":"pith_short_8","alias_value":"WAF35MUY","created_at":"2026-06-23T01:13:20.197844+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB","json":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB.json","graph_json":"https://pith.science/api/pith-number/WAF35MUYVSTIOTBBQOF54BZ7CB/graph.json","events_json":"https://pith.science/api/pith-number/WAF35MUYVSTIOTBBQOF54BZ7CB/events.json","paper":"https://pith.science/paper/WAF35MUY"},"agent_actions":{"view_html":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB","download_json":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB.json","view_paper":"https://pith.science/paper/WAF35MUY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.21705&json=true","fetch_graph":"https://pith.science/api/pith-number/WAF35MUYVSTIOTBBQOF54BZ7CB/graph.json","fetch_events":"https://pith.science/api/pith-number/WAF35MUYVSTIOTBBQOF54BZ7CB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB/action/storage_attestation","attest_author":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB/action/author_attestation","sign_citation":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB/action/citation_signature","submit_replication":"https://pith.science/pith/WAF35MUYVSTIOTBBQOF54BZ7CB/action/replication_record"}},"created_at":"2026-06-23T01:13:20.197844+00:00","updated_at":"2026-06-23T01:13:20.197844+00:00"}