{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:MPS2VC2WO7EYUFNVSGEWO4PDOD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e3f88ad42567840b8e20e05aeacb8fb1a004b51d0211e56b03c28cb870756f9","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-02-11T00:23:13Z","title_canon_sha256":"92070141dcb1f1fab6aec3f71edcc7e5778ac9999c866fbe37dcaff81706102a"},"schema_version":"1.0","source":{"id":"2602.10388","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.10388","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"arxiv_version","alias_value":"2602.10388v3","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.10388","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_12","alias_value":"MPS2VC2WO7EY","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_16","alias_value":"MPS2VC2WO7EYUFNV","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_8","alias_value":"MPS2VC2W","created_at":"2026-05-29T01:04:36Z"}],"graph_snapshots":[{"event_id":"sha256:9800979df65a21ad58f66aa5ae4a9c3c6456ae773b99c141d104e694309c2167","target":"graph","created_at":"2026-05-29T01:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.10388/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The diversity of post-training data is critical for effective downstream performance in large language models (LLMs). Many existing approaches to constructing post-training data quantify diversity using text-based metrics that capture linguistic variation, but such metrics provide only weak signals for the task-relevant features that determine downstream performance. In this work, we introduce Feature Activation Coverage (FAC) which measures data diversity in an interpretable feature space. Building upon this metric, we further propose a diversity-driven data synthesis framework, named FAC Syn","authors_text":"Lijie Hu, Ninghao Liu, Xuansheng Wu, Yijiang Li, Zhongzhi Li","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-02-11T00:23:13Z","title":"Less is Enough: Synthesizing Diverse Data in LLM Feature Space with Sparse Autoencoders"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.10388","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fddfa5f4fb6aa50cbd0c226dada43f4c87f00b7c40e48199a15aa7790529e5fe","target":"record","created_at":"2026-05-29T01:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e3f88ad42567840b8e20e05aeacb8fb1a004b51d0211e56b03c28cb870756f9","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-02-11T00:23:13Z","title_canon_sha256":"92070141dcb1f1fab6aec3f71edcc7e5778ac9999c866fbe37dcaff81706102a"},"schema_version":"1.0","source":{"id":"2602.10388","kind":"arxiv","version":3}},"canonical_sha256":"63e5aa8b5677c98a15b591896771e370ecd4798c0805e4ec7da768a117b03e7b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"63e5aa8b5677c98a15b591896771e370ecd4798c0805e4ec7da768a117b03e7b","first_computed_at":"2026-05-29T01:04:36.884326Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:04:36.884326Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wiKn5gkCapr8mUFIaPMGQuRohCjJ57uXIkP3HOFNou4iLl+n7t8eXIt37dKEoEHdqaNzSuo9Htl/RxLLcnreBw==","signature_status":"signed_v1","signed_at":"2026-05-29T01:04:36.884846Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.10388","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fddfa5f4fb6aa50cbd0c226dada43f4c87f00b7c40e48199a15aa7790529e5fe","sha256:9800979df65a21ad58f66aa5ae4a9c3c6456ae773b99c141d104e694309c2167"],"state_sha256":"9759157683df5b8a3a69d3db64933f45d79012880b68fe2df809b41223eed945"}