{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:6C554KTEHPIMJ5SKKJSOUISOAE","short_pith_number":"pith:6C554KTE","canonical_record":{"source":{"id":"2508.21589","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-08-29T12:47:27Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"4b794f07b400ae9ae90836a156b34a81433683255aea8520399b64af8fea6d07","abstract_canon_sha256":"687733d8de5c2b9281f78bc17109107dcb0e39ce013f91d4c861255c2dca7ad2"},"schema_version":"1.0"},"canonical_sha256":"f0bbde2a643bd0c4f64a5264ea224e010b5f5d4f8c47fae06f89f93ba7fcf47d","source":{"kind":"arxiv","id":"2508.21589","version":5},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.21589","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"arxiv_version","alias_value":"2508.21589v5","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.21589","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_12","alias_value":"6C554KTEHPIM","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_16","alias_value":"6C554KTEHPIMJ5SK","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_8","alias_value":"6C554KTE","created_at":"2026-07-02T01:18:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:6C554KTEHPIMJ5SKKJSOUISOAE","target":"record","payload":{"canonical_record":{"source":{"id":"2508.21589","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-08-29T12:47:27Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"4b794f07b400ae9ae90836a156b34a81433683255aea8520399b64af8fea6d07","abstract_canon_sha256":"687733d8de5c2b9281f78bc17109107dcb0e39ce013f91d4c861255c2dca7ad2"},"schema_version":"1.0"},"canonical_sha256":"f0bbde2a643bd0c4f64a5264ea224e010b5f5d4f8c47fae06f89f93ba7fcf47d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:18:04.384928Z","signature_b64":"t6sMrdxxF8qRnC8wh3muFTRE5lCNgALJdiWU6sKflOpF84rKgMeYrSYMsWIWaaPAwIxkpA9HE+rwk9cR4UgnCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f0bbde2a643bd0c4f64a5264ea224e010b5f5d4f8c47fae06f89f93ba7fcf47d","last_reissued_at":"2026-07-02T01:18:04.384417Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:18:04.384417Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2508.21589","source_version":5,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:18:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qFKnMKVx7Yk6h6dQP4N0ASkaP4b6yquJnIztTHKHgbvDruIdYCqvcPMUZKUpZt2irQk+OfDmfGSB/II91cJyDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T17:02:11.900451Z"},"content_sha256":"6570507eac9a46cd6c6da6d999e26e87c97be2f201303caab04ad9ab93a30c17","schema_version":"1.0","event_id":"sha256:6570507eac9a46cd6c6da6d999e26e87c97be2f201303caab04ad9ab93a30c17"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:6C554KTEHPIMJ5SKKJSOUISOAE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Middo: Model-Informed Dynamic Data Optimization for Enhanced LLM Fine-Tuning via Closed-Loop Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Conghui He, Jiang Wu, Lijun Wu, Mengzhang Cai, Qizhi Pei, Xin Gao, Zhuoshi Pan, Zinan Tang","submitted_at":"2025-08-29T12:47:27Z","abstract_excerpt":"Supervised Fine-Tuning (SFT) Large Language Models (LLM) fundamentally rely on high-quality training data. While data selection and data synthesis are two common strategies to improve data quality, existing approaches often face limitations in static dataset curation that fail to adapt to evolving model capabilities. In this paper, we introduce Middo, a self-evolving Model-informed dynamic data optimization framework that uses model-aware data selection and context-preserving data refinement. Unlike conventional one-off filtering/synthesis methods, our framework establishes a closed-loop optim"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.21589","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.21589/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:18:04Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jsrJYfyWjx2GLHz0Nvut9ZMQ2YRfBpbitZreVrena1lB/BsHIQXuH1XvtIt9ju8bgQfkC1JpDZVjm2DtR3rpAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T17:02:11.900829Z"},"content_sha256":"7f76c754b4ee70384ec20dbace5147863cd22476843e10ee50c2d919ff7e1345","schema_version":"1.0","event_id":"sha256:7f76c754b4ee70384ec20dbace5147863cd22476843e10ee50c2d919ff7e1345"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6C554KTEHPIMJ5SKKJSOUISOAE/bundle.json","state_url":"https://pith.science/pith/6C554KTEHPIMJ5SKKJSOUISOAE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6C554KTEHPIMJ5SKKJSOUISOAE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-03T17:02:11Z","links":{"resolver":"https://pith.science/pith/6C554KTEHPIMJ5SKKJSOUISOAE","bundle":"https://pith.science/pith/6C554KTEHPIMJ5SKKJSOUISOAE/bundle.json","state":"https://pith.science/pith/6C554KTEHPIMJ5SKKJSOUISOAE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6C554KTEHPIMJ5SKKJSOUISOAE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:6C554KTEHPIMJ5SKKJSOUISOAE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"687733d8de5c2b9281f78bc17109107dcb0e39ce013f91d4c861255c2dca7ad2","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-08-29T12:47:27Z","title_canon_sha256":"4b794f07b400ae9ae90836a156b34a81433683255aea8520399b64af8fea6d07"},"schema_version":"1.0","source":{"id":"2508.21589","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.21589","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"arxiv_version","alias_value":"2508.21589v5","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.21589","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_12","alias_value":"6C554KTEHPIM","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_16","alias_value":"6C554KTEHPIMJ5SK","created_at":"2026-07-02T01:18:04Z"},{"alias_kind":"pith_short_8","alias_value":"6C554KTE","created_at":"2026-07-02T01:18:04Z"}],"graph_snapshots":[{"event_id":"sha256:7f76c754b4ee70384ec20dbace5147863cd22476843e10ee50c2d919ff7e1345","target":"graph","created_at":"2026-07-02T01:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2508.21589/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Supervised Fine-Tuning (SFT) Large Language Models (LLM) fundamentally rely on high-quality training data. While data selection and data synthesis are two common strategies to improve data quality, existing approaches often face limitations in static dataset curation that fail to adapt to evolving model capabilities. In this paper, we introduce Middo, a self-evolving Model-informed dynamic data optimization framework that uses model-aware data selection and context-preserving data refinement. Unlike conventional one-off filtering/synthesis methods, our framework establishes a closed-loop optim","authors_text":"Conghui He, Jiang Wu, Lijun Wu, Mengzhang Cai, Qizhi Pei, Xin Gao, Zhuoshi Pan, Zinan Tang","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-08-29T12:47:27Z","title":"Middo: Model-Informed Dynamic Data Optimization for Enhanced LLM Fine-Tuning via Closed-Loop Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.21589","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6570507eac9a46cd6c6da6d999e26e87c97be2f201303caab04ad9ab93a30c17","target":"record","created_at":"2026-07-02T01:18:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"687733d8de5c2b9281f78bc17109107dcb0e39ce013f91d4c861255c2dca7ad2","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-08-29T12:47:27Z","title_canon_sha256":"4b794f07b400ae9ae90836a156b34a81433683255aea8520399b64af8fea6d07"},"schema_version":"1.0","source":{"id":"2508.21589","kind":"arxiv","version":5}},"canonical_sha256":"f0bbde2a643bd0c4f64a5264ea224e010b5f5d4f8c47fae06f89f93ba7fcf47d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f0bbde2a643bd0c4f64a5264ea224e010b5f5d4f8c47fae06f89f93ba7fcf47d","first_computed_at":"2026-07-02T01:18:04.384417Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T01:18:04.384417Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"t6sMrdxxF8qRnC8wh3muFTRE5lCNgALJdiWU6sKflOpF84rKgMeYrSYMsWIWaaPAwIxkpA9HE+rwk9cR4UgnCg==","signature_status":"signed_v1","signed_at":"2026-07-02T01:18:04.384928Z","signed_message":"canonical_sha256_bytes"},"source_id":"2508.21589","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6570507eac9a46cd6c6da6d999e26e87c97be2f201303caab04ad9ab93a30c17","sha256:7f76c754b4ee70384ec20dbace5147863cd22476843e10ee50c2d919ff7e1345"],"state_sha256":"bc9fa559c2e728889c619b360be1de0d4d3525325043c5b76b41d76a8370eec4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ODy7kk1Na3d6NTeuoRZVbzuUNrURFrAqsZp6XxIi/La7n2J9BQpZDwX2ML0ekzeEjbSRjuYl6vTQCqcR/O2hBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-03T17:02:11.902914Z","bundle_sha256":"10e4bc524ceddaf0def61080c245b5ef4062acad586bdb76616312ad33a09562"}}