{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:OXJAAYHXM36USMZZZD5M24U7GK","short_pith_number":"pith:OXJAAYHX","canonical_record":{"source":{"id":"2606.21337","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:31:43Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"6d893f0ba412af066e761ebc0daec3e5f29b9255a51253a9e55cddea3f76a1c7","abstract_canon_sha256":"8cae848b88a2be2ad9ec34061f4a5006049e1e809ac052c02340d64a09760b45"},"schema_version":"1.0"},"canonical_sha256":"75d20060f766fd493339c8facd729f32a349a5b3e3da2d0997377cef7ef0dc0e","source":{"kind":"arxiv","id":"2606.21337","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21337","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21337v1","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21337","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_12","alias_value":"OXJAAYHXM36U","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_16","alias_value":"OXJAAYHXM36USMZZ","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_8","alias_value":"OXJAAYHX","created_at":"2026-06-23T01:12:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:OXJAAYHXM36USMZZZD5M24U7GK","target":"record","payload":{"canonical_record":{"source":{"id":"2606.21337","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:31:43Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"6d893f0ba412af066e761ebc0daec3e5f29b9255a51253a9e55cddea3f76a1c7","abstract_canon_sha256":"8cae848b88a2be2ad9ec34061f4a5006049e1e809ac052c02340d64a09760b45"},"schema_version":"1.0"},"canonical_sha256":"75d20060f766fd493339c8facd729f32a349a5b3e3da2d0997377cef7ef0dc0e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T01:12:37.733997Z","signature_b64":"s/0EJb9ahIPgorYij4AI95O6K7t6xFIL83LUGh6aEoiNEVkaIpGQs36OtImujmJ8w4hpyS0l816IP0UM1V/vDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"75d20060f766fd493339c8facd729f32a349a5b3e3da2d0997377cef7ef0dc0e","last_reissued_at":"2026-06-23T01:12:37.733416Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T01:12:37.733416Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.21337","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/5DPPbI2QKxWAILkmVOjzFISi/yMPnmqH/FDPSSqYMeZmWreQ76u7UJobILiZh1WsQAWpU1uq+LkKSqp22soAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-23T12:55:09.943739Z"},"content_sha256":"89b7e667ba6ad4b8dfc5eb3e33664149808f48f6a6aca56260d484010247e538","schema_version":"1.0","event_id":"sha256:89b7e667ba6ad4b8dfc5eb3e33664149808f48f6a6aca56260d484010247e538"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:OXJAAYHXM36USMZZZD5M24U7GK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"DataClaw0: Agentic Tailoring Multimodal Data from Raw Streams","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Cong Wan, Jiangyang Li, Lin Peng, SongLin Dong, Xiangyang Luo, Yihong Gong, Zeyu Guo, Zhiheng Ma, Zijian Cai","submitted_at":"2026-06-19T11:31:43Z","abstract_excerpt":"Massive unstructured multimodal streams suffer from high \"data entropy,\" impeding both efficient human knowledge acquisition and high-quality AI post-training. Existing passive annotation paradigms, heavily reliant on heuristic rules or general VLMs, are costly, monotonous, and fail to unlock the deep procedural logic embedded in raw data. We elevate data processing to a learnable capability, proposing a paradigm shift towards Agentic Data Tailoring, which actively refining and structuring data to align with diverse user and downstream intents. To overcome the data scarcity bottleneck in train"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21337","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.21337/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T01:12:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6Zam2TxXRzg+foAmZFnsqZr72DONlxJWmi90lG/vjhOcJ+tdNdPCHe9DOgEPoocRU05nlsFLwEv8/a3N1szqCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-23T12:55:09.944115Z"},"content_sha256":"bb0ae36a82fb12e31e14d85fa5974913d4d07542aab67fc6c99db59fb054036c","schema_version":"1.0","event_id":"sha256:bb0ae36a82fb12e31e14d85fa5974913d4d07542aab67fc6c99db59fb054036c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OXJAAYHXM36USMZZZD5M24U7GK/bundle.json","state_url":"https://pith.science/pith/OXJAAYHXM36USMZZZD5M24U7GK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OXJAAYHXM36USMZZZD5M24U7GK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-23T12:55:09Z","links":{"resolver":"https://pith.science/pith/OXJAAYHXM36USMZZZD5M24U7GK","bundle":"https://pith.science/pith/OXJAAYHXM36USMZZZD5M24U7GK/bundle.json","state":"https://pith.science/pith/OXJAAYHXM36USMZZZD5M24U7GK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OXJAAYHXM36USMZZZD5M24U7GK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:OXJAAYHXM36USMZZZD5M24U7GK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8cae848b88a2be2ad9ec34061f4a5006049e1e809ac052c02340d64a09760b45","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:31:43Z","title_canon_sha256":"6d893f0ba412af066e761ebc0daec3e5f29b9255a51253a9e55cddea3f76a1c7"},"schema_version":"1.0","source":{"id":"2606.21337","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.21337","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"arxiv_version","alias_value":"2606.21337v1","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.21337","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_12","alias_value":"OXJAAYHXM36U","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_16","alias_value":"OXJAAYHXM36USMZZ","created_at":"2026-06-23T01:12:37Z"},{"alias_kind":"pith_short_8","alias_value":"OXJAAYHX","created_at":"2026-06-23T01:12:37Z"}],"graph_snapshots":[{"event_id":"sha256:bb0ae36a82fb12e31e14d85fa5974913d4d07542aab67fc6c99db59fb054036c","target":"graph","created_at":"2026-06-23T01:12:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.21337/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Massive unstructured multimodal streams suffer from high \"data entropy,\" impeding both efficient human knowledge acquisition and high-quality AI post-training. Existing passive annotation paradigms, heavily reliant on heuristic rules or general VLMs, are costly, monotonous, and fail to unlock the deep procedural logic embedded in raw data. We elevate data processing to a learnable capability, proposing a paradigm shift towards Agentic Data Tailoring, which actively refining and structuring data to align with diverse user and downstream intents. To overcome the data scarcity bottleneck in train","authors_text":"Cong Wan, Jiangyang Li, Lin Peng, SongLin Dong, Xiangyang Luo, Yihong Gong, Zeyu Guo, Zhiheng Ma, Zijian Cai","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:31:43Z","title":"DataClaw0: Agentic Tailoring Multimodal Data from Raw Streams"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.21337","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:89b7e667ba6ad4b8dfc5eb3e33664149808f48f6a6aca56260d484010247e538","target":"record","created_at":"2026-06-23T01:12:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8cae848b88a2be2ad9ec34061f4a5006049e1e809ac052c02340d64a09760b45","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:31:43Z","title_canon_sha256":"6d893f0ba412af066e761ebc0daec3e5f29b9255a51253a9e55cddea3f76a1c7"},"schema_version":"1.0","source":{"id":"2606.21337","kind":"arxiv","version":1}},"canonical_sha256":"75d20060f766fd493339c8facd729f32a349a5b3e3da2d0997377cef7ef0dc0e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"75d20060f766fd493339c8facd729f32a349a5b3e3da2d0997377cef7ef0dc0e","first_computed_at":"2026-06-23T01:12:37.733416Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T01:12:37.733416Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"s/0EJb9ahIPgorYij4AI95O6K7t6xFIL83LUGh6aEoiNEVkaIpGQs36OtImujmJ8w4hpyS0l816IP0UM1V/vDg==","signature_status":"signed_v1","signed_at":"2026-06-23T01:12:37.733997Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.21337","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:89b7e667ba6ad4b8dfc5eb3e33664149808f48f6a6aca56260d484010247e538","sha256:bb0ae36a82fb12e31e14d85fa5974913d4d07542aab67fc6c99db59fb054036c"],"state_sha256":"a6993f2831753703d15dc08fc5c67ad8c3689bdba4347a62087cf7cf97a8bde7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BT9kGTh/7sVkfbFpNmfpoDRMQvJV/ozO1sXJxhbWXxMOjKMYZbiNURvyKAL+I6g6xVRhdSXzdNACTYnVKRh8Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-23T12:55:09.946154Z","bundle_sha256":"8a161188f254ba35a02a1adf9c429c20480491829694c7e2767904ed877e5430"}}