{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:WSECIQZZOFJAZQFEBJ7UKWG462","short_pith_number":"pith:WSECIQZZ","canonical_record":{"source":{"id":"2605.19319","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:54:46Z","cross_cats_sorted":[],"title_canon_sha256":"d6a3322a9c24b80d245202d38a15167821951a4ab9ca32c69c546bc122a27628","abstract_canon_sha256":"fc24e2c75cd75af8f84142ffe553592af9fe0c60ef37baf188550e38bbdb59b7"},"schema_version":"1.0"},"canonical_sha256":"b48824433971520cc0a40a7f4558dcf6822e77dc7f7dc8d23f1ace092980c2f6","source":{"kind":"arxiv","id":"2605.19319","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19319","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19319v1","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19319","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_12","alias_value":"WSECIQZZOFJA","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_16","alias_value":"WSECIQZZOFJAZQFE","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_8","alias_value":"WSECIQZZ","created_at":"2026-05-20T01:05:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:WSECIQZZOFJAZQFEBJ7UKWG462","target":"record","payload":{"canonical_record":{"source":{"id":"2605.19319","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:54:46Z","cross_cats_sorted":[],"title_canon_sha256":"d6a3322a9c24b80d245202d38a15167821951a4ab9ca32c69c546bc122a27628","abstract_canon_sha256":"fc24e2c75cd75af8f84142ffe553592af9fe0c60ef37baf188550e38bbdb59b7"},"schema_version":"1.0"},"canonical_sha256":"b48824433971520cc0a40a7f4558dcf6822e77dc7f7dc8d23f1ace092980c2f6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:39.092924Z","signature_b64":"iGf6VgHNk/CoMEHehyjNtYM0sSTYTSQ/RSB3THWYjTQR58HTAPQKPb88Q1t+LS9IKZI4bukLeFjrjtuwlfhRAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b48824433971520cc0a40a7f4558dcf6822e77dc7f7dc8d23f1ace092980c2f6","last_reissued_at":"2026-05-20T01:05:39.092249Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:39.092249Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.19319","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xjnQ/ryRgiMpV9sXMKcYetrsOZtJjHMBfKIXJ/gvFEg83jBYXyNMKiC3kgebyInHR2TUVFSKFMYwrTW1PoDXBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T04:29:01.002548Z"},"content_sha256":"f5b2cd2f39a33460581be12e4cf069f0658f9960de39a86d2ab1dc92dd4c79b2","schema_version":"1.0","event_id":"sha256:f5b2cd2f39a33460581be12e4cf069f0658f9960de39a86d2ab1dc92dd4c79b2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:WSECIQZZOFJAZQFEBJ7UKWG462","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SWEET: Sparse World Modeling with Image Editing for Embodied Task Execution","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Mike Zheng Shou, Xiyao Deng, Yihan Wang, Yiren Song, Zhuoran Yan","submitted_at":"2026-05-19T03:54:46Z","abstract_excerpt":"Visual prediction has emerged as a promising paradigm for embodied control, where future observations are generated and then translated into actions. However, dense video generation is computationally expensive and often unnecessary for many manipulation tasks, whose progress can be summarized by a small number of task-relevant visual states. In this work, we study whether image editing models can serve as sparse visual world models for robot manipulation by predicting task-level future states without dense video rollout. We first conduct a controlled comparison between the video generation mo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19319","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19319/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T01:05:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"j8j6bE3XEqdkehyeUzvLgTFo5g/7PAj6H5TSLXRuouu2WM2LTuqDZWzNDK1z4WA4NlME6sdu7cd1IyxCb73lDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T04:29:01.002933Z"},"content_sha256":"4f88a2533c56ece283083c2e1d51a4d2e7869b16a86b852d8b7db059cc1526b5","schema_version":"1.0","event_id":"sha256:4f88a2533c56ece283083c2e1d51a4d2e7869b16a86b852d8b7db059cc1526b5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WSECIQZZOFJAZQFEBJ7UKWG462/bundle.json","state_url":"https://pith.science/pith/WSECIQZZOFJAZQFEBJ7UKWG462/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WSECIQZZOFJAZQFEBJ7UKWG462/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T04:29:01Z","links":{"resolver":"https://pith.science/pith/WSECIQZZOFJAZQFEBJ7UKWG462","bundle":"https://pith.science/pith/WSECIQZZOFJAZQFEBJ7UKWG462/bundle.json","state":"https://pith.science/pith/WSECIQZZOFJAZQFEBJ7UKWG462/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WSECIQZZOFJAZQFEBJ7UKWG462/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WSECIQZZOFJAZQFEBJ7UKWG462","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fc24e2c75cd75af8f84142ffe553592af9fe0c60ef37baf188550e38bbdb59b7","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:54:46Z","title_canon_sha256":"d6a3322a9c24b80d245202d38a15167821951a4ab9ca32c69c546bc122a27628"},"schema_version":"1.0","source":{"id":"2605.19319","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.19319","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"arxiv_version","alias_value":"2605.19319v1","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19319","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_12","alias_value":"WSECIQZZOFJA","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_16","alias_value":"WSECIQZZOFJAZQFE","created_at":"2026-05-20T01:05:39Z"},{"alias_kind":"pith_short_8","alias_value":"WSECIQZZ","created_at":"2026-05-20T01:05:39Z"}],"graph_snapshots":[{"event_id":"sha256:4f88a2533c56ece283083c2e1d51a4d2e7869b16a86b852d8b7db059cc1526b5","target":"graph","created_at":"2026-05-20T01:05:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.19319/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Visual prediction has emerged as a promising paradigm for embodied control, where future observations are generated and then translated into actions. However, dense video generation is computationally expensive and often unnecessary for many manipulation tasks, whose progress can be summarized by a small number of task-relevant visual states. In this work, we study whether image editing models can serve as sparse visual world models for robot manipulation by predicting task-level future states without dense video rollout. We first conduct a controlled comparison between the video generation mo","authors_text":"Mike Zheng Shou, Xiyao Deng, Yihan Wang, Yiren Song, Zhuoran Yan","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:54:46Z","title":"SWEET: Sparse World Modeling with Image Editing for Embodied Task Execution"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19319","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f5b2cd2f39a33460581be12e4cf069f0658f9960de39a86d2ab1dc92dd4c79b2","target":"record","created_at":"2026-05-20T01:05:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fc24e2c75cd75af8f84142ffe553592af9fe0c60ef37baf188550e38bbdb59b7","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:54:46Z","title_canon_sha256":"d6a3322a9c24b80d245202d38a15167821951a4ab9ca32c69c546bc122a27628"},"schema_version":"1.0","source":{"id":"2605.19319","kind":"arxiv","version":1}},"canonical_sha256":"b48824433971520cc0a40a7f4558dcf6822e77dc7f7dc8d23f1ace092980c2f6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b48824433971520cc0a40a7f4558dcf6822e77dc7f7dc8d23f1ace092980c2f6","first_computed_at":"2026-05-20T01:05:39.092249Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T01:05:39.092249Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"iGf6VgHNk/CoMEHehyjNtYM0sSTYTSQ/RSB3THWYjTQR58HTAPQKPb88Q1t+LS9IKZI4bukLeFjrjtuwlfhRAQ==","signature_status":"signed_v1","signed_at":"2026-05-20T01:05:39.092924Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.19319","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f5b2cd2f39a33460581be12e4cf069f0658f9960de39a86d2ab1dc92dd4c79b2","sha256:4f88a2533c56ece283083c2e1d51a4d2e7869b16a86b852d8b7db059cc1526b5"],"state_sha256":"fd60b9a8d01036dd3a12142566a1b087d47e2a3df866843b4a55f73a1efac600"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7iN2IbC0jTbDAz/JXDMd2j2wJIbpqImGB8S0Ph2KtpRHL8juXCjCLA3Z9oo4JtT7U8qAS6P4lgkLr9jieCYiCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T04:29:01.005216Z","bundle_sha256":"aa04a4197908d5d661a196838e4e77bae8bb3ae1ee3a2e3762eea73f0ec205ca"}}