{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:2IESB3WMK23AGYDUTI5ZDPZ3KL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b092c91f41b8a1245cb75a7303e0f53df7a1eef21d91747577ed6fb1ad5f5e7b","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-31T16:32:12Z","title_canon_sha256":"5bcf56f378723d61bb225af5b28e2dbc3d541a7c822da2a40b730ecf227f3db0"},"schema_version":"1.0","source":{"id":"2510.27607","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.27607","created_at":"2026-05-29T01:04:57Z"},{"alias_kind":"arxiv_version","alias_value":"2510.27607v3","created_at":"2026-05-29T01:04:57Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.27607","created_at":"2026-05-29T01:04:57Z"},{"alias_kind":"pith_short_12","alias_value":"2IESB3WMK23A","created_at":"2026-05-29T01:04:57Z"},{"alias_kind":"pith_short_16","alias_value":"2IESB3WMK23AGYDU","created_at":"2026-05-29T01:04:57Z"},{"alias_kind":"pith_short_8","alias_value":"2IESB3WM","created_at":"2026-05-29T01:04:57Z"}],"graph_snapshots":[{"event_id":"sha256:f7277c8ffaa49d7d7dc5984ae708bc3b40167a13ba6620b38a6b0e1d82d2dc69","target":"graph","created_at":"2026-05-29T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.27607/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Augmenting vision-language-action models (VLAs) with world models is promising for robotic policy learning but faces challenges in jointly predicting states and actions due to the modality gap. To address this, we propose DUal-STream diffusion (DUST), a world-model augmented VLA framework featuring a multimodal diffusion transformer that maintains separate modality streams while enabling cross-modal knowledge sharing. In addition, DUST utilizes independent noise perturbations and a decoupled flow matching loss to learn cross-modal causal relationships. We further introduce an asynchronous samp","authors_text":"Dongyoung Kim, Huiwon Jang, Jinwoo Shin, John Won, Kyungmin Lee","cross_cats":["cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-31T16:32:12Z","title":"Dual-Stream Diffusion for World-Model Augmented Vision-Language-Action Model"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.27607","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f52b88cad84d8981e3d59769037605beab5a6ccbabecfc51321a1c72820c3cd2","target":"record","created_at":"2026-05-29T01:04:57Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b092c91f41b8a1245cb75a7303e0f53df7a1eef21d91747577ed6fb1ad5f5e7b","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-10-31T16:32:12Z","title_canon_sha256":"5bcf56f378723d61bb225af5b28e2dbc3d541a7c822da2a40b730ecf227f3db0"},"schema_version":"1.0","source":{"id":"2510.27607","kind":"arxiv","version":3}},"canonical_sha256":"d20920eecc56b60360749a3b91bf3b52f057455c9e45fb5659c1d02b25ff9a62","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d20920eecc56b60360749a3b91bf3b52f057455c9e45fb5659c1d02b25ff9a62","first_computed_at":"2026-05-29T01:04:57.684005Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:04:57.684005Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"RFgE1S5gF9mHtKMdJDAWPa1cDhh1iKhEH4EqgnmsAMysKvhM5rz+41xwh3JUVXIqJDrBs8QjGKZCrhG3IGB+CQ==","signature_status":"signed_v1","signed_at":"2026-05-29T01:04:57.684526Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.27607","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f52b88cad84d8981e3d59769037605beab5a6ccbabecfc51321a1c72820c3cd2","sha256:f7277c8ffaa49d7d7dc5984ae708bc3b40167a13ba6620b38a6b0e1d82d2dc69"],"state_sha256":"38017a3804ecde8d0d711797917251c4a009900e418e5603248ad07c9a99b91d"}