{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:TQVTQAHA4TZTBIG3AZNVKM2QFP","short_pith_number":"pith:TQVTQAHA","canonical_record":{"source":{"id":"2605.12334","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:16:15Z","cross_cats_sorted":[],"title_canon_sha256":"8cf227e754c8f31afc33536280ebbe3a5c85859213b19fcd75c2979ea7e69660","abstract_canon_sha256":"3598c8cdc6505cd98e33bc9e9469709671951f6be6b91282e5376e315a0c1aff"},"schema_version":"1.0"},"canonical_sha256":"9c2b3800e0e4f330a0db065b5533502be6a279846a5c6663d0e34e9018acc6d5","source":{"kind":"arxiv","id":"2605.12334","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12334","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12334v2","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12334","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_12","alias_value":"TQVTQAHA4TZT","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_16","alias_value":"TQVTQAHA4TZTBIG3","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_8","alias_value":"TQVTQAHA","created_at":"2026-05-21T01:05:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:TQVTQAHA4TZTBIG3AZNVKM2QFP","target":"record","payload":{"canonical_record":{"source":{"id":"2605.12334","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:16:15Z","cross_cats_sorted":[],"title_canon_sha256":"8cf227e754c8f31afc33536280ebbe3a5c85859213b19fcd75c2979ea7e69660","abstract_canon_sha256":"3598c8cdc6505cd98e33bc9e9469709671951f6be6b91282e5376e315a0c1aff"},"schema_version":"1.0"},"canonical_sha256":"9c2b3800e0e4f330a0db065b5533502be6a279846a5c6663d0e34e9018acc6d5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:21.144074Z","signature_b64":"6xzKREXfVZLmks9pHQZC2ae9Vq4veyM30Y50rDZR5iz8HGsTS2I1cI9rkAUoqDB+zXvN0fTaiHqQDSNEjfqeAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9c2b3800e0e4f330a0db065b5533502be6a279846a5c6663d0e34e9018acc6d5","last_reissued_at":"2026-05-21T01:05:21.143258Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:21.143258Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.12334","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yKjrV3JKE915bNz7Ufj/SOsd51T5UxF34JDW6kQi3yR7FkaN8Gchy5XtGVjsoXUbfoKYKmCJGuFp+Cmat5EXBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:33:33.113350Z"},"content_sha256":"d2ff0291368da58823e8ca1b702b32623aae84e5909f6a0efc4d3554c78ad9a7","schema_version":"1.0","event_id":"sha256:d2ff0291368da58823e8ca1b702b32623aae84e5909f6a0efc4d3554c78ad9a7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:TQVTQAHA4TZTBIG3AZNVKM2QFP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcing VLAs in Task-Agnostic World Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts.","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Fengming Zhang, Junjie Lu, Kaixin Wang, Li Zhao, Rui Yu, Tianxiang Zhang, Xinyao Qin, Yucen Wang","submitted_at":"2026-05-12T16:16:15Z","abstract_excerpt":"Post-training Vision-Language-Action (VLA) models via reinforcement learning (RL) in learned world models has emerged as an effective strategy to adapt to new tasks without costly real-world interactions. However, while using imagined trajectories reduces the sample complexity of policy training, existing methods still heavily rely on task-specific data to fine-tune both the world and reward models, fundamentally limiting their scalability to unseen tasks. To overcome this, we argue that world and reward models should capture transferable physical priors that enable zero-shot inference. We pro"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Because both components are task-agnostic, VLAs can be readily finetuned for any new task entirely within this zero-shot imagination. ... proving that generalized physical priors can effectively substitute for costly task-dependent data.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That a world model pre-trained solely on diverse task-free behaviors will capture sufficiently accurate and transferable physical priors to support reliable zero-shot inference and reward generation via an off-the-shelf VLM on unseen tasks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RAW-Dream lets VLAs learn new tasks in zero-shot imagination by using a world model pre-trained only on task-free behaviors and an unmodified VLM to supply rewards, with dual-noise verification to limit hallucinations.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"1f2869e9e651e8d40e5257f769f57a547016c6558c8861648d8dee41bb5374e8"},"source":{"id":"2605.12334","kind":"arxiv","version":2},"verdict":{"id":"36876279-3927-4f11-8100-deb4fff03819","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T04:10:21.078292Z","strongest_claim":"Because both components are task-agnostic, VLAs can be readily finetuned for any new task entirely within this zero-shot imagination. ... proving that generalized physical priors can effectively substitute for costly task-dependent data.","one_line_summary":"RAW-Dream lets VLAs learn new tasks in zero-shot imagination by using a world model pre-trained only on task-free behaviors and an unmodified VLM to supply rewards, with dual-noise verification to limit hallucinations.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That a world model pre-trained solely on diverse task-free behaviors will capture sufficiently accurate and transferable physical priors to support reliable zero-shot inference and reward generation via an off-the-shelf VLM on unseen tasks.","pith_extraction_headline":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.12334/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-20T13:31:25.325359Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-20T09:56:59.027026Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T22:41:58.265480Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T10:38:25.080527Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"159ff3a3abf46b077703f69159d370047520d84968cbc7efdbf1cd492e08696c"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"61839cd2b319630fbace09c6a46edc14f42369b270563149d5239436f042060a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"36876279-3927-4f11-8100-deb4fff03819"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5hz+dqn4u0xGyLVzhAaMA8iro8Rx+FX8xc8prDQM2tihnELJrFWE30iGm3DcxcVpTfCBCaes1GughTZXF+xEDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T07:33:33.114674Z"},"content_sha256":"4174614749bcdfe4d4d5c24fdece6f52daae3c94307b69fd1010b4b3ac720c51","schema_version":"1.0","event_id":"sha256:4174614749bcdfe4d4d5c24fdece6f52daae3c94307b69fd1010b4b3ac720c51"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/bundle.json","state_url":"https://pith.science/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T07:33:33Z","links":{"resolver":"https://pith.science/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP","bundle":"https://pith.science/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/bundle.json","state":"https://pith.science/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/TQVTQAHA4TZTBIG3AZNVKM2QFP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TQVTQAHA4TZTBIG3AZNVKM2QFP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3598c8cdc6505cd98e33bc9e9469709671951f6be6b91282e5376e315a0c1aff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:16:15Z","title_canon_sha256":"8cf227e754c8f31afc33536280ebbe3a5c85859213b19fcd75c2979ea7e69660"},"schema_version":"1.0","source":{"id":"2605.12334","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12334","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12334v2","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12334","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_12","alias_value":"TQVTQAHA4TZT","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_16","alias_value":"TQVTQAHA4TZTBIG3","created_at":"2026-05-21T01:05:21Z"},{"alias_kind":"pith_short_8","alias_value":"TQVTQAHA","created_at":"2026-05-21T01:05:21Z"}],"graph_snapshots":[{"event_id":"sha256:4174614749bcdfe4d4d5c24fdece6f52daae3c94307b69fd1010b4b3ac720c51","target":"graph","created_at":"2026-05-21T01:05:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Because both components are task-agnostic, VLAs can be readily finetuned for any new task entirely within this zero-shot imagination. ... proving that generalized physical priors can effectively substitute for costly task-dependent data."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That a world model pre-trained solely on diverse task-free behaviors will capture sufficiently accurate and transferable physical priors to support reliable zero-shot inference and reward generation via an off-the-shelf VLM on unseen tasks."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"RAW-Dream lets VLAs learn new tasks in zero-shot imagination by using a world model pre-trained only on task-free behaviors and an unmodified VLM to supply rewards, with dual-noise verification to limit hallucinations."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts."}],"snapshot_sha256":"1f2869e9e651e8d40e5257f769f57a547016c6558c8861648d8dee41bb5374e8"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"61839cd2b319630fbace09c6a46edc14f42369b270563149d5239436f042060a"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-20T13:31:25.325359Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-20T09:56:59.027026Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T22:41:58.265480Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T10:38:25.080527Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.12334/integrity.json","findings":[],"snapshot_sha256":"159ff3a3abf46b077703f69159d370047520d84968cbc7efdbf1cd492e08696c","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Post-training Vision-Language-Action (VLA) models via reinforcement learning (RL) in learned world models has emerged as an effective strategy to adapt to new tasks without costly real-world interactions. However, while using imagined trajectories reduces the sample complexity of policy training, existing methods still heavily rely on task-specific data to fine-tune both the world and reward models, fundamentally limiting their scalability to unseen tasks. To overcome this, we argue that world and reward models should capture transferable physical priors that enable zero-shot inference. We pro","authors_text":"Fengming Zhang, Junjie Lu, Kaixin Wang, Li Zhao, Rui Yu, Tianxiang Zhang, Xinyao Qin, Yucen Wang","cross_cats":[],"headline":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:16:15Z","title":"Reinforcing VLAs in Task-Agnostic World Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.12334","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-13T04:10:21.078292Z","id":"36876279-3927-4f11-8100-deb4fff03819","model_set":{"reader":"grok-4.3"},"one_line_summary":"RAW-Dream lets VLAs learn new tasks in zero-shot imagination by using a world model pre-trained only on task-free behaviors and an unmodified VLM to supply rewards, with dual-noise verification to limit hallucinations.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A task-agnostic world model pre-trained on diverse behaviors combined with an off-the-shelf VLM allows VLAs to be fine-tuned for new tasks entirely through zero-shot imagined rollouts.","strongest_claim":"Because both components are task-agnostic, VLAs can be readily finetuned for any new task entirely within this zero-shot imagination. ... proving that generalized physical priors can effectively substitute for costly task-dependent data.","weakest_assumption":"That a world model pre-trained solely on diverse task-free behaviors will capture sufficiently accurate and transferable physical priors to support reliable zero-shot inference and reward generation via an off-the-shelf VLM on unseen tasks."}},"verdict_id":"36876279-3927-4f11-8100-deb4fff03819"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d2ff0291368da58823e8ca1b702b32623aae84e5909f6a0efc4d3554c78ad9a7","target":"record","created_at":"2026-05-21T01:05:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3598c8cdc6505cd98e33bc9e9469709671951f6be6b91282e5376e315a0c1aff","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T16:16:15Z","title_canon_sha256":"8cf227e754c8f31afc33536280ebbe3a5c85859213b19fcd75c2979ea7e69660"},"schema_version":"1.0","source":{"id":"2605.12334","kind":"arxiv","version":2}},"canonical_sha256":"9c2b3800e0e4f330a0db065b5533502be6a279846a5c6663d0e34e9018acc6d5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9c2b3800e0e4f330a0db065b5533502be6a279846a5c6663d0e34e9018acc6d5","first_computed_at":"2026-05-21T01:05:21.143258Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:21.143258Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"6xzKREXfVZLmks9pHQZC2ae9Vq4veyM30Y50rDZR5iz8HGsTS2I1cI9rkAUoqDB+zXvN0fTaiHqQDSNEjfqeAQ==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:21.144074Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12334","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d2ff0291368da58823e8ca1b702b32623aae84e5909f6a0efc4d3554c78ad9a7","sha256:4174614749bcdfe4d4d5c24fdece6f52daae3c94307b69fd1010b4b3ac720c51"],"state_sha256":"52ad9bd423125d03e009ac3cb335900a83587ec933b1818ec4d42ae7b28fdbe9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fzZyX5BgaT7THJzsf33YrJlBCgt8h16pzoSGGN8frvD1WrRwQh+BZw2k0srbw6t9WVoYWeXEOC68XH2kIWh2Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T07:33:33.120250Z","bundle_sha256":"3b2849c6796b59e9cf88ec508c0f0f4decef074b8aecd6c984ec2e384b315bf1"}}