{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:YIPZDVQAKZX22K32RLVJLHZUJW","short_pith_number":"pith:YIPZDVQA","schema_version":"1.0","canonical_sha256":"c21f91d600566fad2b7a8aea959f344dacf4d141a5d1c70e6db3c057af713221","source":{"kind":"arxiv","id":"2512.06628","version":3},"attestation_state":"computed","paper":{"title":"MIND-V: Hierarchical World Model for Long-Horizon Robotic Manipulation with RL-based Physical Alignment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.RO","authors_text":"Haocheng Luo, Jun Zhou, Mingyang Zhang, Puxin Yan, Ruicheng Zhang, Xiaofan Liu, Xiu Li, Zhizhou Zhong, Zunnan Xu","submitted_at":"2025-12-07T02:28:06Z","abstract_excerpt":"Scalable embodied intelligence is constrained by the scarcity of diverse, long-horizon robotic manipulation data. Existing video world models in this domain are limited to synthesizing short clips of simple actions and often rely on manually defined trajectories. To this end, we introduce MIND-V, a cognitive hierarchical world model designed to synthesize physically plausible and logically coherent videos of long-horizon robotic manipulation. Inspired by cognitive science, MIND-V bridges high-level reasoning with pixel-level synthesis through three core components: a Semantic Reasoning Hub (SR"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.06628","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2025-12-07T02:28:06Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"544eb682f9cb63e1896302cb968e5c372b6ec42ae02b3aa5820093f0c2a57ecf","abstract_canon_sha256":"bc5df2e5fb3a2f2baa448bdfb9d99e243c7304e4336ec1faa3fbdca6ad146f14"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:09:49.235279Z","signature_b64":"WxO+mqlz9irRw+GmP1f3ggU7fKqKbkXvYhHZ9xvMYH4MYBw2CxWaDLLZhjNZKSvQO/btvQO9IJNh44/+4lYZCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c21f91d600566fad2b7a8aea959f344dacf4d141a5d1c70e6db3c057af713221","last_reissued_at":"2026-06-10T01:09:49.234259Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:09:49.234259Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MIND-V: Hierarchical World Model for Long-Horizon Robotic Manipulation with RL-based Physical Alignment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.RO","authors_text":"Haocheng Luo, Jun Zhou, Mingyang Zhang, Puxin Yan, Ruicheng Zhang, Xiaofan Liu, Xiu Li, Zhizhou Zhong, Zunnan Xu","submitted_at":"2025-12-07T02:28:06Z","abstract_excerpt":"Scalable embodied intelligence is constrained by the scarcity of diverse, long-horizon robotic manipulation data. Existing video world models in this domain are limited to synthesizing short clips of simple actions and often rely on manually defined trajectories. To this end, we introduce MIND-V, a cognitive hierarchical world model designed to synthesize physically plausible and logically coherent videos of long-horizon robotic manipulation. Inspired by cognitive science, MIND-V bridges high-level reasoning with pixel-level synthesis through three core components: a Semantic Reasoning Hub (SR"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.06628","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.06628/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.06628","created_at":"2026-06-10T01:09:49.234418+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.06628v3","created_at":"2026-06-10T01:09:49.234418+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.06628","created_at":"2026-06-10T01:09:49.234418+00:00"},{"alias_kind":"pith_short_12","alias_value":"YIPZDVQAKZX2","created_at":"2026-06-10T01:09:49.234418+00:00"},{"alias_kind":"pith_short_16","alias_value":"YIPZDVQAKZX22K32","created_at":"2026-06-10T01:09:49.234418+00:00"},{"alias_kind":"pith_short_8","alias_value":"YIPZDVQA","created_at":"2026-06-10T01:09:49.234418+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2602.11146","citing_title":"Beyond VLM-Based Rewards: Diffusion-Native Latent Reward Modeling","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"2603.12639","citing_title":"RoboStereo: Dual-Tower 4D Embodied World Models for Unified Policy Optimization","ref_index":48,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14278","citing_title":"KVPO: ODE-Native GRPO for Autoregressive Video Alignment via KV Semantic Exploration","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13775","citing_title":"RoboEvolve: Co-Evolving Planner-Simulator for Robotic Manipulation with Limited Data","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07514","citing_title":"Is the Future Compatible? Diagnosing Dynamic Consistency in World Action Models","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2604.05672","citing_title":"A1: A Fully Transparent Open-Source, Adaptive and Efficient Truncated Vision-Language-Action Model","ref_index":56,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW","json":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW.json","graph_json":"https://pith.science/api/pith-number/YIPZDVQAKZX22K32RLVJLHZUJW/graph.json","events_json":"https://pith.science/api/pith-number/YIPZDVQAKZX22K32RLVJLHZUJW/events.json","paper":"https://pith.science/paper/YIPZDVQA"},"agent_actions":{"view_html":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW","download_json":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW.json","view_paper":"https://pith.science/paper/YIPZDVQA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.06628&json=true","fetch_graph":"https://pith.science/api/pith-number/YIPZDVQAKZX22K32RLVJLHZUJW/graph.json","fetch_events":"https://pith.science/api/pith-number/YIPZDVQAKZX22K32RLVJLHZUJW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW/action/storage_attestation","attest_author":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW/action/author_attestation","sign_citation":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW/action/citation_signature","submit_replication":"https://pith.science/pith/YIPZDVQAKZX22K32RLVJLHZUJW/action/replication_record"}},"created_at":"2026-06-10T01:09:49.234418+00:00","updated_at":"2026-06-10T01:09:49.234418+00:00"}