{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PPA2S7JNYF2PZTOE6WGQ2QAMF2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"72ec33970f0f939c1415f1bb52a7a2e408b9d77f282deab1801b180016fe4363","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-06-05T16:21:27Z","title_canon_sha256":"33c3eb4446fd3b1372cb65f3bf83d7fa63c8264be0ebcfc3575ac5f46571bdc3"},"schema_version":"1.0","source":{"id":"2606.07723","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07723","created_at":"2026-06-09T01:04:50Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07723v1","created_at":"2026-06-09T01:04:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07723","created_at":"2026-06-09T01:04:50Z"},{"alias_kind":"pith_short_12","alias_value":"PPA2S7JNYF2P","created_at":"2026-06-09T01:04:50Z"},{"alias_kind":"pith_short_16","alias_value":"PPA2S7JNYF2PZTOE","created_at":"2026-06-09T01:04:50Z"},{"alias_kind":"pith_short_8","alias_value":"PPA2S7JN","created_at":"2026-06-09T01:04:50Z"}],"graph_snapshots":[{"event_id":"sha256:f75222e7d7dde9a397c78334352630606e9dff456b75587a403d3b8f27dc686f","target":"graph","created_at":"2026-06-09T01:04:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07723/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Open-vocabulary long-horizon manipulation requires robots to reason over flexible instructions and complex multi-object scenes while adaptively planning, executing, monitoring, and recovering from failures. We address these demands with a closed agent loop in which a VLM orchestrates heterogeneous robot capabilities as interruptible tools. Unlike in virtual AI agents, the timing of decisions, actions and tool calls is important in a physical world that does not pause for reasoning. We refer to this setting as Physical Orchestration, and propose VoLoAgent, a VLM that plans, monitors, and recove","authors_text":"Alex Zook, Chan Hee Song, Erwin Coumans, Faisal Ladhak, Hugo Hadfield, Jonathan Tremblay, Mikaela Angelina Uy, Qing Qu, Siyi Chen, Stan Birchfield, Valts Blukis, Xuning Yang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-06-05T16:21:27Z","title":"VoLo: A Physical Orchestrator for Open-Vocabulary Long-Horizon Manipulation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07723","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1d0e5f94f5330b906c5f429febc28290bbf5de923833b8395e2568337f146163","target":"record","created_at":"2026-06-09T01:04:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"72ec33970f0f939c1415f1bb52a7a2e408b9d77f282deab1801b180016fe4363","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-06-05T16:21:27Z","title_canon_sha256":"33c3eb4446fd3b1372cb65f3bf83d7fa63c8264be0ebcfc3575ac5f46571bdc3"},"schema_version":"1.0","source":{"id":"2606.07723","kind":"arxiv","version":1}},"canonical_sha256":"7bc1a97d2dc174fccdc4f58d0d400c2ea928df9d1b65ef288a4752e610220a91","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7bc1a97d2dc174fccdc4f58d0d400c2ea928df9d1b65ef288a4752e610220a91","first_computed_at":"2026-06-09T01:04:50.363240Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T01:04:50.363240Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/KyZRsHOtB/xRo85z84jXO49iU3F7hfuBXdzdwcRWk59h3DxdaIbkSsJRcyWjFNcN1uEBuXok7y7FcYtcLKwCw==","signature_status":"signed_v1","signed_at":"2026-06-09T01:04:50.363653Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07723","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1d0e5f94f5330b906c5f429febc28290bbf5de923833b8395e2568337f146163","sha256:f75222e7d7dde9a397c78334352630606e9dff456b75587a403d3b8f27dc686f"],"state_sha256":"f4ffe2cef811e5026e742c990009e3c3a45a2c6c6e2a30eb0beeda34dffe6d8e"}