{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AR46D7UDWX3D4BNCLN7VAN3H4E","short_pith_number":"pith:AR46D7UD","canonical_record":{"source":{"id":"2605.31387","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T14:51:49Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"d89fa0f45d77f73ab6e1698e24f10bd5adbae9ec8d90acd7238abbc9d00d3acf","abstract_canon_sha256":"0e9eb90c1c98393f94af8a2704685a8ab9436624eed83932e97d44bfb0cd0213"},"schema_version":"1.0"},"canonical_sha256":"0479e1fe83b5f63e05a25b7f503767e10bfa69fef087f6863b6f8fbdcdfd22e5","source":{"kind":"arxiv","id":"2605.31387","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.31387","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"arxiv_version","alias_value":"2605.31387v1","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.31387","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_12","alias_value":"AR46D7UDWX3D","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_16","alias_value":"AR46D7UDWX3D4BNC","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_8","alias_value":"AR46D7UD","created_at":"2026-06-01T02:04:02Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AR46D7UDWX3D4BNCLN7VAN3H4E","target":"record","payload":{"canonical_record":{"source":{"id":"2605.31387","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T14:51:49Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"d89fa0f45d77f73ab6e1698e24f10bd5adbae9ec8d90acd7238abbc9d00d3acf","abstract_canon_sha256":"0e9eb90c1c98393f94af8a2704685a8ab9436624eed83932e97d44bfb0cd0213"},"schema_version":"1.0"},"canonical_sha256":"0479e1fe83b5f63e05a25b7f503767e10bfa69fef087f6863b6f8fbdcdfd22e5","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T02:04:02.452182Z","signature_b64":"Jrg0NtHu3oUpL5WeBfLQrq34I44Zpw3rA21RUscFUTCpaAFOINQ18lIghOXeoa/dOf749dkySB0QeO1KP9n+Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0479e1fe83b5f63e05a25b7f503767e10bfa69fef087f6863b6f8fbdcdfd22e5","last_reissued_at":"2026-06-01T02:04:02.451418Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T02:04:02.451418Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.31387","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T02:04:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GyulPCAM0fpxOeBK2Mcu/HFp92YeTqpWeK3/YWSLjxLAmXryf9eahbfrAB/Ymd4jk1JusscwHITIKJ7yyes7Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T15:38:31.111417Z"},"content_sha256":"b8928af6feff7434e16943c21b1dfa8f558a41dbe42d83bf7a8d495f78644af4","schema_version":"1.0","event_id":"sha256:b8928af6feff7434e16943c21b1dfa8f558a41dbe42d83bf7a8d495f78644af4"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AR46D7UDWX3D4BNCLN7VAN3H4E","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-Turn Multi-Agent Dialogue for Collaborative Reconstruction Improves VLM Performance on Spatial Reasoning, But Only Barely","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.RO"],"primary_cat":"cs.CL","authors_text":"Chalamalasetti Kranti, David Schlangen, Sherzod Hakimov","submitted_at":"2026-05-29T14:51:49Z","abstract_excerpt":"Robots operating in diverse environments rely on visual input to interpret objects and spatial layouts. In human-collaborative tasks, they are expected to communicate this understanding through language. Vision-language models (VLMs) support robotic tasks involving visual interpretation, question answering, and instruction following, but their capabilities in collaborative dialogue tasks requiring spatial reasoning remain underexplored. We study this gap through a collaborative structure-building task that combines visual interpretation, grounding, language-guided interaction, and action gener"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.31387","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.31387/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T02:04:02Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KBvtb056VXUMgAUfkbDQUsNII7q+El984dBl+mE74SR9xAvv4LmGh3FP/VN404kMT56f24xio1zIC4Xka69eDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T15:38:31.111792Z"},"content_sha256":"a42329db5993559aaa0fde0f7fafe5d3c6df64d1adec0c849a7da8b575112387","schema_version":"1.0","event_id":"sha256:a42329db5993559aaa0fde0f7fafe5d3c6df64d1adec0c849a7da8b575112387"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/bundle.json","state_url":"https://pith.science/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T15:38:31Z","links":{"resolver":"https://pith.science/pith/AR46D7UDWX3D4BNCLN7VAN3H4E","bundle":"https://pith.science/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/bundle.json","state":"https://pith.science/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AR46D7UDWX3D4BNCLN7VAN3H4E/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AR46D7UDWX3D4BNCLN7VAN3H4E","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0e9eb90c1c98393f94af8a2704685a8ab9436624eed83932e97d44bfb0cd0213","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T14:51:49Z","title_canon_sha256":"d89fa0f45d77f73ab6e1698e24f10bd5adbae9ec8d90acd7238abbc9d00d3acf"},"schema_version":"1.0","source":{"id":"2605.31387","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.31387","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"arxiv_version","alias_value":"2605.31387v1","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.31387","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_12","alias_value":"AR46D7UDWX3D","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_16","alias_value":"AR46D7UDWX3D4BNC","created_at":"2026-06-01T02:04:02Z"},{"alias_kind":"pith_short_8","alias_value":"AR46D7UD","created_at":"2026-06-01T02:04:02Z"}],"graph_snapshots":[{"event_id":"sha256:a42329db5993559aaa0fde0f7fafe5d3c6df64d1adec0c849a7da8b575112387","target":"graph","created_at":"2026-06-01T02:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.31387/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Robots operating in diverse environments rely on visual input to interpret objects and spatial layouts. In human-collaborative tasks, they are expected to communicate this understanding through language. Vision-language models (VLMs) support robotic tasks involving visual interpretation, question answering, and instruction following, but their capabilities in collaborative dialogue tasks requiring spatial reasoning remain underexplored. We study this gap through a collaborative structure-building task that combines visual interpretation, grounding, language-guided interaction, and action gener","authors_text":"Chalamalasetti Kranti, David Schlangen, Sherzod Hakimov","cross_cats":["cs.RO"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T14:51:49Z","title":"Multi-Turn Multi-Agent Dialogue for Collaborative Reconstruction Improves VLM Performance on Spatial Reasoning, But Only Barely"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.31387","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b8928af6feff7434e16943c21b1dfa8f558a41dbe42d83bf7a8d495f78644af4","target":"record","created_at":"2026-06-01T02:04:02Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0e9eb90c1c98393f94af8a2704685a8ab9436624eed83932e97d44bfb0cd0213","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-29T14:51:49Z","title_canon_sha256":"d89fa0f45d77f73ab6e1698e24f10bd5adbae9ec8d90acd7238abbc9d00d3acf"},"schema_version":"1.0","source":{"id":"2605.31387","kind":"arxiv","version":1}},"canonical_sha256":"0479e1fe83b5f63e05a25b7f503767e10bfa69fef087f6863b6f8fbdcdfd22e5","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0479e1fe83b5f63e05a25b7f503767e10bfa69fef087f6863b6f8fbdcdfd22e5","first_computed_at":"2026-06-01T02:04:02.451418Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T02:04:02.451418Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Jrg0NtHu3oUpL5WeBfLQrq34I44Zpw3rA21RUscFUTCpaAFOINQ18lIghOXeoa/dOf749dkySB0QeO1KP9n+Cw==","signature_status":"signed_v1","signed_at":"2026-06-01T02:04:02.452182Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.31387","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b8928af6feff7434e16943c21b1dfa8f558a41dbe42d83bf7a8d495f78644af4","sha256:a42329db5993559aaa0fde0f7fafe5d3c6df64d1adec0c849a7da8b575112387"],"state_sha256":"05bf7b9f7705ca575d3e48b62287d62758755f14e6f7fdf504c809719f87196a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bO8sj/RYbdo5T9NS2pT46TB0RiBkLKFVkxCsh+JsczNHmehLjJGlY+AB80wHhdr4ZhBrwVU+/ZqzvykmZU7VAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T15:38:31.113741Z","bundle_sha256":"4f1bec4d7fcbbe6d199268683bdb45388ddfbbab3bc077ee8c9183e923d8ecac"}}