{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:XTU5PDCIB75B64GPS7SNYRCLL7","short_pith_number":"pith:XTU5PDCI","canonical_record":{"source":{"id":"2606.06076","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-04T12:13:24Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"17be7f2f48e2d182252385444a93dd088dd7f0d26e1b848f2ac2b17632ea32cd","abstract_canon_sha256":"2c6c25972469ec62838cbf75f922e434b714d1491d086b1dbf9337e10edbcb21"},"schema_version":"1.0"},"canonical_sha256":"bce9d78c480ffa1f70cf97e4dc444b5ffe07bff42c9bd288b9a46063f460d84a","source":{"kind":"arxiv","id":"2606.06076","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.06076","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"arxiv_version","alias_value":"2606.06076v1","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06076","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_12","alias_value":"XTU5PDCIB75B","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_16","alias_value":"XTU5PDCIB75B64GP","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_8","alias_value":"XTU5PDCI","created_at":"2026-06-05T01:15:32Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:XTU5PDCIB75B64GPS7SNYRCLL7","target":"record","payload":{"canonical_record":{"source":{"id":"2606.06076","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-04T12:13:24Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"17be7f2f48e2d182252385444a93dd088dd7f0d26e1b848f2ac2b17632ea32cd","abstract_canon_sha256":"2c6c25972469ec62838cbf75f922e434b714d1491d086b1dbf9337e10edbcb21"},"schema_version":"1.0"},"canonical_sha256":"bce9d78c480ffa1f70cf97e4dc444b5ffe07bff42c9bd288b9a46063f460d84a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:32.251163Z","signature_b64":"IlRyJDnoDqOAxIEFVHQEe3rleslGxv592tFLcqLXJ+Sf/0YJkVsrX/4OI6Pt6O8mOx3BnFRJefUkjaVzK4u1DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bce9d78c480ffa1f70cf97e4dc444b5ffe07bff42c9bd288b9a46063f460d84a","last_reissued_at":"2026-06-05T01:15:32.250751Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:32.250751Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.06076","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HpLAwXJHuCCChVJqVgZdWKaFE8ypeql6QCf2ag3TTf1Xon6fMUV0hkRbyATL7uKRKGmdWCQOLHPQk5k8ArB8AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:33:22.534564Z"},"content_sha256":"209eeecd8231478d74de70b7e7067b2237ccdc32509cf2649733c724a64737a7","schema_version":"1.0","event_id":"sha256:209eeecd8231478d74de70b7e7067b2237ccdc32509cf2649733c724a64737a7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:XTU5PDCIB75B64GPS7SNYRCLL7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Visual Spatial Planning from Symbolic State via Modality-Gap-Aware Self-Distillation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.AI","authors_text":"Haocheng Luo, Jiahui Liu, Jiaqi Huang, Jun Zhou, Quan Shi, Ruicheng Zhang, Xiu Li, Zhizhou Zhong, Zunnan Xu","submitted_at":"2026-06-04T12:13:24Z","abstract_excerpt":"While vision-language models excel at general multimodal understanding, they still struggle with visual spatial planning. We attribute this to a perception-reasoning modality gap: visual planning requires models to infer latent state structures from pixels and then reason over the recovered structure to produce valid actions, whereas symbolic planning directly leverages explicit objects and constraints. This creates dual bottlenecks in visual state recovery and multi-step planning. To address this, we propose MGSD, a two-stage modality-gap-aware self-distillation framework. First, a cold-start"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06076","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.06076/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-05T01:15:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8S84WapiJzTF3iOBZ1ZGKowrn2OJ5sFiasuf2yhzU43DuvfY5C0FkJRX7yvtMhw5FBT/twEoXFJaH1DdFuAwAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-09T08:33:22.535385Z"},"content_sha256":"89359a5ed1ff62575a0abdd7a892ca2b28480bb909887386ccf39879b1ca871e","schema_version":"1.0","event_id":"sha256:89359a5ed1ff62575a0abdd7a892ca2b28480bb909887386ccf39879b1ca871e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XTU5PDCIB75B64GPS7SNYRCLL7/bundle.json","state_url":"https://pith.science/pith/XTU5PDCIB75B64GPS7SNYRCLL7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XTU5PDCIB75B64GPS7SNYRCLL7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-09T08:33:22Z","links":{"resolver":"https://pith.science/pith/XTU5PDCIB75B64GPS7SNYRCLL7","bundle":"https://pith.science/pith/XTU5PDCIB75B64GPS7SNYRCLL7/bundle.json","state":"https://pith.science/pith/XTU5PDCIB75B64GPS7SNYRCLL7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XTU5PDCIB75B64GPS7SNYRCLL7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XTU5PDCIB75B64GPS7SNYRCLL7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2c6c25972469ec62838cbf75f922e434b714d1491d086b1dbf9337e10edbcb21","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-04T12:13:24Z","title_canon_sha256":"17be7f2f48e2d182252385444a93dd088dd7f0d26e1b848f2ac2b17632ea32cd"},"schema_version":"1.0","source":{"id":"2606.06076","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.06076","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"arxiv_version","alias_value":"2606.06076v1","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.06076","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_12","alias_value":"XTU5PDCIB75B","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_16","alias_value":"XTU5PDCIB75B64GP","created_at":"2026-06-05T01:15:32Z"},{"alias_kind":"pith_short_8","alias_value":"XTU5PDCI","created_at":"2026-06-05T01:15:32Z"}],"graph_snapshots":[{"event_id":"sha256:89359a5ed1ff62575a0abdd7a892ca2b28480bb909887386ccf39879b1ca871e","target":"graph","created_at":"2026-06-05T01:15:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.06076/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While vision-language models excel at general multimodal understanding, they still struggle with visual spatial planning. We attribute this to a perception-reasoning modality gap: visual planning requires models to infer latent state structures from pixels and then reason over the recovered structure to produce valid actions, whereas symbolic planning directly leverages explicit objects and constraints. This creates dual bottlenecks in visual state recovery and multi-step planning. To address this, we propose MGSD, a two-stage modality-gap-aware self-distillation framework. First, a cold-start","authors_text":"Haocheng Luo, Jiahui Liu, Jiaqi Huang, Jun Zhou, Quan Shi, Ruicheng Zhang, Xiu Li, Zhizhou Zhong, Zunnan Xu","cross_cats":["cs.CV"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-04T12:13:24Z","title":"Learning Visual Spatial Planning from Symbolic State via Modality-Gap-Aware Self-Distillation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.06076","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:209eeecd8231478d74de70b7e7067b2237ccdc32509cf2649733c724a64737a7","target":"record","created_at":"2026-06-05T01:15:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2c6c25972469ec62838cbf75f922e434b714d1491d086b1dbf9337e10edbcb21","cross_cats_sorted":["cs.CV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-04T12:13:24Z","title_canon_sha256":"17be7f2f48e2d182252385444a93dd088dd7f0d26e1b848f2ac2b17632ea32cd"},"schema_version":"1.0","source":{"id":"2606.06076","kind":"arxiv","version":1}},"canonical_sha256":"bce9d78c480ffa1f70cf97e4dc444b5ffe07bff42c9bd288b9a46063f460d84a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bce9d78c480ffa1f70cf97e4dc444b5ffe07bff42c9bd288b9a46063f460d84a","first_computed_at":"2026-06-05T01:15:32.250751Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-05T01:15:32.250751Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IlRyJDnoDqOAxIEFVHQEe3rleslGxv592tFLcqLXJ+Sf/0YJkVsrX/4OI6Pt6O8mOx3BnFRJefUkjaVzK4u1DA==","signature_status":"signed_v1","signed_at":"2026-06-05T01:15:32.251163Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.06076","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:209eeecd8231478d74de70b7e7067b2237ccdc32509cf2649733c724a64737a7","sha256:89359a5ed1ff62575a0abdd7a892ca2b28480bb909887386ccf39879b1ca871e"],"state_sha256":"9ec9219cff63f4e2d6966ef353d7bc856399ed6e12c2150dfac6bb848b0a8f6b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Xo9dTl5UCSUeKa6iaLQg/I64fW8d27heSnC3Gq2ToCx30Vk+zYHVrZzpi2/Sj/7QQLc0kTMu+5G4w2Co+SX2Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-09T08:33:22.539446Z","bundle_sha256":"36a91ee1a1793ca42f8c8eda230efb6ad019ee2dc91fc8ae8f0694fd7b80077f"}}