{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:YC224JSVXG4XIBRNF3RX3AGK3Q","short_pith_number":"pith:YC224JSV","canonical_record":{"source":{"id":"2512.21970","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.RO","submitted_at":"2025-12-26T10:34:20Z","cross_cats_sorted":[],"title_canon_sha256":"d5b1d31910e22fe72b1e1f4dbb8a820b8e340b8453f7f25b32ffd586dba1c3fb","abstract_canon_sha256":"e2d44db087c991bd93d3814d0ebc79f141399e1fd8dcde48736a05ad682c6a86"},"schema_version":"1.0"},"canonical_sha256":"c0b5ae2655b9b974062d2ee37d80cadc1001b57da4c11cd45017223b32d9c1d0","source":{"kind":"arxiv","id":"2512.21970","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.21970","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"arxiv_version","alias_value":"2512.21970v2","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.21970","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_12","alias_value":"YC224JSVXG4X","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_16","alias_value":"YC224JSVXG4XIBRN","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_8","alias_value":"YC224JSV","created_at":"2026-06-29T01:14:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:YC224JSVXG4XIBRNF3RX3AGK3Q","target":"record","payload":{"canonical_record":{"source":{"id":"2512.21970","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.RO","submitted_at":"2025-12-26T10:34:20Z","cross_cats_sorted":[],"title_canon_sha256":"d5b1d31910e22fe72b1e1f4dbb8a820b8e340b8453f7f25b32ffd586dba1c3fb","abstract_canon_sha256":"e2d44db087c991bd93d3814d0ebc79f141399e1fd8dcde48736a05ad682c6a86"},"schema_version":"1.0"},"canonical_sha256":"c0b5ae2655b9b974062d2ee37d80cadc1001b57da4c11cd45017223b32d9c1d0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T01:14:27.310664Z","signature_b64":"gJ+cK5SwB9lmpuqRgPtbHiUMj8zuweTTPfKdhFMqqalQxrgm1K8fyLpmaDRFjQSmawqrWbb2bZBEF/PMgiV1AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c0b5ae2655b9b974062d2ee37d80cadc1001b57da4c11cd45017223b32d9c1d0","last_reissued_at":"2026-06-29T01:14:27.310110Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T01:14:27.310110Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.21970","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:14:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Li/NLKy3DWkJBAYHFMTyVgWbJqOr2s5Lp0VgABUQKbFmrq0lLPeZLznJ7N9imtvzdQSaBgUa1T/MHCUu7QtmBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:34:15.581420Z"},"content_sha256":"c5402c338313075572f512a5174f9829fdfd746fad0872e2a06693d858912222","schema_version":"1.0","event_id":"sha256:c5402c338313075572f512a5174f9829fdfd746fad0872e2a06693d858912222"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:YC224JSVXG4XIBRNF3RX3AGK3Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"StereoVLA: Enhancing Vision-Language-Action Models with Stereo Vision","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Heming Cui, He Wang, Jiayi Su, Mi Yan, Shengliang Deng, Wenhao Zhang, Xiaoguang Zhao, Yitao Zeng, Yixin Zheng, Zhizheng Zhang","submitted_at":"2025-12-26T10:34:20Z","abstract_excerpt":"While Vision-Language-Action (VLA) models excel in generalist manipulation, they often lack fine-grained spatial awareness and show limited viewpoint robustness. This limitation largely stems from the reliance on pretrained RGB encoders, which lack explicit geometric cues and prioritize semantic alignment over geometric representation. We argue that effective visual representations for VLA models must jointly encode both semantic and geometric information. In this paper, we introduce StereoVLA, the first VLA model to incorporate rich geometric cues from large-scale synthetic stereo data. Stere"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.21970","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.21970/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-29T01:14:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ept+99TR5OcrBjdQsu44mzljuVjy6vxigCR11YXgwM3xO4tQ6QV0M2yN2tvzWqJdp1s6mvjjELr54/L7OtVlBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-30T14:34:15.581793Z"},"content_sha256":"ff9206141e89867c8ea475aa25786ebb489aa39f906298195fc9818ad4359292","schema_version":"1.0","event_id":"sha256:ff9206141e89867c8ea475aa25786ebb489aa39f906298195fc9818ad4359292"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/bundle.json","state_url":"https://pith.science/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-30T14:34:15Z","links":{"resolver":"https://pith.science/pith/YC224JSVXG4XIBRNF3RX3AGK3Q","bundle":"https://pith.science/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/bundle.json","state":"https://pith.science/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YC224JSVXG4XIBRNF3RX3AGK3Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:YC224JSVXG4XIBRNF3RX3AGK3Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e2d44db087c991bd93d3814d0ebc79f141399e1fd8dcde48736a05ad682c6a86","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.RO","submitted_at":"2025-12-26T10:34:20Z","title_canon_sha256":"d5b1d31910e22fe72b1e1f4dbb8a820b8e340b8453f7f25b32ffd586dba1c3fb"},"schema_version":"1.0","source":{"id":"2512.21970","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.21970","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"arxiv_version","alias_value":"2512.21970v2","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.21970","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_12","alias_value":"YC224JSVXG4X","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_16","alias_value":"YC224JSVXG4XIBRN","created_at":"2026-06-29T01:14:27Z"},{"alias_kind":"pith_short_8","alias_value":"YC224JSV","created_at":"2026-06-29T01:14:27Z"}],"graph_snapshots":[{"event_id":"sha256:ff9206141e89867c8ea475aa25786ebb489aa39f906298195fc9818ad4359292","target":"graph","created_at":"2026-06-29T01:14:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.21970/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While Vision-Language-Action (VLA) models excel in generalist manipulation, they often lack fine-grained spatial awareness and show limited viewpoint robustness. This limitation largely stems from the reliance on pretrained RGB encoders, which lack explicit geometric cues and prioritize semantic alignment over geometric representation. We argue that effective visual representations for VLA models must jointly encode both semantic and geometric information. In this paper, we introduce StereoVLA, the first VLA model to incorporate rich geometric cues from large-scale synthetic stereo data. Stere","authors_text":"Heming Cui, He Wang, Jiayi Su, Mi Yan, Shengliang Deng, Wenhao Zhang, Xiaoguang Zhao, Yitao Zeng, Yixin Zheng, Zhizheng Zhang","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.RO","submitted_at":"2025-12-26T10:34:20Z","title":"StereoVLA: Enhancing Vision-Language-Action Models with Stereo Vision"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.21970","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c5402c338313075572f512a5174f9829fdfd746fad0872e2a06693d858912222","target":"record","created_at":"2026-06-29T01:14:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e2d44db087c991bd93d3814d0ebc79f141399e1fd8dcde48736a05ad682c6a86","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.RO","submitted_at":"2025-12-26T10:34:20Z","title_canon_sha256":"d5b1d31910e22fe72b1e1f4dbb8a820b8e340b8453f7f25b32ffd586dba1c3fb"},"schema_version":"1.0","source":{"id":"2512.21970","kind":"arxiv","version":2}},"canonical_sha256":"c0b5ae2655b9b974062d2ee37d80cadc1001b57da4c11cd45017223b32d9c1d0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c0b5ae2655b9b974062d2ee37d80cadc1001b57da4c11cd45017223b32d9c1d0","first_computed_at":"2026-06-29T01:14:27.310110Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-29T01:14:27.310110Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gJ+cK5SwB9lmpuqRgPtbHiUMj8zuweTTPfKdhFMqqalQxrgm1K8fyLpmaDRFjQSmawqrWbb2bZBEF/PMgiV1AA==","signature_status":"signed_v1","signed_at":"2026-06-29T01:14:27.310664Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.21970","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c5402c338313075572f512a5174f9829fdfd746fad0872e2a06693d858912222","sha256:ff9206141e89867c8ea475aa25786ebb489aa39f906298195fc9818ad4359292"],"state_sha256":"2e53ffaf562b3ff0a4707877a940b3f347881b23c2b498d2a0223c5eebb9eda7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qmb0pkUQzd4Fq6FmQtGpUhE6Eqe2DvNOBFE55UFXxHKS3mrcHduk1LeC0kxhg1mZBAg920J4phb7p43VerzuDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-30T14:34:15.583719Z","bundle_sha256":"67be9aa75ec222c6004e9409caadf9f213e88b58208f6bcc0b6bb5401c76da75"}}