{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CWMSRT2S2ZTVORYRAZG4R2QP67","short_pith_number":"pith:CWMSRT2S","schema_version":"1.0","canonical_sha256":"159928cf52d667574711064dc8ea0ff7dd48f00a4231da368cb4041f777bc60e","source":{"kind":"arxiv","id":"2603.06001","version":2},"attestation_state":"computed","paper":{"title":"Restoring Linguistic Grounding in VLA Models via Train-Free Attention Recalibration","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.RO","authors_text":"Bin Zhu, Jingjing Chen, Ninghao Zhang, Shijie Zhou","submitted_at":"2026-03-06T08:01:36Z","abstract_excerpt":"Vision-Language-Action (VLA) models enable robots to perform manipulation tasks directly from natural language instructions and are increasingly viewed as a foundation for generalist robotic policies. However, their reliability under Out-of-Distribution (OOD) instructions remains underexplored. In this paper, we reveal a critical failure mode in which VLA policies continue executing visually plausible actions even when the language instruction contradicts the scene. We refer to this phenomenon as linguistic blindness, where VLA policies prioritize visual priors over instruction semantics durin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.06001","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.RO","submitted_at":"2026-03-06T08:01:36Z","cross_cats_sorted":["cs.AI","cs.CV"],"title_canon_sha256":"7840f3d21922991419becc95788347e6d47e70c178c92efe212694d95ab72c9f","abstract_canon_sha256":"e8ee29e72609b21f9733c32238198aa17964111e0d4dc2662d06bce32bf0d345"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:16:52.398354Z","signature_b64":"k/nOabNHmYTtxUHWqptwBZCya4LLmcC0Gsv8nEE4jDc4bmIqC0ioU6rMf+3+Ely8xjfX0j+O4cTKBRoOU5+mBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"159928cf52d667574711064dc8ea0ff7dd48f00a4231da368cb4041f777bc60e","last_reissued_at":"2026-07-03T01:16:52.397846Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:16:52.397846Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Restoring Linguistic Grounding in VLA Models via Train-Free Attention Recalibration","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","cs.CV"],"primary_cat":"cs.RO","authors_text":"Bin Zhu, Jingjing Chen, Ninghao Zhang, Shijie Zhou","submitted_at":"2026-03-06T08:01:36Z","abstract_excerpt":"Vision-Language-Action (VLA) models enable robots to perform manipulation tasks directly from natural language instructions and are increasingly viewed as a foundation for generalist robotic policies. However, their reliability under Out-of-Distribution (OOD) instructions remains underexplored. In this paper, we reveal a critical failure mode in which VLA policies continue executing visually plausible actions even when the language instruction contradicts the scene. We refer to this phenomenon as linguistic blindness, where VLA policies prioritize visual priors over instruction semantics durin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.06001","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.06001/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.06001","created_at":"2026-07-03T01:16:52.397903+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.06001v2","created_at":"2026-07-03T01:16:52.397903+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.06001","created_at":"2026-07-03T01:16:52.397903+00:00"},{"alias_kind":"pith_short_12","alias_value":"CWMSRT2S2ZTV","created_at":"2026-07-03T01:16:52.397903+00:00"},{"alias_kind":"pith_short_16","alias_value":"CWMSRT2S2ZTVORYR","created_at":"2026-07-03T01:16:52.397903+00:00"},{"alias_kind":"pith_short_8","alias_value":"CWMSRT2S","created_at":"2026-07-03T01:16:52.397903+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2606.02277","citing_title":"RoboSemanticBench: Diagnosing Semantic Grounding in Action Prediction for VLA Models","ref_index":40,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67","json":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67.json","graph_json":"https://pith.science/api/pith-number/CWMSRT2S2ZTVORYRAZG4R2QP67/graph.json","events_json":"https://pith.science/api/pith-number/CWMSRT2S2ZTVORYRAZG4R2QP67/events.json","paper":"https://pith.science/paper/CWMSRT2S"},"agent_actions":{"view_html":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67","download_json":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67.json","view_paper":"https://pith.science/paper/CWMSRT2S","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.06001&json=true","fetch_graph":"https://pith.science/api/pith-number/CWMSRT2S2ZTVORYRAZG4R2QP67/graph.json","fetch_events":"https://pith.science/api/pith-number/CWMSRT2S2ZTVORYRAZG4R2QP67/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67/action/storage_attestation","attest_author":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67/action/author_attestation","sign_citation":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67/action/citation_signature","submit_replication":"https://pith.science/pith/CWMSRT2S2ZTVORYRAZG4R2QP67/action/replication_record"}},"created_at":"2026-07-03T01:16:52.397903+00:00","updated_at":"2026-07-03T01:16:52.397903+00:00"}