{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NSC7EKMEI5QJPWIKA5PPF3YJYY","short_pith_number":"pith:NSC7EKME","schema_version":"1.0","canonical_sha256":"6c85f22984476097d90a075ef2ef09c607be37d2880e6c316e4e915872fe96de","source":{"kind":"arxiv","id":"2605.30282","version":1},"attestation_state":"computed","paper":{"title":"Gaze2Act: Gaze-Conditioned Vision-Language-Action Policies for Interactive Robot Manipulation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Bofan Lyu, Boyu Ma, Chuhao Zhou, Geng Li, Gen Li, Jianfei Yang, Jiaqi Bai, Kuangji Zuo, Shijia Han, Xichen Yuan, Xinyu Zhou, Yanshuo Lu","submitted_at":"2026-05-28T17:37:16Z","abstract_excerpt":"Vision-Language-Action (VLA) models have recently shown strong potential for robot learning by following language instructions. However, in practice, language alone is often insufficient to precisely convey human intent. It is difficult to describe which exact object to interact with among similar candidates, where to act on the object, or how the target may change during execution. To address this limitation, we propose Gaze2Act, a novel VLA framework that leverages human gaze as a dynamic and intuitive intent signal for complex interactive manipulation. Gaze2Act first bridges the ego-exo vie"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.30282","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-05-28T17:37:16Z","cross_cats_sorted":[],"title_canon_sha256":"12b39aee37d8d4f0f536a09ace92edab34c00e86e66aea340eae323239a94244","abstract_canon_sha256":"acf53c2b7de486e886ea97ccdaaf56133a356c7e89a33b5ff6245f759285dc5b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T02:06:15.170639Z","signature_b64":"OelAkoQe0lXygRuuiVxkG6zO0COqNyfxDHunB+HjpyIygEokKpsfvbF6izRh26Fo3rmfVmz2tXhIeUZFF5dTCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6c85f22984476097d90a075ef2ef09c607be37d2880e6c316e4e915872fe96de","last_reissued_at":"2026-05-29T02:06:15.170226Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T02:06:15.170226Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Gaze2Act: Gaze-Conditioned Vision-Language-Action Policies for Interactive Robot Manipulation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.RO","authors_text":"Bofan Lyu, Boyu Ma, Chuhao Zhou, Geng Li, Gen Li, Jianfei Yang, Jiaqi Bai, Kuangji Zuo, Shijia Han, Xichen Yuan, Xinyu Zhou, Yanshuo Lu","submitted_at":"2026-05-28T17:37:16Z","abstract_excerpt":"Vision-Language-Action (VLA) models have recently shown strong potential for robot learning by following language instructions. However, in practice, language alone is often insufficient to precisely convey human intent. It is difficult to describe which exact object to interact with among similar candidates, where to act on the object, or how the target may change during execution. To address this limitation, we propose Gaze2Act, a novel VLA framework that leverages human gaze as a dynamic and intuitive intent signal for complex interactive manipulation. Gaze2Act first bridges the ego-exo vie"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.30282","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.30282/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.30282","created_at":"2026-05-29T02:06:15.170284+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.30282v1","created_at":"2026-05-29T02:06:15.170284+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.30282","created_at":"2026-05-29T02:06:15.170284+00:00"},{"alias_kind":"pith_short_12","alias_value":"NSC7EKMEI5QJ","created_at":"2026-05-29T02:06:15.170284+00:00"},{"alias_kind":"pith_short_16","alias_value":"NSC7EKMEI5QJPWIK","created_at":"2026-05-29T02:06:15.170284+00:00"},{"alias_kind":"pith_short_8","alias_value":"NSC7EKME","created_at":"2026-05-29T02:06:15.170284+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY","json":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY.json","graph_json":"https://pith.science/api/pith-number/NSC7EKMEI5QJPWIKA5PPF3YJYY/graph.json","events_json":"https://pith.science/api/pith-number/NSC7EKMEI5QJPWIKA5PPF3YJYY/events.json","paper":"https://pith.science/paper/NSC7EKME"},"agent_actions":{"view_html":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY","download_json":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY.json","view_paper":"https://pith.science/paper/NSC7EKME","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.30282&json=true","fetch_graph":"https://pith.science/api/pith-number/NSC7EKMEI5QJPWIKA5PPF3YJYY/graph.json","fetch_events":"https://pith.science/api/pith-number/NSC7EKMEI5QJPWIKA5PPF3YJYY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY/action/storage_attestation","attest_author":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY/action/author_attestation","sign_citation":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY/action/citation_signature","submit_replication":"https://pith.science/pith/NSC7EKMEI5QJPWIKA5PPF3YJYY/action/replication_record"}},"created_at":"2026-05-29T02:06:15.170284+00:00","updated_at":"2026-05-29T02:06:15.170284+00:00"}