{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LRE4WLRKJ7YEJ33XWVD4N54PNC","short_pith_number":"pith:LRE4WLRK","schema_version":"1.0","canonical_sha256":"5c49cb2e2a4ff044ef77b547c6f78f6895f9926e0d11e727521e3934d7479472","source":{"kind":"arxiv","id":"2606.12105","version":1},"attestation_state":"computed","paper":{"title":"DAM-VLA: Decoupled Asynchronous Multimodal Vision Language Action model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.RO","authors_text":"Gianluca Geraci, Jakub Suliga, Moritz Reuss, Pankhuri Vanjani, Rudolf Lioutikov, Xinkai Jiang, Zhuoyue Li","submitted_at":"2026-06-10T13:59:07Z","abstract_excerpt":"Vision-language-action (VLA) models inherit a shared synchronous clock from vision-language pretraining, processing every input at one rate. This is misaligned with physical interaction, where a high-frequency modality changes at hundreds of hertz, vision evolves more slowly, and language stays constant across an episode. A synchronous VLA oversamples slow modalities, undersamples fast ones, and caps action generation at the lowest effective frequency. We hypothesize that decoupling temporal processing per modality, letting each update and retain information at its own sensor rate, yields stro"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12105","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-06-10T13:59:07Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"1b6f998c0c41392916a104cfe325cd72ff831ad65dd53da54c179eb12ff1aaca","abstract_canon_sha256":"3babbbf973ac87f83fa5088b74d340e090986bee754b510e38ded29c431dc288"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:10:48.406053Z","signature_b64":"Bg7RwbF4CMS3kFruNTTNbde19fhWSQbl+RjOE8HfALO9DmS94KrTJIdKtN0LYC5PavcGcr90P0OiCJwFeZn2Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5c49cb2e2a4ff044ef77b547c6f78f6895f9926e0d11e727521e3934d7479472","last_reissued_at":"2026-06-11T01:10:48.405145Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:10:48.405145Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"DAM-VLA: Decoupled Asynchronous Multimodal Vision Language Action model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.RO","authors_text":"Gianluca Geraci, Jakub Suliga, Moritz Reuss, Pankhuri Vanjani, Rudolf Lioutikov, Xinkai Jiang, Zhuoyue Li","submitted_at":"2026-06-10T13:59:07Z","abstract_excerpt":"Vision-language-action (VLA) models inherit a shared synchronous clock from vision-language pretraining, processing every input at one rate. This is misaligned with physical interaction, where a high-frequency modality changes at hundreds of hertz, vision evolves more slowly, and language stays constant across an episode. A synchronous VLA oversamples slow modalities, undersamples fast ones, and caps action generation at the lowest effective frequency. We hypothesize that decoupling temporal processing per modality, letting each update and retain information at its own sensor rate, yields stro"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12105","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12105/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12105","created_at":"2026-06-11T01:10:48.405280+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12105v1","created_at":"2026-06-11T01:10:48.405280+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12105","created_at":"2026-06-11T01:10:48.405280+00:00"},{"alias_kind":"pith_short_12","alias_value":"LRE4WLRKJ7YE","created_at":"2026-06-11T01:10:48.405280+00:00"},{"alias_kind":"pith_short_16","alias_value":"LRE4WLRKJ7YEJ33X","created_at":"2026-06-11T01:10:48.405280+00:00"},{"alias_kind":"pith_short_8","alias_value":"LRE4WLRK","created_at":"2026-06-11T01:10:48.405280+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC","json":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC.json","graph_json":"https://pith.science/api/pith-number/LRE4WLRKJ7YEJ33XWVD4N54PNC/graph.json","events_json":"https://pith.science/api/pith-number/LRE4WLRKJ7YEJ33XWVD4N54PNC/events.json","paper":"https://pith.science/paper/LRE4WLRK"},"agent_actions":{"view_html":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC","download_json":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC.json","view_paper":"https://pith.science/paper/LRE4WLRK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12105&json=true","fetch_graph":"https://pith.science/api/pith-number/LRE4WLRKJ7YEJ33XWVD4N54PNC/graph.json","fetch_events":"https://pith.science/api/pith-number/LRE4WLRKJ7YEJ33XWVD4N54PNC/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC/action/storage_attestation","attest_author":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC/action/author_attestation","sign_citation":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC/action/citation_signature","submit_replication":"https://pith.science/pith/LRE4WLRKJ7YEJ33XWVD4N54PNC/action/replication_record"}},"created_at":"2026-06-11T01:10:48.405280+00:00","updated_at":"2026-06-11T01:10:48.405280+00:00"}