{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:6PODWBQ66NHPHH4P2YAGBWZZOJ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7716eaf2f5ee9b037088608ffdf6d906d146c65ade6c3f590ef9fba649d57556","cross_cats_sorted":["cs.AI","cs.CV"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.RO","submitted_at":"2025-04-28T14:47:34Z","title_canon_sha256":"5a390a6e64ecfb8fabe23d1f69197cd49ca6c9544d82162d63a710c7e40e70a9"},"schema_version":"1.0","source":{"id":"2504.19854","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2504.19854","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"arxiv_version","alias_value":"2504.19854v1","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.19854","created_at":"2026-05-17T23:38:47Z"},{"alias_kind":"pith_short_12","alias_value":"6PODWBQ66NHP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"6PODWBQ66NHPHH4P","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"6PODWBQ6","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:88194d29a63ba85d0bf688c46b39b659735272b2a2013729d8a3c6635d2f73f3","target":"graph","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experimental results demonstrate that NORA outperforms existing large-scale VLA models, achieving better task performance with significantly reduced computational overhead, making it a more practical solution for real-time robotic autonomy."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The assumption that using the Qwen-2.5-VL-3B as backbone and FAST+ tokenizer will overcome the visual encoding limitations leading to failures in tasks like object grasping, without new issues arising from the reduced model size."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"NORA is a compact 3B-parameter VLA model trained on 970k robot demonstrations that outperforms larger VLA models in embodied tasks while using significantly less computational resources."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A 3B-parameter vision-language-action model outperforms larger ones on robotic tasks with far less computation."}],"snapshot_sha256":"a236f8cd7f9e0571303bf4f47edcdba5bad87bf5cacea023ba40c3e47f00ea8b"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d29dfbdb2fa74751b13007efd57825e1669a8e477915c895ca890c6e7d8d99d6"},"paper":{"abstract_excerpt":"Existing Visual-Language-Action (VLA) models have shown promising performance in zero-shot scenarios, demonstrating impressive task execution and reasoning capabilities. However, a significant challenge arises from the limitations of visual encoding, which can result in failures during tasks such as object grasping. Moreover, these models typically suffer from high computational overhead due to their large sizes, often exceeding 7B parameters. While these models excel in reasoning and task planning, the substantial computational overhead they incur makes them impractical for real-time robotic ","authors_text":"Amir Zadeh, Chia-Yu Hung, Chuan Li, Navonil Majumder, Pengfei Hong, Qi Sun, Soujanya Poria, U-Xuan Tan","cross_cats":["cs.AI","cs.CV"],"headline":"A 3B-parameter vision-language-action model outperforms larger ones on robotic tasks with far less computation.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.RO","submitted_at":"2025-04-28T14:47:34Z","title":"NORA: A Small Open-Sourced Generalist Vision Language Action Model for Embodied Tasks"},"references":{"count":16,"internal_anchors":14,"resolved_work":16,"sample":[{"cited_arxiv_id":"2502.13923","doi":"","is_internal_anchor":true,"ref_index":1,"title":"Qwen2.5-VL Technical Report","work_id":"69dffacb-bfe8-442d-be86-48624c60426f","year":null},{"cited_arxiv_id":"2410.24164","doi":"","is_internal_anchor":true,"ref_index":2,"title":"$\\pi_0$: A Vision-Language-Action Flow Model for General Robot Control","work_id":"f790abdc-a796-482f-a40d-f8ee035ecfc2","year":null},{"cited_arxiv_id":"2307.15818","doi":"","is_internal_anchor":true,"ref_index":3,"title":"RT-2: Vision-Language-Action Models Transfer Web Knowledge to Robotic Control","work_id":"ff438a8a-8003-4fae-9131-acd418b3597b","year":null},{"cited_arxiv_id":"2303.04137","doi":"","is_internal_anchor":true,"ref_index":4,"title":"Diffusion Policy: Visuomotor Policy Learning via Action Diffusion","work_id":"2dce18e6-f07a-4f57-8a81-e71c3e6a293c","year":null},{"cited_arxiv_id":"2303.03378","doi":"","is_internal_anchor":true,"ref_index":5,"title":"PaLM-E: An Embodied Multimodal Language Model","work_id":"5b99811a-1d93-47e2-9d59-f4045a0b74a2","year":null}],"snapshot_sha256":"af1b206254e9d24fc4dc7fdf5b923867c8a42b6819e974c0c876c8bb15b31a62"},"source":{"id":"2504.19854","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-16T15:49:35.980619Z","id":"a861a0c9-4c1e-4a55-806c-664f92005997","model_set":{"reader":"grok-4.3"},"one_line_summary":"NORA is a compact 3B-parameter VLA model trained on 970k robot demonstrations that outperforms larger VLA models in embodied tasks while using significantly less computational resources.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A 3B-parameter vision-language-action model outperforms larger ones on robotic tasks with far less computation.","strongest_claim":"Experimental results demonstrate that NORA outperforms existing large-scale VLA models, achieving better task performance with significantly reduced computational overhead, making it a more practical solution for real-time robotic autonomy.","weakest_assumption":"The assumption that using the Qwen-2.5-VL-3B as backbone and FAST+ tokenizer will overcome the visual encoding limitations leading to failures in tasks like object grasping, without new issues arising from the reduced model size."}},"verdict_id":"a861a0c9-4c1e-4a55-806c-664f92005997"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c55a2667b6d7f5db6d8e05ca3ac8d1a12a83d5384e4ba2b7091803949f13c4a3","target":"record","created_at":"2026-05-17T23:38:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7716eaf2f5ee9b037088608ffdf6d906d146c65ade6c3f590ef9fba649d57556","cross_cats_sorted":["cs.AI","cs.CV"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.RO","submitted_at":"2025-04-28T14:47:34Z","title_canon_sha256":"5a390a6e64ecfb8fabe23d1f69197cd49ca6c9544d82162d63a710c7e40e70a9"},"schema_version":"1.0","source":{"id":"2504.19854","kind":"arxiv","version":1}},"canonical_sha256":"f3dc3b061ef34ef39f8fd60060db39726382cb3b3c9ae9854e9305501446d3b6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f3dc3b061ef34ef39f8fd60060db39726382cb3b3c9ae9854e9305501446d3b6","first_computed_at":"2026-05-17T23:38:47.387149Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:47.387149Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"t0gVArWx2xHF2MC//SQIjH2ygNFBX2lk75fOOn5i6zn3ou85TTNcSzLQcJ/E4NSA3Z5jku3sryv9BvmCvWaJAg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:47.387644Z","signed_message":"canonical_sha256_bytes"},"source_id":"2504.19854","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c55a2667b6d7f5db6d8e05ca3ac8d1a12a83d5384e4ba2b7091803949f13c4a3","sha256:88194d29a63ba85d0bf688c46b39b659735272b2a2013729d8a3c6635d2f73f3"],"state_sha256":"c43557f199da62ed295f65f459f8a87bcb05863dbcab6fad2da7531c4f089f31"}