{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AZMN26M3X7NL7QVCMAG24I2YIV","short_pith_number":"pith:AZMN26M3","canonical_record":{"source":{"id":"2602.12691","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-02-13T07:46:37Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3a3cff1768e6b6c0347b60cf5a69cea7de41644cc8bfd6233d9eecac8b894b98","abstract_canon_sha256":"b8d3168011442bffc1e0d86829bdb49f955f4e4df2702b688daa8b8677d11d89"},"schema_version":"1.0"},"canonical_sha256":"0658dd799bbfdabfc2a2600dae23584579ad8fd41608b82babd04179a54a3c8c","source":{"kind":"arxiv","id":"2602.12691","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.12691","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"arxiv_version","alias_value":"2602.12691v3","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.12691","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_12","alias_value":"AZMN26M3X7NL","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_16","alias_value":"AZMN26M3X7NL7QVC","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_8","alias_value":"AZMN26M3","created_at":"2026-06-23T02:12:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AZMN26M3X7NL7QVCMAG24I2YIV","target":"record","payload":{"canonical_record":{"source":{"id":"2602.12691","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-02-13T07:46:37Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"3a3cff1768e6b6c0347b60cf5a69cea7de41644cc8bfd6233d9eecac8b894b98","abstract_canon_sha256":"b8d3168011442bffc1e0d86829bdb49f955f4e4df2702b688daa8b8677d11d89"},"schema_version":"1.0"},"canonical_sha256":"0658dd799bbfdabfc2a2600dae23584579ad8fd41608b82babd04179a54a3c8c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T02:12:46.943590Z","signature_b64":"sRFhY4GCA0U61VndAuuSscBjs/X/yT+G5iXVUU9pujxyupedGM/lU1vPcwb4BfZYzMYZen5NHKNq2sF2QReZBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0658dd799bbfdabfc2a2600dae23584579ad8fd41608b82babd04179a54a3c8c","last_reissued_at":"2026-06-23T02:12:46.943084Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T02:12:46.943084Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.12691","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:12:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"InkfrFvWec7TKXdSlN/eUarmd1I96+kGSPHKf+4ATz2DbjS0xCgz8T9MJFM277Qz1Su++2V7l4PPM/6aZI4wAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T00:26:47.846845Z"},"content_sha256":"6d1b1bbd607de9c746d872af614274e0e447925ceb94d5ab47765c1d738bb3ea","schema_version":"1.0","event_id":"sha256:6d1b1bbd607de9c746d872af614274e0e447925ceb94d5ab47765c1d738bb3ea"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AZMN26M3X7NL7QVCMAG24I2YIV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ALOE: Action-Level Off-Policy Evaluation for Vision-Language-Action Model Post-Training","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.RO","authors_text":"Chiming Liu, Chuheng Zhang, Hecheng Wang, Lizhe Qi, Maoqing Yao, Rushuai Yang, Shuoyu Yue, Wei Shan, Xiaohan Yan, Xuan Du, Yi Chen, Yongcheng Liu, Yunlong Wang, Zhichao Wu","submitted_at":"2026-02-13T07:46:37Z","abstract_excerpt":"We study how to improve large foundation vision-language-action (VLA) systems through human-in-the-loop reinforcement learning (RL) in real-world environments. A key challenge is learning reliable value functions from heterogeneous real-world experience, as value estimation provides the primary learning signal for VLA training. In practice, replay buffers contain trajectories collected from historical policies, online rollouts, demonstrations, and intermittent human interventions. Because replay buffers mix trajectories generated by different behaviors, the observed returns can be mismatched w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.12691","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.12691/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-23T02:12:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0TVXjV2gas8044M0Za9SEocJLtuxmGbSBbCkZEfP8l8DiGcKxREVDOkC+PjWJHrVd3ec/9vnMSVuZ57WcTsfCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-25T00:26:47.847218Z"},"content_sha256":"05af0a80d095d46020e65632e6bedff02400e1a92f96f05c08a9a85420a303c9","schema_version":"1.0","event_id":"sha256:05af0a80d095d46020e65632e6bedff02400e1a92f96f05c08a9a85420a303c9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AZMN26M3X7NL7QVCMAG24I2YIV/bundle.json","state_url":"https://pith.science/pith/AZMN26M3X7NL7QVCMAG24I2YIV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AZMN26M3X7NL7QVCMAG24I2YIV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-25T00:26:47Z","links":{"resolver":"https://pith.science/pith/AZMN26M3X7NL7QVCMAG24I2YIV","bundle":"https://pith.science/pith/AZMN26M3X7NL7QVCMAG24I2YIV/bundle.json","state":"https://pith.science/pith/AZMN26M3X7NL7QVCMAG24I2YIV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AZMN26M3X7NL7QVCMAG24I2YIV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AZMN26M3X7NL7QVCMAG24I2YIV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b8d3168011442bffc1e0d86829bdb49f955f4e4df2702b688daa8b8677d11d89","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-02-13T07:46:37Z","title_canon_sha256":"3a3cff1768e6b6c0347b60cf5a69cea7de41644cc8bfd6233d9eecac8b894b98"},"schema_version":"1.0","source":{"id":"2602.12691","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.12691","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"arxiv_version","alias_value":"2602.12691v3","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.12691","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_12","alias_value":"AZMN26M3X7NL","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_16","alias_value":"AZMN26M3X7NL7QVC","created_at":"2026-06-23T02:12:46Z"},{"alias_kind":"pith_short_8","alias_value":"AZMN26M3","created_at":"2026-06-23T02:12:46Z"}],"graph_snapshots":[{"event_id":"sha256:05af0a80d095d46020e65632e6bedff02400e1a92f96f05c08a9a85420a303c9","target":"graph","created_at":"2026-06-23T02:12:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.12691/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study how to improve large foundation vision-language-action (VLA) systems through human-in-the-loop reinforcement learning (RL) in real-world environments. A key challenge is learning reliable value functions from heterogeneous real-world experience, as value estimation provides the primary learning signal for VLA training. In practice, replay buffers contain trajectories collected from historical policies, online rollouts, demonstrations, and intermittent human interventions. Because replay buffers mix trajectories generated by different behaviors, the observed returns can be mismatched w","authors_text":"Chiming Liu, Chuheng Zhang, Hecheng Wang, Lizhe Qi, Maoqing Yao, Rushuai Yang, Shuoyu Yue, Wei Shan, Xiaohan Yan, Xuan Du, Yi Chen, Yongcheng Liu, Yunlong Wang, Zhichao Wu","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-02-13T07:46:37Z","title":"ALOE: Action-Level Off-Policy Evaluation for Vision-Language-Action Model Post-Training"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.12691","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6d1b1bbd607de9c746d872af614274e0e447925ceb94d5ab47765c1d738bb3ea","target":"record","created_at":"2026-06-23T02:12:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b8d3168011442bffc1e0d86829bdb49f955f4e4df2702b688daa8b8677d11d89","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.RO","submitted_at":"2026-02-13T07:46:37Z","title_canon_sha256":"3a3cff1768e6b6c0347b60cf5a69cea7de41644cc8bfd6233d9eecac8b894b98"},"schema_version":"1.0","source":{"id":"2602.12691","kind":"arxiv","version":3}},"canonical_sha256":"0658dd799bbfdabfc2a2600dae23584579ad8fd41608b82babd04179a54a3c8c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0658dd799bbfdabfc2a2600dae23584579ad8fd41608b82babd04179a54a3c8c","first_computed_at":"2026-06-23T02:12:46.943084Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-23T02:12:46.943084Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"sRFhY4GCA0U61VndAuuSscBjs/X/yT+G5iXVUU9pujxyupedGM/lU1vPcwb4BfZYzMYZen5NHKNq2sF2QReZBg==","signature_status":"signed_v1","signed_at":"2026-06-23T02:12:46.943590Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.12691","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6d1b1bbd607de9c746d872af614274e0e447925ceb94d5ab47765c1d738bb3ea","sha256:05af0a80d095d46020e65632e6bedff02400e1a92f96f05c08a9a85420a303c9"],"state_sha256":"778028bc66074acbc2295fa9e9634f24efc155aab8337d9350386e323945f70e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iBa0SH0epnXNdgok25A3Cq9qurVfPwOPsv/yQe/FFDorCxkkt59zM6xuWYLasqAA40fZIr9mQkDqNCV1SF85Bg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-25T00:26:47.849273Z","bundle_sha256":"836747cebb986bbd2f40d7283c445bc7ae629dda8f30ba592ec2f704f9e3021b"}}