{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:QKJPCUZF4W5AE62HTEJ6C5DBZT","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3c4d8838581bcf3fc2f103a87d93879b626f2302282534ebf289f226ce08705b","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-11-22T00:45:33Z","title_canon_sha256":"f54530b9854b1d970c515e608ea7372d2c911ad1e3fc75365d6917fff23f56e3"},"schema_version":"1.0","source":{"id":"2511.17855","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2511.17855","created_at":"2026-05-18T03:09:33Z"},{"alias_kind":"arxiv_version","alias_value":"2511.17855v2","created_at":"2026-05-18T03:09:33Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.17855","created_at":"2026-05-18T03:09:33Z"},{"alias_kind":"pith_short_12","alias_value":"QKJPCUZF4W5A","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"QKJPCUZF4W5AE62H","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"QKJPCUZF","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:674f1ee035688468da4804fd3c466ae54e6eea5aaab8de15da4db7c00cdaff2d","target":"graph","created_at":"2026-05-18T03:09:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"QuickLAP reduces reward learning error by over 70% compared to physical-only and heuristic multimodal baselines in a semi-autonomous driving simulator, with a 15-participant user study showing significantly higher understandability, collaboration, and preference for the learned behavior."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That large language models can reliably extract accurate reward feature attention masks and preference shifts from free-form user utterances without introducing systematic bias or hallucination that would degrade the Bayesian fusion."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"QuickLAP fuses language and physical feedback in a Bayesian update to learn reward functions in real time for semi-autonomous systems, reducing error by over 70% versus physical-only and heuristic baselines."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time."}],"snapshot_sha256":"996802faa337dda7bea9194d4f6f1f75c7ec695618099cb0f70a5b2ff2b59122"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b7b67fa83e3f14d5849b551c720209c6107b1f0ec4394bf3184f56717422d8b3"},"paper":{"abstract_excerpt":"Robots must learn from both what people do and what they say, but either modality alone is often incomplete: physical corrections are grounded but ambiguous in intent, while language expresses high-level goals but lacks physical grounding. We introduce QuickLAP: Quick Language-Action Preference learning, a Bayesian framework that fuses physical and language feedback to infer reward functions in real time. Our key insight is to treat language as a probabilistic observation over the user's latent preferences, clarifying which reward features matter and how physical corrections should be interpre","authors_text":"Andreea Bobu, David Lee, Jordan Abi Nader, Nathaniel Dennler","cross_cats":["cs.RO"],"headline":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-11-22T00:45:33Z","title":"QuickLAP: Quick Language-Action Preference Learning for Semi-Autonomous Systems"},"references":{"count":70,"internal_anchors":0,"resolved_work":70,"sample":[{"cited_arxiv_id":"","doi":"10.1145/1015330.1015430","is_internal_anchor":false,"ref_index":1,"title":"In: Proceedings of the Twenty-First International Conference on Machine Learning (ICML)","work_id":"ec674945-89da-4bc1-99ff-754529936f07","year":2004},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Henny Admoni and Brian Scassellati. 2017. Social eye gaze in human-robot interaction: a review.Journal of Human-Robot Interaction6, 1 (2017), 25–63","work_id":"d4ce08e8-e337-4843-8274-c4887eb1ef2f","year":2017},{"cited_arxiv_id":"","doi":"10.1145/3171221.3171267","is_internal_anchor":false,"ref_index":3,"title":"Andrea Bajcsy, Dylan P. Losey, Marcia K. O’Malley, and Anca D. Dragan. 2018. Learning from Physical Human Corrections, One Feature at a Time. InPro- ceedings of the 2018 ACM/IEEE International Confere","work_id":"c44d6fe6-6eac-48ae-b2f6-b6dcf02269fc","year":2018},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Andrea Bajcsy, Dylan P. Losey, Marcia K. O’Malley, and Anca D. Dragan. 2017. Learning Robot Objectives from Physical Human Interaction. InProceedings of the 1st Annual Conference on Robot Learning (Pr","work_id":"07d7f2b3-2f6a-43d8-8cc4-0254a816b0aa","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Chris L Baker, Joshua B Tenenbaum, and Rebecca R Saxe. 2007. Goal inference as inverse planning. InProceedings of the Annual Meeting of the Cognitive Science Society, Vol. 29","work_id":"def66dda-2650-4e11-8504-3fd1a372237d","year":2007}],"snapshot_sha256":"e41889c37836b58e8f8e40fc8497f326ee82db6e1bfaac1520a0b601d5a7e801"},"source":{"id":"2511.17855","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T06:46:57.341389Z","id":"f14c2d78-4f67-47e0-83ff-308866ad9117","model_set":{"reader":"grok-4.3"},"one_line_summary":"QuickLAP fuses language and physical feedback in a Bayesian update to learn reward functions in real time for semi-autonomous systems, reducing error by over 70% versus physical-only and heuristic baselines.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time.","strongest_claim":"QuickLAP reduces reward learning error by over 70% compared to physical-only and heuristic multimodal baselines in a semi-autonomous driving simulator, with a 15-participant user study showing significantly higher understandability, collaboration, and preference for the learned behavior.","weakest_assumption":"That large language models can reliably extract accurate reward feature attention masks and preference shifts from free-form user utterances without introducing systematic bias or hallucination that would degrade the Bayesian fusion."}},"verdict_id":"f14c2d78-4f67-47e0-83ff-308866ad9117"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e13aa03baccd1b9df301a46a1761a6ed9baa557c8d8fc8f956b089c4ac747eee","target":"record","created_at":"2026-05-18T03:09:33Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3c4d8838581bcf3fc2f103a87d93879b626f2302282534ebf289f226ce08705b","cross_cats_sorted":["cs.RO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-11-22T00:45:33Z","title_canon_sha256":"f54530b9854b1d970c515e608ea7372d2c911ad1e3fc75365d6917fff23f56e3"},"schema_version":"1.0","source":{"id":"2511.17855","kind":"arxiv","version":2}},"canonical_sha256":"8292f15325e5ba027b479913e17461ccc6d2f04bec79f5d85225a876233d2055","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"8292f15325e5ba027b479913e17461ccc6d2f04bec79f5d85225a876233d2055","first_computed_at":"2026-05-18T03:09:33.011687Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:33.011687Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NuFHLpnpeqQmvtE2LJt6IZbtv7xWa7vaG+QTibcmGeG51tU9metssvfQ6wr536qCVit34Ka8aTpjVKtN953qCA==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:33.012539Z","signed_message":"canonical_sha256_bytes"},"source_id":"2511.17855","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e13aa03baccd1b9df301a46a1761a6ed9baa557c8d8fc8f956b089c4ac747eee","sha256:674f1ee035688468da4804fd3c466ae54e6eea5aaab8de15da4db7c00cdaff2d"],"state_sha256":"d0975c1fa513b1e71a4d53b46d75aa02dcb9b21acda19a37284c0344e260380b"}