{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:QKJPCUZF4W5AE62HTEJ6C5DBZT","short_pith_number":"pith:QKJPCUZF","schema_version":"1.0","canonical_sha256":"8292f15325e5ba027b479913e17461ccc6d2f04bec79f5d85225a876233d2055","source":{"kind":"arxiv","id":"2511.17855","version":2},"attestation_state":"computed","paper":{"title":"QuickLAP: Quick Language-Action Preference Learning for Semi-Autonomous Systems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time.","cross_cats":["cs.RO"],"primary_cat":"cs.AI","authors_text":"Andreea Bobu, David Lee, Jordan Abi Nader, Nathaniel Dennler","submitted_at":"2025-11-22T00:45:33Z","abstract_excerpt":"Robots must learn from both what people do and what they say, but either modality alone is often incomplete: physical corrections are grounded but ambiguous in intent, while language expresses high-level goals but lacks physical grounding. We introduce QuickLAP: Quick Language-Action Preference learning, a Bayesian framework that fuses physical and language feedback to infer reward functions in real time. Our key insight is to treat language as a probabilistic observation over the user's latent preferences, clarifying which reward features matter and how physical corrections should be interpre"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2511.17855","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-11-22T00:45:33Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"f54530b9854b1d970c515e608ea7372d2c911ad1e3fc75365d6917fff23f56e3","abstract_canon_sha256":"3c4d8838581bcf3fc2f103a87d93879b626f2302282534ebf289f226ce08705b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:33.012539Z","signature_b64":"NuFHLpnpeqQmvtE2LJt6IZbtv7xWa7vaG+QTibcmGeG51tU9metssvfQ6wr536qCVit34Ka8aTpjVKtN953qCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8292f15325e5ba027b479913e17461ccc6d2f04bec79f5d85225a876233d2055","last_reissued_at":"2026-05-18T03:09:33.011687Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:33.011687Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"QuickLAP: Quick Language-Action Preference Learning for Semi-Autonomous Systems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time.","cross_cats":["cs.RO"],"primary_cat":"cs.AI","authors_text":"Andreea Bobu, David Lee, Jordan Abi Nader, Nathaniel Dennler","submitted_at":"2025-11-22T00:45:33Z","abstract_excerpt":"Robots must learn from both what people do and what they say, but either modality alone is often incomplete: physical corrections are grounded but ambiguous in intent, while language expresses high-level goals but lacks physical grounding. We introduce QuickLAP: Quick Language-Action Preference learning, a Bayesian framework that fuses physical and language feedback to infer reward functions in real time. Our key insight is to treat language as a probabilistic observation over the user's latent preferences, clarifying which reward features matter and how physical corrections should be interpre"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"QuickLAP reduces reward learning error by over 70% compared to physical-only and heuristic multimodal baselines in a semi-autonomous driving simulator, with a 15-participant user study showing significantly higher understandability, collaboration, and preference for the learned behavior.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That large language models can reliably extract accurate reward feature attention masks and preference shifts from free-form user utterances without introducing systematic bias or hallucination that would degrade the Bayesian fusion.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"QuickLAP fuses language and physical feedback in a Bayesian update to learn reward functions in real time for semi-autonomous systems, reducing error by over 70% versus physical-only and heuristic baselines.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"996802faa337dda7bea9194d4f6f1f75c7ec695618099cb0f70a5b2ff2b59122"},"source":{"id":"2511.17855","kind":"arxiv","version":2},"verdict":{"id":"f14c2d78-4f67-47e0-83ff-308866ad9117","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T06:46:57.341389Z","strongest_claim":"QuickLAP reduces reward learning error by over 70% compared to physical-only and heuristic multimodal baselines in a semi-autonomous driving simulator, with a 15-participant user study showing significantly higher understandability, collaboration, and preference for the learned behavior.","one_line_summary":"QuickLAP fuses language and physical feedback in a Bayesian update to learn reward functions in real time for semi-autonomous systems, reducing error by over 70% versus physical-only and heuristic baselines.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That large language models can reliably extract accurate reward feature attention masks and preference shifts from free-form user utterances without introducing systematic bias or hallucination that would degrade the Bayesian fusion.","pith_extraction_headline":"QuickLAP fuses language feedback as probabilistic observations with physical corrections to infer robot reward functions in real time."},"references":{"count":70,"sample":[{"doi":"10.1145/1015330.1015430","year":2004,"title":"In: Proceedings of the Twenty-First International Conference on Machine Learning (ICML)","work_id":"ec674945-89da-4bc1-99ff-754529936f07","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Henny Admoni and Brian Scassellati. 2017. Social eye gaze in human-robot interaction: a review.Journal of Human-Robot Interaction6, 1 (2017), 25–63","work_id":"d4ce08e8-e337-4843-8274-c4887eb1ef2f","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1145/3171221.3171267","year":2018,"title":"Andrea Bajcsy, Dylan P. Losey, Marcia K. O’Malley, and Anca D. Dragan. 2018. Learning from Physical Human Corrections, One Feature at a Time. InPro- ceedings of the 2018 ACM/IEEE International Confere","work_id":"c44d6fe6-6eac-48ae-b2f6-b6dcf02269fc","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Andrea Bajcsy, Dylan P. Losey, Marcia K. O’Malley, and Anca D. Dragan. 2017. Learning Robot Objectives from Physical Human Interaction. InProceedings of the 1st Annual Conference on Robot Learning (Pr","work_id":"07d7f2b3-2f6a-43d8-8cc4-0254a816b0aa","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2007,"title":"Chris L Baker, Joshua B Tenenbaum, and Rebecca R Saxe. 2007. Goal inference as inverse planning. InProceedings of the Annual Meeting of the Cognitive Science Society, Vol. 29","work_id":"def66dda-2650-4e11-8504-3fd1a372237d","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":70,"snapshot_sha256":"e41889c37836b58e8f8e40fc8497f326ee82db6e1bfaac1520a0b601d5a7e801","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b7b67fa83e3f14d5849b551c720209c6107b1f0ec4394bf3184f56717422d8b3"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.17855","created_at":"2026-05-18T03:09:33.011836+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.17855v2","created_at":"2026-05-18T03:09:33.011836+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.17855","created_at":"2026-05-18T03:09:33.011836+00:00"},{"alias_kind":"pith_short_12","alias_value":"QKJPCUZF4W5A","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"QKJPCUZF4W5AE62H","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"QKJPCUZF","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT","json":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT.json","graph_json":"https://pith.science/api/pith-number/QKJPCUZF4W5AE62HTEJ6C5DBZT/graph.json","events_json":"https://pith.science/api/pith-number/QKJPCUZF4W5AE62HTEJ6C5DBZT/events.json","paper":"https://pith.science/paper/QKJPCUZF"},"agent_actions":{"view_html":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT","download_json":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT.json","view_paper":"https://pith.science/paper/QKJPCUZF","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.17855&json=true","fetch_graph":"https://pith.science/api/pith-number/QKJPCUZF4W5AE62HTEJ6C5DBZT/graph.json","fetch_events":"https://pith.science/api/pith-number/QKJPCUZF4W5AE62HTEJ6C5DBZT/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT/action/storage_attestation","attest_author":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT/action/author_attestation","sign_citation":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT/action/citation_signature","submit_replication":"https://pith.science/pith/QKJPCUZF4W5AE62HTEJ6C5DBZT/action/replication_record"}},"created_at":"2026-05-18T03:09:33.011836+00:00","updated_at":"2026-05-18T03:09:33.011836+00:00"}