{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:SYM3L23KOPUNJPYPCO3YHP6P46","short_pith_number":"pith:SYM3L23K","schema_version":"1.0","canonical_sha256":"9619b5eb6a73e8d4bf0f13b783bfcfe78a7471d98e64dda616f47405e1ecd18f","source":{"kind":"arxiv","id":"1901.08654","version":1},"attestation_state":"computed","paper":{"title":"The Assistive Multi-Armed Bandit","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anca Dragan, Dylan Hadfield-Menell, Lawrence Chan, Siddhartha Srinivasa","submitted_at":"2019-01-24T21:52:01Z","abstract_excerpt":"Learning preferences implicit in the choices humans make is a well studied problem in both economics and computer science. However, most work makes the assumption that humans are acting (noisily) optimally with respect to their preferences. Such approaches can fail when people are themselves learning about what they want. In this work, we introduce the assistive multi-armed bandit, where a robot assists a human playing a bandit task to maximize cumulative reward. In this problem, the human does not know the reward function but can learn it through the rewards received from arm pulls; the robot"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.08654","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-01-24T21:52:01Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"92832c9030df8aeb2d354cb55756377ff0617bab465ebc313efe00a87e39494f","abstract_canon_sha256":"1373a50e49845b55de47510ac5cf2b4ae97df3b7ffa3dec9cc462c80c75710c2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:33.482981Z","signature_b64":"6LbYuLP7RbDEgwH6jm0yl8QfA/bGzwjHbwZPwCyyLcrUXE3TYPNCNWNfXpeAqGNjvgGfW+RKVJfNmNFcZNHBAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9619b5eb6a73e8d4bf0f13b783bfcfe78a7471d98e64dda616f47405e1ecd18f","last_reissued_at":"2026-05-17T23:55:33.482462Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:33.482462Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Assistive Multi-Armed Bandit","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anca Dragan, Dylan Hadfield-Menell, Lawrence Chan, Siddhartha Srinivasa","submitted_at":"2019-01-24T21:52:01Z","abstract_excerpt":"Learning preferences implicit in the choices humans make is a well studied problem in both economics and computer science. However, most work makes the assumption that humans are acting (noisily) optimally with respect to their preferences. Such approaches can fail when people are themselves learning about what they want. In this work, we introduce the assistive multi-armed bandit, where a robot assists a human playing a bandit task to maximize cumulative reward. In this problem, the human does not know the reward function but can learn it through the rewards received from arm pulls; the robot"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.08654","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.08654","created_at":"2026-05-17T23:55:33.482541+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.08654v1","created_at":"2026-05-17T23:55:33.482541+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.08654","created_at":"2026-05-17T23:55:33.482541+00:00"},{"alias_kind":"pith_short_12","alias_value":"SYM3L23KOPUN","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"SYM3L23KOPUNJPYP","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"SYM3L23K","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46","json":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46.json","graph_json":"https://pith.science/api/pith-number/SYM3L23KOPUNJPYPCO3YHP6P46/graph.json","events_json":"https://pith.science/api/pith-number/SYM3L23KOPUNJPYPCO3YHP6P46/events.json","paper":"https://pith.science/paper/SYM3L23K"},"agent_actions":{"view_html":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46","download_json":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46.json","view_paper":"https://pith.science/paper/SYM3L23K","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.08654&json=true","fetch_graph":"https://pith.science/api/pith-number/SYM3L23KOPUNJPYPCO3YHP6P46/graph.json","fetch_events":"https://pith.science/api/pith-number/SYM3L23KOPUNJPYPCO3YHP6P46/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46/action/storage_attestation","attest_author":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46/action/author_attestation","sign_citation":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46/action/citation_signature","submit_replication":"https://pith.science/pith/SYM3L23KOPUNJPYPCO3YHP6P46/action/replication_record"}},"created_at":"2026-05-17T23:55:33.482541+00:00","updated_at":"2026-05-17T23:55:33.482541+00:00"}