{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:735KRB4TFRHOF4MFVVCQBYE5PQ","short_pith_number":"pith:735KRB4T","schema_version":"1.0","canonical_sha256":"fefaa887932c4ee2f185ad4500e09d7c3aca8e4ef070a56346aa6beb482f4ff9","source":{"kind":"arxiv","id":"1902.04198","version":2},"attestation_state":"computed","paper":{"title":"Preferences Implicit in the State of the World","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anca Dragan, Dmitrii Krasheninnikov, Jordan Alexander, Pieter Abbeel, Rohin Shah","submitted_at":"2019-02-12T00:50:56Z","abstract_excerpt":"Reinforcement learning (RL) agents optimize only the features specified in a reward function and are indifferent to anything left out inadvertently. This means that we must not only specify what to do, but also the much larger space of what not to do. It is easy to forget these preferences, since these preferences are already satisfied in our environment. This motivates our key insight: when a robot is deployed in an environment that humans act in, the state of the environment is already optimized for what humans want. We can therefore use this implicit preference information from the state to"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.04198","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T00:50:56Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9b147d473fba77689122c5f0407094427cd481d7fac09d8a3c4611c0cf520622","abstract_canon_sha256":"389b9f0cb3844716f12b3d8ed7f189c48af278c9d1fe60f228f0cfd14bd14088"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:11.090902Z","signature_b64":"A/WGp/fHByO3NrNF8+eUnyxBzqNmnx3c0CNv6ULKnMMqi42JKUtlBQAY7XOV4zn5V1TCLCQTZ+inDeilDNz5Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fefaa887932c4ee2f185ad4500e09d7c3aca8e4ef070a56346aa6beb482f4ff9","last_reissued_at":"2026-05-17T23:48:11.090094Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:11.090094Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Preferences Implicit in the State of the World","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anca Dragan, Dmitrii Krasheninnikov, Jordan Alexander, Pieter Abbeel, Rohin Shah","submitted_at":"2019-02-12T00:50:56Z","abstract_excerpt":"Reinforcement learning (RL) agents optimize only the features specified in a reward function and are indifferent to anything left out inadvertently. This means that we must not only specify what to do, but also the much larger space of what not to do. It is easy to forget these preferences, since these preferences are already satisfied in our environment. This motivates our key insight: when a robot is deployed in an environment that humans act in, the state of the environment is already optimized for what humans want. We can therefore use this implicit preference information from the state to"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.04198","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.04198","created_at":"2026-05-17T23:48:11.090228+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.04198v2","created_at":"2026-05-17T23:48:11.090228+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.04198","created_at":"2026-05-17T23:48:11.090228+00:00"},{"alias_kind":"pith_short_12","alias_value":"735KRB4TFRHO","created_at":"2026-05-18T12:33:10.108867+00:00"},{"alias_kind":"pith_short_16","alias_value":"735KRB4TFRHOF4MF","created_at":"2026-05-18T12:33:10.108867+00:00"},{"alias_kind":"pith_short_8","alias_value":"735KRB4T","created_at":"2026-05-18T12:33:10.108867+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ","json":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ.json","graph_json":"https://pith.science/api/pith-number/735KRB4TFRHOF4MFVVCQBYE5PQ/graph.json","events_json":"https://pith.science/api/pith-number/735KRB4TFRHOF4MFVVCQBYE5PQ/events.json","paper":"https://pith.science/paper/735KRB4T"},"agent_actions":{"view_html":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ","download_json":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ.json","view_paper":"https://pith.science/paper/735KRB4T","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.04198&json=true","fetch_graph":"https://pith.science/api/pith-number/735KRB4TFRHOF4MFVVCQBYE5PQ/graph.json","fetch_events":"https://pith.science/api/pith-number/735KRB4TFRHOF4MFVVCQBYE5PQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ/action/storage_attestation","attest_author":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ/action/author_attestation","sign_citation":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ/action/citation_signature","submit_replication":"https://pith.science/pith/735KRB4TFRHOF4MFVVCQBYE5PQ/action/replication_record"}},"created_at":"2026-05-17T23:48:11.090228+00:00","updated_at":"2026-05-17T23:48:11.090228+00:00"}