{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:LF4AW44Y6KYL3PR5QZQ4AYELF3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"545c40f0b1837ea335f6cc8e288321bace6893fa9b6550cbbf9ddd77a455f074","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T06:45:21Z","title_canon_sha256":"d8094d4bfb56b5dc6700c84e8977d3cb77e6def2ff39bb10aff2da15cd0d0a21"},"schema_version":"1.0","source":{"id":"1902.04257","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.04257","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"arxiv_version","alias_value":"1902.04257v1","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.04257","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"pith_short_12","alias_value":"LF4AW44Y6KYL","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_16","alias_value":"LF4AW44Y6KYL3PR5","created_at":"2026-05-18T12:33:21Z"},{"alias_kind":"pith_short_8","alias_value":"LF4AW44Y","created_at":"2026-05-18T12:33:21Z"}],"graph_snapshots":[{"event_id":"sha256:1bfcfa94f0d972f85053e50072989d1602fc3029963d0cef64676d6e13653ec0","target":"graph","created_at":"2026-05-17T23:54:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"To widen their accessibility and increase their utility, intelligent agents must be able to learn complex behaviors as specified by (non-expert) human users. Moreover, they will need to learn these behaviors within a reasonable amount of time while efficiently leveraging the sparse feedback a human trainer is capable of providing. Recent work has shown that human feedback can be characterized as a critique of an agent's current behavior rather than as an alternative reward signal to be maximized, culminating in the COnvergent Actor-Critic by Humans (COACH) algorithm for making direct policy up","authors_text":"Dilip Arumugam, Jun Ki Lee, Michael L. Littman, Sophie Saskin","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T06:45:21Z","title":"Deep Reinforcement Learning from Policy-Dependent Human Feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.04257","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a2e3b86cbc3910828fc9727f776a47b2fdc25258336359f079686ffa88685c0b","target":"record","created_at":"2026-05-17T23:54:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"545c40f0b1837ea335f6cc8e288321bace6893fa9b6550cbbf9ddd77a455f074","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T06:45:21Z","title_canon_sha256":"d8094d4bfb56b5dc6700c84e8977d3cb77e6def2ff39bb10aff2da15cd0d0a21"},"schema_version":"1.0","source":{"id":"1902.04257","kind":"arxiv","version":1}},"canonical_sha256":"59780b7398f2b0bdbe3d8661c0608b2eda1f3bc6a056c178621a36b2c50432d9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"59780b7398f2b0bdbe3d8661c0608b2eda1f3bc6a056c178621a36b2c50432d9","first_computed_at":"2026-05-17T23:54:12.731455Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:54:12.731455Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"v2Ktg3OsjKPilJmmfbBIWcaP+LRORBQBQ96xgXFrdYb+N8vnD62NDMMNYiGIsc5trSmeCtMCIlZJuxwcjsxdCQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:54:12.731896Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.04257","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a2e3b86cbc3910828fc9727f776a47b2fdc25258336359f079686ffa88685c0b","sha256:1bfcfa94f0d972f85053e50072989d1602fc3029963d0cef64676d6e13653ec0"],"state_sha256":"8d2a5cd20af7dbcb7f8de4615ca2832366a597d782f890a564e1f9c72906a967"}