{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:R2UBDYQVXPHBZJBOHLKT7YMBEL","short_pith_number":"pith:R2UBDYQV","schema_version":"1.0","canonical_sha256":"8ea811e215bbce1ca42e3ad53fe18122c79577315fc4b9a7c0374b4c049dc802","source":{"kind":"arxiv","id":"1611.01211","version":8},"attestation_state":"computed","paper":{"title":"Combating Reinforcement Learning's Sisyphean Curse with Intrinsic Fear","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE","stat.ML"],"primary_cat":"cs.LG","authors_text":"Abhishek Kumar, Jianfeng Gao, Kamyar Azizzadenesheli, Li Deng, Lihong Li, Zachary C. Lipton","submitted_at":"2016-11-03T22:30:10Z","abstract_excerpt":"Many practical environments contain catastrophic states that an optimal agent would visit infrequently or never. Even on toy problems, Deep Reinforcement Learning (DRL) agents tend to periodically revisit these states upon forgetting their existence under a new policy. We introduce intrinsic fear (IF), a learned reward shaping that guards DRL agents against periodic catastrophes. IF agents possess a fear model trained to predict the probability of imminent catastrophe. This score is then used to penalize the Q-learning objective. Our theoretical analysis bounds the reduction in average return "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.01211","kind":"arxiv","version":8},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-03T22:30:10Z","cross_cats_sorted":["cs.NE","stat.ML"],"title_canon_sha256":"ee030de06c0d85b74483e13735808f14c4638155bab4fde6e9e8bd1764608280","abstract_canon_sha256":"0be0952dc5209fd45393ec9452d80dab69bd5a6b79da6932ac0c4ff5d972d237"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:21:05.114266Z","signature_b64":"LEDVSzUnMXqU8tTW7J2OOV5Dq1Fma86l8L3YbbwhJ1Ed/+y1mjJT+pxdiDj36FQJVU8be5pnP+RRY/4i7krQDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8ea811e215bbce1ca42e3ad53fe18122c79577315fc4b9a7c0374b4c049dc802","last_reissued_at":"2026-05-18T00:21:05.113532Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:21:05.113532Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Combating Reinforcement Learning's Sisyphean Curse with Intrinsic Fear","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.NE","stat.ML"],"primary_cat":"cs.LG","authors_text":"Abhishek Kumar, Jianfeng Gao, Kamyar Azizzadenesheli, Li Deng, Lihong Li, Zachary C. Lipton","submitted_at":"2016-11-03T22:30:10Z","abstract_excerpt":"Many practical environments contain catastrophic states that an optimal agent would visit infrequently or never. Even on toy problems, Deep Reinforcement Learning (DRL) agents tend to periodically revisit these states upon forgetting their existence under a new policy. We introduce intrinsic fear (IF), a learned reward shaping that guards DRL agents against periodic catastrophes. IF agents possess a fear model trained to predict the probability of imminent catastrophe. This score is then used to penalize the Q-learning objective. Our theoretical analysis bounds the reduction in average return "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.01211","kind":"arxiv","version":8},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.01211","created_at":"2026-05-18T00:21:05.113646+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.01211v8","created_at":"2026-05-18T00:21:05.113646+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.01211","created_at":"2026-05-18T00:21:05.113646+00:00"},{"alias_kind":"pith_short_12","alias_value":"R2UBDYQVXPHB","created_at":"2026-05-18T12:30:41.710351+00:00"},{"alias_kind":"pith_short_16","alias_value":"R2UBDYQVXPHBZJBO","created_at":"2026-05-18T12:30:41.710351+00:00"},{"alias_kind":"pith_short_8","alias_value":"R2UBDYQV","created_at":"2026-05-18T12:30:41.710351+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.01475","citing_title":"Generalizing from a few environments in safety-critical reinforcement learning","ref_index":23,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL","json":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL.json","graph_json":"https://pith.science/api/pith-number/R2UBDYQVXPHBZJBOHLKT7YMBEL/graph.json","events_json":"https://pith.science/api/pith-number/R2UBDYQVXPHBZJBOHLKT7YMBEL/events.json","paper":"https://pith.science/paper/R2UBDYQV"},"agent_actions":{"view_html":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL","download_json":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL.json","view_paper":"https://pith.science/paper/R2UBDYQV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.01211&json=true","fetch_graph":"https://pith.science/api/pith-number/R2UBDYQVXPHBZJBOHLKT7YMBEL/graph.json","fetch_events":"https://pith.science/api/pith-number/R2UBDYQVXPHBZJBOHLKT7YMBEL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL/action/storage_attestation","attest_author":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL/action/author_attestation","sign_citation":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL/action/citation_signature","submit_replication":"https://pith.science/pith/R2UBDYQVXPHBZJBOHLKT7YMBEL/action/replication_record"}},"created_at":"2026-05-18T00:21:05.113646+00:00","updated_at":"2026-05-18T00:21:05.113646+00:00"}