{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ORM2PAZ47EUR6CB6SQOPXZXHW2","short_pith_number":"pith:ORM2PAZ4","schema_version":"1.0","canonical_sha256":"7459a7833cf9291f083e941cfbe6e7b6afbc318a8d638ecf4d2bfe59d0c03bf7","source":{"kind":"arxiv","id":"1806.00553","version":3},"attestation_state":"computed","paper":{"title":"Deep Curiosity Search: Intra-Life Exploration Can Improve Performance on Challenging Deep Reinforcement Learning Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Christopher Stanton, Jeff Clune","submitted_at":"2018-06-01T22:09:51Z","abstract_excerpt":"Traditional exploration methods in RL require agents to perform random actions to find rewards. But these approaches struggle on sparse-reward domains like Montezuma's Revenge where the probability that any random action sequence leads to reward is extremely low. Recent algorithms have performed well on such tasks by encouraging agents to visit new states or perform new actions in relation to all prior training episodes (which we call across-training novelty). But such algorithms do not consider whether an agent exhibits intra-life novelty: doing something new within the current episode, regar"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1806.00553","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-06-01T22:09:51Z","cross_cats_sorted":[],"title_canon_sha256":"4c0d5d65c34962a18d775ce549ac9483b6117aa33aad17672b48ccb5c74b5856","abstract_canon_sha256":"172c8bc03f521c9d34def4895dbfad1d74abf6a3e1947be357ffcfcebc1e3bb1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:00:02.047865Z","signature_b64":"As1x5QrhQt+yb8g9YPZOl8189hMHJUmc8ho6E6mMcRg5/dRczLTSGH8Sx5pGi7XZ2sDwxb5BVcN6gmI6jbW8BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7459a7833cf9291f083e941cfbe6e7b6afbc318a8d638ecf4d2bfe59d0c03bf7","last_reissued_at":"2026-05-18T00:00:02.047341Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:00:02.047341Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Deep Curiosity Search: Intra-Life Exploration Can Improve Performance on Challenging Deep Reinforcement Learning Problems","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Christopher Stanton, Jeff Clune","submitted_at":"2018-06-01T22:09:51Z","abstract_excerpt":"Traditional exploration methods in RL require agents to perform random actions to find rewards. But these approaches struggle on sparse-reward domains like Montezuma's Revenge where the probability that any random action sequence leads to reward is extremely low. Recent algorithms have performed well on such tasks by encouraging agents to visit new states or perform new actions in relation to all prior training episodes (which we call across-training novelty). But such algorithms do not consider whether an agent exhibits intra-life novelty: doing something new within the current episode, regar"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.00553","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1806.00553","created_at":"2026-05-18T00:00:02.047423+00:00"},{"alias_kind":"arxiv_version","alias_value":"1806.00553v3","created_at":"2026-05-18T00:00:02.047423+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.00553","created_at":"2026-05-18T00:00:02.047423+00:00"},{"alias_kind":"pith_short_12","alias_value":"ORM2PAZ47EUR","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_16","alias_value":"ORM2PAZ47EUR6CB6","created_at":"2026-05-18T12:32:43.782077+00:00"},{"alias_kind":"pith_short_8","alias_value":"ORM2PAZ4","created_at":"2026-05-18T12:32:43.782077+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2","json":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2.json","graph_json":"https://pith.science/api/pith-number/ORM2PAZ47EUR6CB6SQOPXZXHW2/graph.json","events_json":"https://pith.science/api/pith-number/ORM2PAZ47EUR6CB6SQOPXZXHW2/events.json","paper":"https://pith.science/paper/ORM2PAZ4"},"agent_actions":{"view_html":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2","download_json":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2.json","view_paper":"https://pith.science/paper/ORM2PAZ4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1806.00553&json=true","fetch_graph":"https://pith.science/api/pith-number/ORM2PAZ47EUR6CB6SQOPXZXHW2/graph.json","fetch_events":"https://pith.science/api/pith-number/ORM2PAZ47EUR6CB6SQOPXZXHW2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2/action/storage_attestation","attest_author":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2/action/author_attestation","sign_citation":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2/action/citation_signature","submit_replication":"https://pith.science/pith/ORM2PAZ47EUR6CB6SQOPXZXHW2/action/replication_record"}},"created_at":"2026-05-18T00:00:02.047423+00:00","updated_at":"2026-05-18T00:00:02.047423+00:00"}