{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:RGF3IT3NJSZSE37IDHNEXWNJPA","short_pith_number":"pith:RGF3IT3N","schema_version":"1.0","canonical_sha256":"898bb44f6d4cb3226fe819da4bd9a9781382e099653aff3432451b89a2a7c8cf","source":{"kind":"arxiv","id":"1902.03142","version":1},"attestation_state":"computed","paper":{"title":"Novelty Search for Deep Reinforcement Learning Policy Network Weights by Action Sequence Edit Metric Distance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Ethan C. Jackson, Mark Daley","submitted_at":"2019-02-08T15:14:09Z","abstract_excerpt":"Reinforcement learning (RL) problems often feature deceptive local optima, and learning methods that optimize purely for reward signal often fail to learn strategies for overcoming them. Deep neuroevolution and novelty search have been proposed as effective alternatives to gradient-based methods for learning RL policies directly from pixels. In this paper, we introduce and evaluate the use of novelty search over agent action sequences by string edit metric distance as a means for promoting innovation. We also introduce a method for stagnation detection and population resampling inspired by rec"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.03142","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-02-08T15:14:09Z","cross_cats_sorted":[],"title_canon_sha256":"7d40d7c1a58fa1d55711ea7dd08045461a9ed61a24b7a5d5e4841c70ad770acb","abstract_canon_sha256":"2347ffe499709f542d2f623531329b4fe57cbb2622ccd402ebb3335194be7540"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:27.824239Z","signature_b64":"Rrhclf0IU2z0aZizA1juFRY51Hrzj/LjctYcwAuvtcPrrGcfvngkbF/oxMUCgwdH0BhYhTqSikyfB7KAoJi6BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"898bb44f6d4cb3226fe819da4bd9a9781382e099653aff3432451b89a2a7c8cf","last_reissued_at":"2026-05-17T23:54:27.823526Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:27.823526Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Novelty Search for Deep Reinforcement Learning Policy Network Weights by Action Sequence Edit Metric Distance","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Ethan C. Jackson, Mark Daley","submitted_at":"2019-02-08T15:14:09Z","abstract_excerpt":"Reinforcement learning (RL) problems often feature deceptive local optima, and learning methods that optimize purely for reward signal often fail to learn strategies for overcoming them. Deep neuroevolution and novelty search have been proposed as effective alternatives to gradient-based methods for learning RL policies directly from pixels. In this paper, we introduce and evaluate the use of novelty search over agent action sequences by string edit metric distance as a means for promoting innovation. We also introduce a method for stagnation detection and population resampling inspired by rec"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.03142","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.03142","created_at":"2026-05-17T23:54:27.823644+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.03142v1","created_at":"2026-05-17T23:54:27.823644+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.03142","created_at":"2026-05-17T23:54:27.823644+00:00"},{"alias_kind":"pith_short_12","alias_value":"RGF3IT3NJSZS","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"RGF3IT3NJSZSE37I","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"RGF3IT3N","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA","json":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA.json","graph_json":"https://pith.science/api/pith-number/RGF3IT3NJSZSE37IDHNEXWNJPA/graph.json","events_json":"https://pith.science/api/pith-number/RGF3IT3NJSZSE37IDHNEXWNJPA/events.json","paper":"https://pith.science/paper/RGF3IT3N"},"agent_actions":{"view_html":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA","download_json":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA.json","view_paper":"https://pith.science/paper/RGF3IT3N","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.03142&json=true","fetch_graph":"https://pith.science/api/pith-number/RGF3IT3NJSZSE37IDHNEXWNJPA/graph.json","fetch_events":"https://pith.science/api/pith-number/RGF3IT3NJSZSE37IDHNEXWNJPA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA/action/storage_attestation","attest_author":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA/action/author_attestation","sign_citation":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA/action/citation_signature","submit_replication":"https://pith.science/pith/RGF3IT3NJSZSE37IDHNEXWNJPA/action/replication_record"}},"created_at":"2026-05-17T23:54:27.823644+00:00","updated_at":"2026-05-17T23:54:27.823644+00:00"}