{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:PO5I7T5B7775OGUZ6TBF27C5KX","short_pith_number":"pith:PO5I7T5B","canonical_record":{"source":{"id":"1902.00528","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T19:23:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"68581f316414404a1517dee50fe49490530ffa06493c8a3f11a9a4a69fc1655b","abstract_canon_sha256":"a94accdfa0fc977ff38f1befb88cc0da0e79ea230f4a6a605e7cd8cd0616b8ce"},"schema_version":"1.0"},"canonical_sha256":"7bba8fcfa1ffffd71a99f4c25d7c5d55d3a48e3b78871b2ffb10200fc4e6419e","source":{"kind":"arxiv","id":"1902.00528","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.00528","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"arxiv_version","alias_value":"1902.00528v4","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.00528","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"pith_short_12","alias_value":"PO5I7T5B7775","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PO5I7T5B7775OGUZ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PO5I7T5B","created_at":"2026-05-18T12:33:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:PO5I7T5B7775OGUZ6TBF27C5KX","target":"record","payload":{"canonical_record":{"source":{"id":"1902.00528","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T19:23:58Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"68581f316414404a1517dee50fe49490530ffa06493c8a3f11a9a4a69fc1655b","abstract_canon_sha256":"a94accdfa0fc977ff38f1befb88cc0da0e79ea230f4a6a605e7cd8cd0616b8ce"},"schema_version":"1.0"},"canonical_sha256":"7bba8fcfa1ffffd71a99f4c25d7c5d55d3a48e3b78871b2ffb10200fc4e6419e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:53:49.411198Z","signature_b64":"Hi8v/r5Gpfvz51jPUb9nKJqcEuRGaOp1Kcof2pdNGcJO3GXbiHgzzwRg9ocTGH2o9SclTGp46PT49IoRDb1dDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7bba8fcfa1ffffd71a99f4c25d7c5d55d3a48e3b78871b2ffb10200fc4e6419e","last_reissued_at":"2026-05-17T23:53:49.410556Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:53:49.410556Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1902.00528","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:53:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EKpkYpIK+gTglAjsUYL1Hw/m0aSkev3FTlFoCy0GbwvuhRM1RMXUD8eBctxJrRq1VU5yh9YGxgrBjhPHECvLBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T19:50:51.365379Z"},"content_sha256":"d516204725b22c6291ca589ae0f6121fe8c1c8ad879b7e7fcd6cbd1d19f94148","schema_version":"1.0","event_id":"sha256:d516204725b22c6291ca589ae0f6121fe8c1c8ad879b7e7fcd6cbd1d19f94148"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:PO5I7T5B7775OGUZ6TBF27C5KX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Competitive Experience Replay","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexander Trott, Caiming Xiong, Hao Liu, Richard Socher","submitted_at":"2019-02-01T19:23:58Z","abstract_excerpt":"Deep learning has achieved remarkable successes in solving challenging reinforcement learning (RL) problems when dense reward function is provided. However, in sparse reward environment it still often suffers from the need to carefully shape reward function to guide policy optimization. This limits the applicability of RL in the real world since both reinforcement learning and domain-specific knowledge are required. It is therefore of great practical importance to develop algorithms which can learn from a binary signal indicating successful task completion or other unshaped, sparse reward sign"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.00528","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:53:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W1+xArLpSQIlLxsFkL/Fp10DYpOMNIsen5f3I8flEGqrXrzxz5NAcsPEN1VpApGPD1aCD3TcIS/At+3Q5YlFDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T19:50:51.366071Z"},"content_sha256":"71c43fb0f22a4a9553c3427f19bd9879ee3047ac5a50bd8344046f0e13f0d75d","schema_version":"1.0","event_id":"sha256:71c43fb0f22a4a9553c3427f19bd9879ee3047ac5a50bd8344046f0e13f0d75d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PO5I7T5B7775OGUZ6TBF27C5KX/bundle.json","state_url":"https://pith.science/pith/PO5I7T5B7775OGUZ6TBF27C5KX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PO5I7T5B7775OGUZ6TBF27C5KX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T19:50:51Z","links":{"resolver":"https://pith.science/pith/PO5I7T5B7775OGUZ6TBF27C5KX","bundle":"https://pith.science/pith/PO5I7T5B7775OGUZ6TBF27C5KX/bundle.json","state":"https://pith.science/pith/PO5I7T5B7775OGUZ6TBF27C5KX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PO5I7T5B7775OGUZ6TBF27C5KX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:PO5I7T5B7775OGUZ6TBF27C5KX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a94accdfa0fc977ff38f1befb88cc0da0e79ea230f4a6a605e7cd8cd0616b8ce","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T19:23:58Z","title_canon_sha256":"68581f316414404a1517dee50fe49490530ffa06493c8a3f11a9a4a69fc1655b"},"schema_version":"1.0","source":{"id":"1902.00528","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.00528","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"arxiv_version","alias_value":"1902.00528v4","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.00528","created_at":"2026-05-17T23:53:49Z"},{"alias_kind":"pith_short_12","alias_value":"PO5I7T5B7775","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_16","alias_value":"PO5I7T5B7775OGUZ","created_at":"2026-05-18T12:33:24Z"},{"alias_kind":"pith_short_8","alias_value":"PO5I7T5B","created_at":"2026-05-18T12:33:24Z"}],"graph_snapshots":[{"event_id":"sha256:71c43fb0f22a4a9553c3427f19bd9879ee3047ac5a50bd8344046f0e13f0d75d","target":"graph","created_at":"2026-05-17T23:53:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Deep learning has achieved remarkable successes in solving challenging reinforcement learning (RL) problems when dense reward function is provided. However, in sparse reward environment it still often suffers from the need to carefully shape reward function to guide policy optimization. This limits the applicability of RL in the real world since both reinforcement learning and domain-specific knowledge are required. It is therefore of great practical importance to develop algorithms which can learn from a binary signal indicating successful task completion or other unshaped, sparse reward sign","authors_text":"Alexander Trott, Caiming Xiong, Hao Liu, Richard Socher","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T19:23:58Z","title":"Competitive Experience Replay"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.00528","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d516204725b22c6291ca589ae0f6121fe8c1c8ad879b7e7fcd6cbd1d19f94148","target":"record","created_at":"2026-05-17T23:53:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a94accdfa0fc977ff38f1befb88cc0da0e79ea230f4a6a605e7cd8cd0616b8ce","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T19:23:58Z","title_canon_sha256":"68581f316414404a1517dee50fe49490530ffa06493c8a3f11a9a4a69fc1655b"},"schema_version":"1.0","source":{"id":"1902.00528","kind":"arxiv","version":4}},"canonical_sha256":"7bba8fcfa1ffffd71a99f4c25d7c5d55d3a48e3b78871b2ffb10200fc4e6419e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7bba8fcfa1ffffd71a99f4c25d7c5d55d3a48e3b78871b2ffb10200fc4e6419e","first_computed_at":"2026-05-17T23:53:49.410556Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:53:49.410556Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Hi8v/r5Gpfvz51jPUb9nKJqcEuRGaOp1Kcof2pdNGcJO3GXbiHgzzwRg9ocTGH2o9SclTGp46PT49IoRDb1dDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:53:49.411198Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.00528","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d516204725b22c6291ca589ae0f6121fe8c1c8ad879b7e7fcd6cbd1d19f94148","sha256:71c43fb0f22a4a9553c3427f19bd9879ee3047ac5a50bd8344046f0e13f0d75d"],"state_sha256":"65dc76559270c2194f2f089ef74f93d44159ec456f9f4d370dc9cb7ba6345720"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9w4Q5V+Ey7JwRQ507I8bnyMpMo2PMokgO8lYoiaayZqTGakSsp+J9lIB99zX9PQrLHnNrNzpnN7ZSpSSNBi0Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T19:50:51.369563Z","bundle_sha256":"c0f4bb83d1d9fcd4da7048c32de28d6b6e6cd47e5d62971ae676a0117d61fa16"}}