{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:DZMOQKYP5CUAQ54MPB3XOSKMTH","short_pith_number":"pith:DZMOQKYP","canonical_record":{"source":{"id":"1512.04860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-15T17:13:49Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"d6f38ad246e599b6940bef5e4290001bdb6d12c324374832951725a2f2dd0c9b","abstract_canon_sha256":"150ad4a54cd49ed48064f51758b087458606123b83e39d81701f4b9b75b93d7e"},"schema_version":"1.0"},"canonical_sha256":"1e58e82b0fe8a808778c787777494c99de8bdd0e544075015a26a88bb2405ec9","source":{"kind":"arxiv","id":"1512.04860","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.04860","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"arxiv_version","alias_value":"1512.04860v1","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.04860","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"pith_short_12","alias_value":"DZMOQKYP5CUA","created_at":"2026-05-18T12:29:17Z"},{"alias_kind":"pith_short_16","alias_value":"DZMOQKYP5CUAQ54M","created_at":"2026-05-18T12:29:17Z"},{"alias_kind":"pith_short_8","alias_value":"DZMOQKYP","created_at":"2026-05-18T12:29:17Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:DZMOQKYP5CUAQ54MPB3XOSKMTH","target":"record","payload":{"canonical_record":{"source":{"id":"1512.04860","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-15T17:13:49Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"d6f38ad246e599b6940bef5e4290001bdb6d12c324374832951725a2f2dd0c9b","abstract_canon_sha256":"150ad4a54cd49ed48064f51758b087458606123b83e39d81701f4b9b75b93d7e"},"schema_version":"1.0"},"canonical_sha256":"1e58e82b0fe8a808778c787777494c99de8bdd0e544075015a26a88bb2405ec9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:24:15.256216Z","signature_b64":"H1tb1BGDGIEJrgb4zGSNx0fOtM+2dy9LrfLEyTfBfm1DWkOzvSlMVC17KdSKmf+x48b0LI+mD0enouKLch2DCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1e58e82b0fe8a808778c787777494c99de8bdd0e544075015a26a88bb2405ec9","last_reissued_at":"2026-05-18T01:24:15.255550Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:24:15.255550Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1512.04860","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:24:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jsDI2GzUySyn3oAs09c1HpRh42xNcFEhUboRFYfBXAUkir3eUFKn/5KTojpSBLjV0M7d3BMUgQadiPz2bR0CAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:18:44.675343Z"},"content_sha256":"f11244a78619665f1aa13a1270c644c8ae0ff07431648d65e0038a81dc6a013d","schema_version":"1.0","event_id":"sha256:f11244a78619665f1aa13a1270c644c8ae0ff07431648d65e0038a81dc6a013d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:DZMOQKYP5CUAQ54MPB3XOSKMTH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Increasing the Action Gap: New Operators for Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Arthur Guez, Georg Ostrovski, Marc G. Bellemare, Philip S. Thomas, R\\'emi Munos","submitted_at":"2015-12-15T17:13:49Z","abstract_excerpt":"This paper introduces new optimality-preserving operators on Q-functions. We first describe an operator for tabular representations, the consistent Bellman operator, which incorporates a notion of local policy consistency. We show that this local consistency leads to an increase in the action gap at each state; increasing this gap, we argue, mitigates the undesirable effects of approximation and estimation errors on the induced greedy policies. This operator can also be applied to discretized continuous space and time problems, and we provide empirical results evidencing superior performance i"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.04860","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:24:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5rhIHZ/I6hJP4J+D9uafAER3+ukibWEMmVUbavwq9UuMF2c9uIMYsUU4enhi4NsqLqQn9vNW8os4pXQPSnZIAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:18:44.675747Z"},"content_sha256":"1c3225fe87c6bf57c2ec0a9ab46492b8f6dc002dc4924e515f1e9008e40b2620","schema_version":"1.0","event_id":"sha256:1c3225fe87c6bf57c2ec0a9ab46492b8f6dc002dc4924e515f1e9008e40b2620"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/bundle.json","state_url":"https://pith.science/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T17:18:44Z","links":{"resolver":"https://pith.science/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH","bundle":"https://pith.science/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/bundle.json","state":"https://pith.science/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DZMOQKYP5CUAQ54MPB3XOSKMTH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:DZMOQKYP5CUAQ54MPB3XOSKMTH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"150ad4a54cd49ed48064f51758b087458606123b83e39d81701f4b9b75b93d7e","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-15T17:13:49Z","title_canon_sha256":"d6f38ad246e599b6940bef5e4290001bdb6d12c324374832951725a2f2dd0c9b"},"schema_version":"1.0","source":{"id":"1512.04860","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1512.04860","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"arxiv_version","alias_value":"1512.04860v1","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.04860","created_at":"2026-05-18T01:24:15Z"},{"alias_kind":"pith_short_12","alias_value":"DZMOQKYP5CUA","created_at":"2026-05-18T12:29:17Z"},{"alias_kind":"pith_short_16","alias_value":"DZMOQKYP5CUAQ54M","created_at":"2026-05-18T12:29:17Z"},{"alias_kind":"pith_short_8","alias_value":"DZMOQKYP","created_at":"2026-05-18T12:29:17Z"}],"graph_snapshots":[{"event_id":"sha256:1c3225fe87c6bf57c2ec0a9ab46492b8f6dc002dc4924e515f1e9008e40b2620","target":"graph","created_at":"2026-05-18T01:24:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"This paper introduces new optimality-preserving operators on Q-functions. We first describe an operator for tabular representations, the consistent Bellman operator, which incorporates a notion of local policy consistency. We show that this local consistency leads to an increase in the action gap at each state; increasing this gap, we argue, mitigates the undesirable effects of approximation and estimation errors on the induced greedy policies. This operator can also be applied to discretized continuous space and time problems, and we provide empirical results evidencing superior performance i","authors_text":"Arthur Guez, Georg Ostrovski, Marc G. Bellemare, Philip S. Thomas, R\\'emi Munos","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-15T17:13:49Z","title":"Increasing the Action Gap: New Operators for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.04860","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f11244a78619665f1aa13a1270c644c8ae0ff07431648d65e0038a81dc6a013d","target":"record","created_at":"2026-05-18T01:24:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"150ad4a54cd49ed48064f51758b087458606123b83e39d81701f4b9b75b93d7e","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2015-12-15T17:13:49Z","title_canon_sha256":"d6f38ad246e599b6940bef5e4290001bdb6d12c324374832951725a2f2dd0c9b"},"schema_version":"1.0","source":{"id":"1512.04860","kind":"arxiv","version":1}},"canonical_sha256":"1e58e82b0fe8a808778c787777494c99de8bdd0e544075015a26a88bb2405ec9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1e58e82b0fe8a808778c787777494c99de8bdd0e544075015a26a88bb2405ec9","first_computed_at":"2026-05-18T01:24:15.255550Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:24:15.255550Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"H1tb1BGDGIEJrgb4zGSNx0fOtM+2dy9LrfLEyTfBfm1DWkOzvSlMVC17KdSKmf+x48b0LI+mD0enouKLch2DCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:24:15.256216Z","signed_message":"canonical_sha256_bytes"},"source_id":"1512.04860","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f11244a78619665f1aa13a1270c644c8ae0ff07431648d65e0038a81dc6a013d","sha256:1c3225fe87c6bf57c2ec0a9ab46492b8f6dc002dc4924e515f1e9008e40b2620"],"state_sha256":"30a89ec3de683c14e4fbfda41ab1b3df56d3596558eb52abc0b5035d2bb24c06"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CwjRLImX/iCMZQZ08n/Yc2ccBDueeCU0eAYX4BNauLKUCa+GXb5ooNNS61YtKMaAe8MKiTUzN9CktirVP/pKDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T17:18:44.678520Z","bundle_sha256":"a4ef86c1a0acc425ef6915f1cb2f7a35cdb7a7e113270fc8f6c49ef4214d5216"}}