{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:CJVXBVGHWTO2KFGAZHPREGR73V","short_pith_number":"pith:CJVXBVGH","canonical_record":{"source":{"id":"1803.08501","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-03-22T14:59:16Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"9a653269cf227600f8e962ec555d6cd69ab35aec85f7df54ff388ca6b412c8df","abstract_canon_sha256":"61ee5491fef84bb50de2efa9887ee1a8caa49e79f0041cd3afdff0e4e78c5fe9"},"schema_version":"1.0"},"canonical_sha256":"126b70d4c7b4dda514c0c9df121a3fdd6be7edf2ada76d10e85c4c894a338f54","source":{"kind":"arxiv","id":"1803.08501","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.08501","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"arxiv_version","alias_value":"1803.08501v1","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.08501","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"pith_short_12","alias_value":"CJVXBVGHWTO2","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"CJVXBVGHWTO2KFGA","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"CJVXBVGH","created_at":"2026-05-18T12:32:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:CJVXBVGHWTO2KFGAZHPREGR73V","target":"record","payload":{"canonical_record":{"source":{"id":"1803.08501","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-03-22T14:59:16Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"9a653269cf227600f8e962ec555d6cd69ab35aec85f7df54ff388ca6b412c8df","abstract_canon_sha256":"61ee5491fef84bb50de2efa9887ee1a8caa49e79f0041cd3afdff0e4e78c5fe9"},"schema_version":"1.0"},"canonical_sha256":"126b70d4c7b4dda514c0c9df121a3fdd6be7edf2ada76d10e85c4c894a338f54","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:21.466740Z","signature_b64":"0BffV23xRGjYT3XOXP+zQkzc/Iw+EpKzyl+87cmjh0EENrhFdZmRGeM7LRBoEJ3D9KjN0tbu3wvB+66++sdiAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"126b70d4c7b4dda514c0c9df121a3fdd6be7edf2ada76d10e85c4c894a338f54","last_reissued_at":"2026-05-18T00:20:21.466156Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:21.466156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.08501","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aVBRS+I5jM2ppwXhT1rcLiq4yNnYP2g1OSeLcAZyi6OBPypxnTqekydFAPBkF39LtiHqAEAsRAlp3CWLGnzvCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T12:46:49.517304Z"},"content_sha256":"2f2f4cae0f842b4c4be2b9434eadb583d2cc949cd42fc43f015a0eb376e53986","schema_version":"1.0","event_id":"sha256:2f2f4cae0f842b4c4be2b9434eadb583d2cc949cd42fc43f015a0eb376e53986"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:CJVXBVGHWTO2KFGAZHPREGR73V","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"DOP: Deep Optimistic Planning with Approximate Value Function Evaluation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.RO","authors_text":"Daniele Nardi, Francesco Riccio, Roberto Capobianco","submitted_at":"2018-03-22T14:59:16Z","abstract_excerpt":"Research on reinforcement learning has demonstrated promising results in manifold applications and domains. Still, efficiently learning effective robot behaviors is very difficult, due to unstructured scenarios, high uncertainties, and large state dimensionality (e.g. multi-agent systems or hyper-redundant robots). To alleviate this problem, we present DOP, a deep model-based reinforcement learning algorithm, which exploits action values to both (1) guide the exploration of the state space and (2) plan effective policies. Specifically, we exploit deep neural networks to learn Q-functions that "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.08501","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:21Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"z5Xh2w+St7hh6mMwj5amf7r1R48+36VH8swCWxfVdQwk2CEx0Ax7EUAJ5WLSEGe8XHKPtHUmR7h/FaHt0HFTAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T12:46:49.517663Z"},"content_sha256":"2894e02368c7b7c7668c9234d3ecd915d5b536be127680e402f8449e979c3019","schema_version":"1.0","event_id":"sha256:2894e02368c7b7c7668c9234d3ecd915d5b536be127680e402f8449e979c3019"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CJVXBVGHWTO2KFGAZHPREGR73V/bundle.json","state_url":"https://pith.science/pith/CJVXBVGHWTO2KFGAZHPREGR73V/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CJVXBVGHWTO2KFGAZHPREGR73V/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T12:46:49Z","links":{"resolver":"https://pith.science/pith/CJVXBVGHWTO2KFGAZHPREGR73V","bundle":"https://pith.science/pith/CJVXBVGHWTO2KFGAZHPREGR73V/bundle.json","state":"https://pith.science/pith/CJVXBVGHWTO2KFGAZHPREGR73V/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CJVXBVGHWTO2KFGAZHPREGR73V/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:CJVXBVGHWTO2KFGAZHPREGR73V","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"61ee5491fef84bb50de2efa9887ee1a8caa49e79f0041cd3afdff0e4e78c5fe9","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-03-22T14:59:16Z","title_canon_sha256":"9a653269cf227600f8e962ec555d6cd69ab35aec85f7df54ff388ca6b412c8df"},"schema_version":"1.0","source":{"id":"1803.08501","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.08501","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"arxiv_version","alias_value":"1803.08501v1","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.08501","created_at":"2026-05-18T00:20:21Z"},{"alias_kind":"pith_short_12","alias_value":"CJVXBVGHWTO2","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"CJVXBVGHWTO2KFGA","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"CJVXBVGH","created_at":"2026-05-18T12:32:16Z"}],"graph_snapshots":[{"event_id":"sha256:2894e02368c7b7c7668c9234d3ecd915d5b536be127680e402f8449e979c3019","target":"graph","created_at":"2026-05-18T00:20:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Research on reinforcement learning has demonstrated promising results in manifold applications and domains. Still, efficiently learning effective robot behaviors is very difficult, due to unstructured scenarios, high uncertainties, and large state dimensionality (e.g. multi-agent systems or hyper-redundant robots). To alleviate this problem, we present DOP, a deep model-based reinforcement learning algorithm, which exploits action values to both (1) guide the exploration of the state space and (2) plan effective policies. Specifically, we exploit deep neural networks to learn Q-functions that ","authors_text":"Daniele Nardi, Francesco Riccio, Roberto Capobianco","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-03-22T14:59:16Z","title":"DOP: Deep Optimistic Planning with Approximate Value Function Evaluation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.08501","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2f2f4cae0f842b4c4be2b9434eadb583d2cc949cd42fc43f015a0eb376e53986","target":"record","created_at":"2026-05-18T00:20:21Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"61ee5491fef84bb50de2efa9887ee1a8caa49e79f0041cd3afdff0e4e78c5fe9","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2018-03-22T14:59:16Z","title_canon_sha256":"9a653269cf227600f8e962ec555d6cd69ab35aec85f7df54ff388ca6b412c8df"},"schema_version":"1.0","source":{"id":"1803.08501","kind":"arxiv","version":1}},"canonical_sha256":"126b70d4c7b4dda514c0c9df121a3fdd6be7edf2ada76d10e85c4c894a338f54","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"126b70d4c7b4dda514c0c9df121a3fdd6be7edf2ada76d10e85c4c894a338f54","first_computed_at":"2026-05-18T00:20:21.466156Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:20:21.466156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0BffV23xRGjYT3XOXP+zQkzc/Iw+EpKzyl+87cmjh0EENrhFdZmRGeM7LRBoEJ3D9KjN0tbu3wvB+66++sdiAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:20:21.466740Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.08501","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2f2f4cae0f842b4c4be2b9434eadb583d2cc949cd42fc43f015a0eb376e53986","sha256:2894e02368c7b7c7668c9234d3ecd915d5b536be127680e402f8449e979c3019"],"state_sha256":"0c5a340170d4b06836d41fca0024e43842331a04a89f4ac305c5bc9c66e8a0fa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DXx+pXG6vElu7qy445NtoGSlYotcdh/TTCfD5X6ksHOSSWhUbcRNIBvHYBgnsxj6yNEFUt3RYxf6tSh8mdyEAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T12:46:49.519602Z","bundle_sha256":"2369a0714f98d66ce34e3c30eadb32133b3b7e98d528351519249f78281f0abf"}}