{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:YF5BEVD4SYLEGAZHYCQAUHQGBZ","short_pith_number":"pith:YF5BEVD4","canonical_record":{"source":{"id":"1806.01830","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-05T17:39:12Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"c0dfc7fec867ac84148771900f927db2a00f4edff9dedf2cac1828124fc3848a","abstract_canon_sha256":"bb6ea2d7b13f77659143444281157a228c90b88c9c6bc7e9ef9ecff1c221c42c"},"schema_version":"1.0"},"canonical_sha256":"c17a12547c9616430327c0a00a1e060e5c2a4378f4b6ae9b8ab13c387a04cf9d","source":{"kind":"arxiv","id":"1806.01830","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01830","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01830v2","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01830","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"pith_short_12","alias_value":"YF5BEVD4SYLE","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"YF5BEVD4SYLEGAZH","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"YF5BEVD4","created_at":"2026-05-18T12:33:04Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:YF5BEVD4SYLEGAZHYCQAUHQGBZ","target":"record","payload":{"canonical_record":{"source":{"id":"1806.01830","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-05T17:39:12Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"c0dfc7fec867ac84148771900f927db2a00f4edff9dedf2cac1828124fc3848a","abstract_canon_sha256":"bb6ea2d7b13f77659143444281157a228c90b88c9c6bc7e9ef9ecff1c221c42c"},"schema_version":"1.0"},"canonical_sha256":"c17a12547c9616430327c0a00a1e060e5c2a4378f4b6ae9b8ab13c387a04cf9d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:12:08.564860Z","signature_b64":"nv2Z6kPt++tQacuipsI5KpdBiUNlwYYXdpqVKCE5Wp3iao7JncwKu94r+zprQ2fS2J+B/1lcK/TzLWToJ1b2CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c17a12547c9616430327c0a00a1e060e5c2a4378f4b6ae9b8ab13c387a04cf9d","last_reissued_at":"2026-05-18T00:12:08.564392Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:12:08.564392Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1806.01830","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YyUEgSP8rJW3UPkJTVLj+F7PzQ1X1jFM+NiqEIlqp4qREpkXHZAVxEbrNH2/NkzJprVRghdip57vd6rqA/PLAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T01:55:35.048899Z"},"content_sha256":"e11002a454a1f6b301ab1fe40f8576380b72429ad77815d0ccea096b960bf299","schema_version":"1.0","event_id":"sha256:e11002a454a1f6b301ab1fe40f8576380b72429ad77815d0ccea096b960bf299"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:YF5BEVD4SYLEGAZHYCQAUHQGBZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Relational Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Adam Santoro, David Raposo, David Reichert, Edward Lockhart, Igor Babuschkin, Karl Tuyls, Matthew Botvinick, Murray Shanahan, Oriol Vinyals, Peter Battaglia, Razvan Pascanu, Timothy Lillicrap, Victor Bapst, Victoria Langston, Vinicius Zambaldi, Yujia Li","submitted_at":"2018-06-05T17:39:12Z","abstract_excerpt":"We introduce an approach for deep reinforcement learning (RL) that improves upon the efficiency, generalization capacity, and interpretability of conventional approaches through structured perception and relational reasoning. It uses self-attention to iteratively reason about the relations between entities in a scene and to guide a model-free policy. Our results show that in a novel navigation and planning task called Box-World, our agent finds interpretable solutions that improve upon baselines in terms of sample complexity, ability to generalize to more complex scenes than experienced during"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01830","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:12:08Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FsF4HkE2T3pp2O/eZQ1miMK6gbxmYieBIAvcAHELG5PoreiJEyNVbR+qlF5Ytn7fRnJDEVDy75qW74jfyIoWCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T01:55:35.049332Z"},"content_sha256":"d662076c97c1a2be21e7d2a69d2be861a0fb7ce9658c8bfaeb6822f984f9ee41","schema_version":"1.0","event_id":"sha256:d662076c97c1a2be21e7d2a69d2be861a0fb7ce9658c8bfaeb6822f984f9ee41"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/bundle.json","state_url":"https://pith.science/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T01:55:35Z","links":{"resolver":"https://pith.science/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ","bundle":"https://pith.science/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/bundle.json","state":"https://pith.science/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YF5BEVD4SYLEGAZHYCQAUHQGBZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:YF5BEVD4SYLEGAZHYCQAUHQGBZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bb6ea2d7b13f77659143444281157a228c90b88c9c6bc7e9ef9ecff1c221c42c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-05T17:39:12Z","title_canon_sha256":"c0dfc7fec867ac84148771900f927db2a00f4edff9dedf2cac1828124fc3848a"},"schema_version":"1.0","source":{"id":"1806.01830","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1806.01830","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"arxiv_version","alias_value":"1806.01830v2","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1806.01830","created_at":"2026-05-18T00:12:08Z"},{"alias_kind":"pith_short_12","alias_value":"YF5BEVD4SYLE","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_16","alias_value":"YF5BEVD4SYLEGAZH","created_at":"2026-05-18T12:33:04Z"},{"alias_kind":"pith_short_8","alias_value":"YF5BEVD4","created_at":"2026-05-18T12:33:04Z"}],"graph_snapshots":[{"event_id":"sha256:d662076c97c1a2be21e7d2a69d2be861a0fb7ce9658c8bfaeb6822f984f9ee41","target":"graph","created_at":"2026-05-18T00:12:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce an approach for deep reinforcement learning (RL) that improves upon the efficiency, generalization capacity, and interpretability of conventional approaches through structured perception and relational reasoning. It uses self-attention to iteratively reason about the relations between entities in a scene and to guide a model-free policy. Our results show that in a novel navigation and planning task called Box-World, our agent finds interpretable solutions that improve upon baselines in terms of sample complexity, ability to generalize to more complex scenes than experienced during","authors_text":"Adam Santoro, David Raposo, David Reichert, Edward Lockhart, Igor Babuschkin, Karl Tuyls, Matthew Botvinick, Murray Shanahan, Oriol Vinyals, Peter Battaglia, Razvan Pascanu, Timothy Lillicrap, Victor Bapst, Victoria Langston, Vinicius Zambaldi, Yujia Li","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-05T17:39:12Z","title":"Relational Deep Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1806.01830","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e11002a454a1f6b301ab1fe40f8576380b72429ad77815d0ccea096b960bf299","target":"record","created_at":"2026-05-18T00:12:08Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bb6ea2d7b13f77659143444281157a228c90b88c9c6bc7e9ef9ecff1c221c42c","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-06-05T17:39:12Z","title_canon_sha256":"c0dfc7fec867ac84148771900f927db2a00f4edff9dedf2cac1828124fc3848a"},"schema_version":"1.0","source":{"id":"1806.01830","kind":"arxiv","version":2}},"canonical_sha256":"c17a12547c9616430327c0a00a1e060e5c2a4378f4b6ae9b8ab13c387a04cf9d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c17a12547c9616430327c0a00a1e060e5c2a4378f4b6ae9b8ab13c387a04cf9d","first_computed_at":"2026-05-18T00:12:08.564392Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:12:08.564392Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"nv2Z6kPt++tQacuipsI5KpdBiUNlwYYXdpqVKCE5Wp3iao7JncwKu94r+zprQ2fS2J+B/1lcK/TzLWToJ1b2CQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:12:08.564860Z","signed_message":"canonical_sha256_bytes"},"source_id":"1806.01830","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e11002a454a1f6b301ab1fe40f8576380b72429ad77815d0ccea096b960bf299","sha256:d662076c97c1a2be21e7d2a69d2be861a0fb7ce9658c8bfaeb6822f984f9ee41"],"state_sha256":"7179e21431bec6917769a6c943fd19a5e87fe4a9f14caa1539f2c1de845b1d0d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iaC+sJPp9AV6bNZve4YUdjeJhJTKEH5TiDDayLLfCbVfJvMZ33tW29Z3X2Ez9q+00NYsVgEKHZKEGJAPjPqDCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T01:55:35.053904Z","bundle_sha256":"5bfef73c0722f53fa5e80b6868676943ead878cd4cce95b8ecfa78ef61dd12f2"}}