{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6TI76Q22MJBFZ7A6F2V4GGEWIM","short_pith_number":"pith:6TI76Q22","canonical_record":{"source":{"id":"1906.12266","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-28T15:35:11Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"87826d5848ffce20115fb5f28e7dca63cd486496ffcc37130f36ec82d655a209","abstract_canon_sha256":"d26af6368e6b45780f768152e28e1d98eb6ecee0d3548831048d00cbf984114c"},"schema_version":"1.0"},"canonical_sha256":"f4d1ff435a62425cfc1e2eabc3189643202e7a4efc7f4169c88190592a95d6d7","source":{"kind":"arxiv","id":"1906.12266","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.12266","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"arxiv_version","alias_value":"1906.12266v1","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.12266","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"pith_short_12","alias_value":"6TI76Q22MJBF","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6TI76Q22MJBFZ7A6","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6TI76Q22","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6TI76Q22MJBFZ7A6F2V4GGEWIM","target":"record","payload":{"canonical_record":{"source":{"id":"1906.12266","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-28T15:35:11Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"87826d5848ffce20115fb5f28e7dca63cd486496ffcc37130f36ec82d655a209","abstract_canon_sha256":"d26af6368e6b45780f768152e28e1d98eb6ecee0d3548831048d00cbf984114c"},"schema_version":"1.0"},"canonical_sha256":"f4d1ff435a62425cfc1e2eabc3189643202e7a4efc7f4169c88190592a95d6d7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:58.826440Z","signature_b64":"cAAPNzhqbm7ssyjKV1msrAmu13tcRug0Bnsl2+WxBa+/ff7ZqM+fY8mWSTTWJpIm62hMSUSA1zYLu8omZh7fAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f4d1ff435a62425cfc1e2eabc3189643202e7a4efc7f4169c88190592a95d6d7","last_reissued_at":"2026-05-17T23:41:58.825787Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:58.825787Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.12266","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zU72GJ3BZUrZKRQlFTTBkJTAtoGFeSVGd8GWbfAHX6Pfgcq/ibdACZIfLgQYkuStrKP3Q2pV8qR9wlPVUVANBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:49:08.775775Z"},"content_sha256":"d7467dbc12bc22cbb5900edffd80fe485b69b5c41054fa1b320a77438dd64fa5","schema_version":"1.0","event_id":"sha256:d7467dbc12bc22cbb5900edffd80fe485b69b5c41054fa1b320a77438dd64fa5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6TI76Q22MJBFZ7A6F2V4GGEWIM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Growing Action Spaces","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Gabriel Synnaeve, Gregory Farquhar, Laura Gustafson, Nicolas Usunier, Shimon Whiteson, Zeming Lin","submitted_at":"2019-06-28T15:35:11Z","abstract_excerpt":"In complex tasks, such as those with large combinatorial action spaces, random exploration may be too inefficient to achieve meaningful learning progress. In this work, we use a curriculum of progressively growing action spaces to accelerate learning. We assume the environment is out of our control, but that the agent may set an internal curriculum by initially restricting its action space. Our approach uses off-policy reinforcement learning to estimate optimal value functions for multiple action spaces simultaneously and efficiently transfers data, value estimates, and state representations f"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.12266","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:41:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u7ima+fL/emaSxNS5xfpxcjYRtV7jU4u6ymXEgYS+NX6+4yBkIQLLR59uH+GSUQtTNznjwFzk875CtkbYVDIBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T22:49:08.776524Z"},"content_sha256":"cd6324449f04348076cd2635820ced6c4ec642b91feef4fb99428d0a0c8e8f75","schema_version":"1.0","event_id":"sha256:cd6324449f04348076cd2635820ced6c4ec642b91feef4fb99428d0a0c8e8f75"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/bundle.json","state_url":"https://pith.science/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T22:49:08Z","links":{"resolver":"https://pith.science/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM","bundle":"https://pith.science/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/bundle.json","state":"https://pith.science/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6TI76Q22MJBFZ7A6F2V4GGEWIM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6TI76Q22MJBFZ7A6F2V4GGEWIM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d26af6368e6b45780f768152e28e1d98eb6ecee0d3548831048d00cbf984114c","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-28T15:35:11Z","title_canon_sha256":"87826d5848ffce20115fb5f28e7dca63cd486496ffcc37130f36ec82d655a209"},"schema_version":"1.0","source":{"id":"1906.12266","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.12266","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"arxiv_version","alias_value":"1906.12266v1","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.12266","created_at":"2026-05-17T23:41:58Z"},{"alias_kind":"pith_short_12","alias_value":"6TI76Q22MJBF","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6TI76Q22MJBFZ7A6","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6TI76Q22","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:cd6324449f04348076cd2635820ced6c4ec642b91feef4fb99428d0a0c8e8f75","target":"graph","created_at":"2026-05-17T23:41:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In complex tasks, such as those with large combinatorial action spaces, random exploration may be too inefficient to achieve meaningful learning progress. In this work, we use a curriculum of progressively growing action spaces to accelerate learning. We assume the environment is out of our control, but that the agent may set an internal curriculum by initially restricting its action space. Our approach uses off-policy reinforcement learning to estimate optimal value functions for multiple action spaces simultaneously and efficiently transfers data, value estimates, and state representations f","authors_text":"Gabriel Synnaeve, Gregory Farquhar, Laura Gustafson, Nicolas Usunier, Shimon Whiteson, Zeming Lin","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-28T15:35:11Z","title":"Growing Action Spaces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.12266","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d7467dbc12bc22cbb5900edffd80fe485b69b5c41054fa1b320a77438dd64fa5","target":"record","created_at":"2026-05-17T23:41:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d26af6368e6b45780f768152e28e1d98eb6ecee0d3548831048d00cbf984114c","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-28T15:35:11Z","title_canon_sha256":"87826d5848ffce20115fb5f28e7dca63cd486496ffcc37130f36ec82d655a209"},"schema_version":"1.0","source":{"id":"1906.12266","kind":"arxiv","version":1}},"canonical_sha256":"f4d1ff435a62425cfc1e2eabc3189643202e7a4efc7f4169c88190592a95d6d7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f4d1ff435a62425cfc1e2eabc3189643202e7a4efc7f4169c88190592a95d6d7","first_computed_at":"2026-05-17T23:41:58.825787Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:41:58.825787Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"cAAPNzhqbm7ssyjKV1msrAmu13tcRug0Bnsl2+WxBa+/ff7ZqM+fY8mWSTTWJpIm62hMSUSA1zYLu8omZh7fAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:41:58.826440Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.12266","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d7467dbc12bc22cbb5900edffd80fe485b69b5c41054fa1b320a77438dd64fa5","sha256:cd6324449f04348076cd2635820ced6c4ec642b91feef4fb99428d0a0c8e8f75"],"state_sha256":"86039b06ed09e5ca693b8a67846132e24600b0ffa97383a886fa0992fc7c063d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MrU/QKFrhpDMBtZIpYwLXGFV82i9ds//pI/VP+qdfeOz6r0OM5oIIRR1xr5vDmqCYRx7KVvpBivdjP31T/nhCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T22:49:08.780681Z","bundle_sha256":"c80c2d99550f93a6411763454516e0c9066177d1795433aad1ab84656c31a493"}}