{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:BYYXODIKFC7BC34OJPCFE4XZEJ","short_pith_number":"pith:BYYXODIK","schema_version":"1.0","canonical_sha256":"0e31770d0a28be116f8e4bc45272f922774f010688bf2d040f5742e91f15910a","source":{"kind":"arxiv","id":"1611.07507","version":1},"attestation_state":"computed","paper":{"title":"Variational Intrinsic Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Daan Wierstra, Danilo Jimenez Rezende, Karol Gregor","submitted_at":"2016-11-22T20:44:39Z","abstract_excerpt":"In this paper we introduce a new unsupervised reinforcement learning method for discovering the set of intrinsic options available to an agent. This set is learned by maximizing the number of different states an agent can reliably reach, as measured by the mutual information between the set of options and option termination states. To this end, we instantiate two policy gradient based algorithms, one that creates an explicit embedding space of options and one that represents options implicitly. The algorithms also provide an explicit measure of empowerment in a given state that can be used by "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1611.07507","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-11-22T20:44:39Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"94fe75339131bcb10df82a5c59dbe4694ed7d0ac67c55a7c3b749d0c7824cc9c","abstract_canon_sha256":"811eff2eb47ffcfaf7247b44d230012efc5c6c0aa4094dbc02c4d6306c7c40a5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:57:19.951070Z","signature_b64":"OuLHggVqXqZp+cgxgVmexk0CxPFdG/gN5tHsGTBEnv+rts7EaHij4jYoAbaiqoiACnnwTVcypVViF5Q9ToYiDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0e31770d0a28be116f8e4bc45272f922774f010688bf2d040f5742e91f15910a","last_reissued_at":"2026-05-18T00:57:19.950412Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:57:19.950412Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Variational Intrinsic Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Daan Wierstra, Danilo Jimenez Rezende, Karol Gregor","submitted_at":"2016-11-22T20:44:39Z","abstract_excerpt":"In this paper we introduce a new unsupervised reinforcement learning method for discovering the set of intrinsic options available to an agent. This set is learned by maximizing the number of different states an agent can reliably reach, as measured by the mutual information between the set of options and option termination states. To this end, we instantiate two policy gradient based algorithms, one that creates an explicit embedding space of options and one that represents options implicitly. The algorithms also provide an explicit measure of empowerment in a given state that can be used by "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1611.07507","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1611.07507","created_at":"2026-05-18T00:57:19.950509+00:00"},{"alias_kind":"arxiv_version","alias_value":"1611.07507v1","created_at":"2026-05-18T00:57:19.950509+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1611.07507","created_at":"2026-05-18T00:57:19.950509+00:00"},{"alias_kind":"pith_short_12","alias_value":"BYYXODIKFC7B","created_at":"2026-05-18T12:30:09.641336+00:00"},{"alias_kind":"pith_short_16","alias_value":"BYYXODIKFC7BC34O","created_at":"2026-05-18T12:30:09.641336+00:00"},{"alias_kind":"pith_short_8","alias_value":"BYYXODIK","created_at":"2026-05-18T12:30:09.641336+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1906.09205","citing_title":"Continual Reinforcement Learning with Diversity Exploration and Adversarial Self-Correction","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"1907.06143","citing_title":"Neural Embedding for Physical Manipulations","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2502.03752","citing_title":"Self-Improving Skill Learning for Robust Skill-based Meta-Reinforcement Learning","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12261","citing_title":"Delay-Empowered Causal Hierarchical Reinforcement Learning","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03413","citing_title":"Learning to Theorize the World from Observation","ref_index":94,"is_internal_anchor":false},{"citing_arxiv_id":"2604.24558","citing_title":"Hierarchical Behaviour Spaces","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06145","citing_title":"Unifying Goal-Conditioned RL and Unsupervised Skill Learning via Control-Maximization","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01862","citing_title":"QHyer: Q-conditioned Hybrid Attention-mamba Transformer for Offline Goal-conditioned RL","ref_index":63,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ","json":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ.json","graph_json":"https://pith.science/api/pith-number/BYYXODIKFC7BC34OJPCFE4XZEJ/graph.json","events_json":"https://pith.science/api/pith-number/BYYXODIKFC7BC34OJPCFE4XZEJ/events.json","paper":"https://pith.science/paper/BYYXODIK"},"agent_actions":{"view_html":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ","download_json":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ.json","view_paper":"https://pith.science/paper/BYYXODIK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1611.07507&json=true","fetch_graph":"https://pith.science/api/pith-number/BYYXODIKFC7BC34OJPCFE4XZEJ/graph.json","fetch_events":"https://pith.science/api/pith-number/BYYXODIKFC7BC34OJPCFE4XZEJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ/action/storage_attestation","attest_author":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ/action/author_attestation","sign_citation":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ/action/citation_signature","submit_replication":"https://pith.science/pith/BYYXODIKFC7BC34OJPCFE4XZEJ/action/replication_record"}},"created_at":"2026-05-18T00:57:19.950509+00:00","updated_at":"2026-05-18T00:57:19.950509+00:00"}