{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:XG2Q3FXVGHI3K4SUTTAJ37KQ7Y","short_pith_number":"pith:XG2Q3FXV","canonical_record":{"source":{"id":"1605.09735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-05-31T17:30:54Z","cross_cats_sorted":["cs.RO","math.OC","stat.ML"],"title_canon_sha256":"855bb6779fba55b872a6b89279245c3dac9a7f40f852dadecb8768cf91a4e3e9","abstract_canon_sha256":"886cef9ad974aafffe70c6d6822a48299b91181500a1afb9473d1d45d06a3c05"},"schema_version":"1.0"},"canonical_sha256":"b9b50d96f531d1b572549cc09dfd50fe2655222466066a01f720b3472b62dd4f","source":{"kind":"arxiv","id":"1605.09735","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.09735","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"arxiv_version","alias_value":"1605.09735v1","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.09735","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"pith_short_12","alias_value":"XG2Q3FXVGHI3","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XG2Q3FXVGHI3K4SU","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XG2Q3FXV","created_at":"2026-05-18T12:30:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:XG2Q3FXVGHI3K4SUTTAJ37KQ7Y","target":"record","payload":{"canonical_record":{"source":{"id":"1605.09735","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-05-31T17:30:54Z","cross_cats_sorted":["cs.RO","math.OC","stat.ML"],"title_canon_sha256":"855bb6779fba55b872a6b89279245c3dac9a7f40f852dadecb8768cf91a4e3e9","abstract_canon_sha256":"886cef9ad974aafffe70c6d6822a48299b91181500a1afb9473d1d45d06a3c05"},"schema_version":"1.0"},"canonical_sha256":"b9b50d96f531d1b572549cc09dfd50fe2655222466066a01f720b3472b62dd4f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:13:10.254133Z","signature_b64":"Lzdz6+1TBJQAVfXtyY0+DWVE75BaEj9TTbB80RxqjHz+ZxfIerCy/dYqxcvcen6C4CuuTNm91+SoTkEAI1sEBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b9b50d96f531d1b572549cc09dfd50fe2655222466066a01f720b3472b62dd4f","last_reissued_at":"2026-05-18T01:13:10.253708Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:13:10.253708Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1605.09735","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9O4xxsVcyG7MiZhTXcRppIQ46xtlZ5kbA14SUi9tDoTyHGLajN1v1ALeBpXN2qHuqcY3SjwO86xSSU2UkRTnBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T00:41:59.473227Z"},"content_sha256":"c5f25ea7e811899aa21bef5db9d58bb6e6fe2e42671e1e57e7bca07e776b9a88","schema_version":"1.0","event_id":"sha256:c5f25ea7e811899aa21bef5db9d58bb6e6fe2e42671e1e57e7bca07e776b9a88"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:XG2Q3FXVGHI3K4SUTTAJ37KQ7Y","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Information Theoretically Aided Reinforcement Learning for Embodied Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO","math.OC","stat.ML"],"primary_cat":"cs.AI","authors_text":"Guido Montufar, Keyan Ghazi-Zahedi, Nihat Ay","submitted_at":"2016-05-31T17:30:54Z","abstract_excerpt":"Reinforcement learning for embodied agents is a challenging problem. The accumulated reward to be optimized is often a very rugged function, and gradient methods are impaired by many local optimizers. We demonstrate, in an experimental setting, that incorporating an intrinsic reward can smoothen the optimization landscape while preserving the global optimizers of interest. We show that policy gradient optimization for locomotion in a complex morphology is significantly improved when supplementing the extrinsic reward by an intrinsic reward defined in terms of the mutual information of time con"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.09735","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:13:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a2j/kz4qW6TDFYDA7UuHim5kMMXahFYg6WidfDyoe0BgE2njXP8r6/QN45APBwprWIZuWNa+tQ3ATlorfBdWAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T00:41:59.473600Z"},"content_sha256":"0ede45f6a831e3913c6d501a0663b2354b47f319281d286aa82bf26755f20b7d","schema_version":"1.0","event_id":"sha256:0ede45f6a831e3913c6d501a0663b2354b47f319281d286aa82bf26755f20b7d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/bundle.json","state_url":"https://pith.science/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T00:41:59Z","links":{"resolver":"https://pith.science/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y","bundle":"https://pith.science/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/bundle.json","state":"https://pith.science/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XG2Q3FXVGHI3K4SUTTAJ37KQ7Y/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:XG2Q3FXVGHI3K4SUTTAJ37KQ7Y","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"886cef9ad974aafffe70c6d6822a48299b91181500a1afb9473d1d45d06a3c05","cross_cats_sorted":["cs.RO","math.OC","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-05-31T17:30:54Z","title_canon_sha256":"855bb6779fba55b872a6b89279245c3dac9a7f40f852dadecb8768cf91a4e3e9"},"schema_version":"1.0","source":{"id":"1605.09735","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1605.09735","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"arxiv_version","alias_value":"1605.09735v1","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.09735","created_at":"2026-05-18T01:13:10Z"},{"alias_kind":"pith_short_12","alias_value":"XG2Q3FXVGHI3","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XG2Q3FXVGHI3K4SU","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XG2Q3FXV","created_at":"2026-05-18T12:30:51Z"}],"graph_snapshots":[{"event_id":"sha256:0ede45f6a831e3913c6d501a0663b2354b47f319281d286aa82bf26755f20b7d","target":"graph","created_at":"2026-05-18T01:13:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning for embodied agents is a challenging problem. The accumulated reward to be optimized is often a very rugged function, and gradient methods are impaired by many local optimizers. We demonstrate, in an experimental setting, that incorporating an intrinsic reward can smoothen the optimization landscape while preserving the global optimizers of interest. We show that policy gradient optimization for locomotion in a complex morphology is significantly improved when supplementing the extrinsic reward by an intrinsic reward defined in terms of the mutual information of time con","authors_text":"Guido Montufar, Keyan Ghazi-Zahedi, Nihat Ay","cross_cats":["cs.RO","math.OC","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-05-31T17:30:54Z","title":"Information Theoretically Aided Reinforcement Learning for Embodied Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.09735","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c5f25ea7e811899aa21bef5db9d58bb6e6fe2e42671e1e57e7bca07e776b9a88","target":"record","created_at":"2026-05-18T01:13:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"886cef9ad974aafffe70c6d6822a48299b91181500a1afb9473d1d45d06a3c05","cross_cats_sorted":["cs.RO","math.OC","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2016-05-31T17:30:54Z","title_canon_sha256":"855bb6779fba55b872a6b89279245c3dac9a7f40f852dadecb8768cf91a4e3e9"},"schema_version":"1.0","source":{"id":"1605.09735","kind":"arxiv","version":1}},"canonical_sha256":"b9b50d96f531d1b572549cc09dfd50fe2655222466066a01f720b3472b62dd4f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b9b50d96f531d1b572549cc09dfd50fe2655222466066a01f720b3472b62dd4f","first_computed_at":"2026-05-18T01:13:10.253708Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:13:10.253708Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Lzdz6+1TBJQAVfXtyY0+DWVE75BaEj9TTbB80RxqjHz+ZxfIerCy/dYqxcvcen6C4CuuTNm91+SoTkEAI1sEBw==","signature_status":"signed_v1","signed_at":"2026-05-18T01:13:10.254133Z","signed_message":"canonical_sha256_bytes"},"source_id":"1605.09735","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c5f25ea7e811899aa21bef5db9d58bb6e6fe2e42671e1e57e7bca07e776b9a88","sha256:0ede45f6a831e3913c6d501a0663b2354b47f319281d286aa82bf26755f20b7d"],"state_sha256":"a1dcf3cf616351f6d8349f310eeeafc815f7e013f1176118482fc1fb8761855c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"f6atuDY37l8lSd0g3gtCe1+YURaP1H0cLXTx9LnnrcfZk3JyCn5a7YQjjR/+J4POW08Kd1yTbIfe6PpC6boAAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T00:41:59.475683Z","bundle_sha256":"4634693ab45022daf57bf7af17955a3cde10a257919a20c116b422f0b1bd3b0c"}}