{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:GZ3ZOKQR5BIXBXBGQ5QBKMCPMX","short_pith_number":"pith:GZ3ZOKQR","canonical_record":{"source":{"id":"1805.04514","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-10T20:00:50Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"df3982e4f7ac0b8c95b0d13cddb30885e75234c9ee704637ce24180aca32f0c8","abstract_canon_sha256":"60ff7a7cdc8a4a98cc4672bcd6d59cba69b214a5d0f76abe9c70ef2a803622ee"},"schema_version":"1.0"},"canonical_sha256":"3677972a11e85170dc26876015304f65cb905804f4b3a2c47e77366b746b4908","source":{"kind":"arxiv","id":"1805.04514","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.04514","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"arxiv_version","alias_value":"1805.04514v2","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.04514","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"pith_short_12","alias_value":"GZ3ZOKQR5BIX","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"GZ3ZOKQR5BIXBXBG","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"GZ3ZOKQR","created_at":"2026-05-18T12:32:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:GZ3ZOKQR5BIXBXBGQ5QBKMCPMX","target":"record","payload":{"canonical_record":{"source":{"id":"1805.04514","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-10T20:00:50Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"df3982e4f7ac0b8c95b0d13cddb30885e75234c9ee704637ce24180aca32f0c8","abstract_canon_sha256":"60ff7a7cdc8a4a98cc4672bcd6d59cba69b214a5d0f76abe9c70ef2a803622ee"},"schema_version":"1.0"},"canonical_sha256":"3677972a11e85170dc26876015304f65cb905804f4b3a2c47e77366b746b4908","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:45:15.474421Z","signature_b64":"IZ2upTKDGT27V7WX0FUoDtEXlU0XAOT55eu0KqpTorfQUhTWcVQPwagGyO4ENjbZppV75oksRbpLl1GTNPrXDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3677972a11e85170dc26876015304f65cb905804f4b3a2c47e77366b746b4908","last_reissued_at":"2026-05-17T23:45:15.473803Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:45:15.473803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1805.04514","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"akiyAVEIQWWGbX/0RNz/BGxIkAW/F5zCe7Rmv+uqZF7uJAxwHU8Glm93ekPMiSb8oSYxGKj24d3az3mjLI/6CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T02:56:46.515435Z"},"content_sha256":"0fbc9145b19a33454afa39915d8b98d3510471934878263e7548190d0e74bee5","schema_version":"1.0","event_id":"sha256:0fbc9145b19a33454afa39915d8b98d3510471934878263e7548190d0e74bee5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:GZ3ZOKQR5BIXBXBGQ5QBKMCPMX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Metatrace Actor-Critic: Online Step-size Tuning by Meta-gradient Descent for Reinforcement Learning Control","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Baoxiang Wang, Kenny Young, Matthew E. Taylor","submitted_at":"2018-05-10T20:00:50Z","abstract_excerpt":"Reinforcement learning (RL) has had many successes in both \"deep\" and \"shallow\" settings. In both cases, significant hyperparameter tuning is often required to achieve good performance. Furthermore, when nonlinear function approximation is used, non-stationarity in the state representation can lead to learning instability. A variety of techniques exist to combat this --- most notably large experience replay buffers or the use of multiple parallel actors. These techniques come at the cost of moving away from the online RL problem as it is traditionally formulated (i.e., a single agent learning "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.04514","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:45:15Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Qg4DvRHa6l7L5K4cMLN0a5X9LWWcgpTGAzDnOZg65DTATN03GKIVUlSd9l6sd6e863FpE8r6Fn30ADxBHmXpAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T02:56:46.516200Z"},"content_sha256":"a0f87ab8482aac39e5f29fe172035ae4a2e799201dd675aa1b7f4cb9cff0db8d","schema_version":"1.0","event_id":"sha256:a0f87ab8482aac39e5f29fe172035ae4a2e799201dd675aa1b7f4cb9cff0db8d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/bundle.json","state_url":"https://pith.science/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T02:56:46Z","links":{"resolver":"https://pith.science/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX","bundle":"https://pith.science/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/bundle.json","state":"https://pith.science/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GZ3ZOKQR5BIXBXBGQ5QBKMCPMX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:GZ3ZOKQR5BIXBXBGQ5QBKMCPMX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"60ff7a7cdc8a4a98cc4672bcd6d59cba69b214a5d0f76abe9c70ef2a803622ee","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-10T20:00:50Z","title_canon_sha256":"df3982e4f7ac0b8c95b0d13cddb30885e75234c9ee704637ce24180aca32f0c8"},"schema_version":"1.0","source":{"id":"1805.04514","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1805.04514","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"arxiv_version","alias_value":"1805.04514v2","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.04514","created_at":"2026-05-17T23:45:15Z"},{"alias_kind":"pith_short_12","alias_value":"GZ3ZOKQR5BIX","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"GZ3ZOKQR5BIXBXBG","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"GZ3ZOKQR","created_at":"2026-05-18T12:32:25Z"}],"graph_snapshots":[{"event_id":"sha256:a0f87ab8482aac39e5f29fe172035ae4a2e799201dd675aa1b7f4cb9cff0db8d","target":"graph","created_at":"2026-05-17T23:45:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has had many successes in both \"deep\" and \"shallow\" settings. In both cases, significant hyperparameter tuning is often required to achieve good performance. Furthermore, when nonlinear function approximation is used, non-stationarity in the state representation can lead to learning instability. A variety of techniques exist to combat this --- most notably large experience replay buffers or the use of multiple parallel actors. These techniques come at the cost of moving away from the online RL problem as it is traditionally formulated (i.e., a single agent learning ","authors_text":"Baoxiang Wang, Kenny Young, Matthew E. Taylor","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-10T20:00:50Z","title":"Metatrace Actor-Critic: Online Step-size Tuning by Meta-gradient Descent for Reinforcement Learning Control"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.04514","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0fbc9145b19a33454afa39915d8b98d3510471934878263e7548190d0e74bee5","target":"record","created_at":"2026-05-17T23:45:15Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"60ff7a7cdc8a4a98cc4672bcd6d59cba69b214a5d0f76abe9c70ef2a803622ee","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-05-10T20:00:50Z","title_canon_sha256":"df3982e4f7ac0b8c95b0d13cddb30885e75234c9ee704637ce24180aca32f0c8"},"schema_version":"1.0","source":{"id":"1805.04514","kind":"arxiv","version":2}},"canonical_sha256":"3677972a11e85170dc26876015304f65cb905804f4b3a2c47e77366b746b4908","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3677972a11e85170dc26876015304f65cb905804f4b3a2c47e77366b746b4908","first_computed_at":"2026-05-17T23:45:15.473803Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:45:15.473803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IZ2upTKDGT27V7WX0FUoDtEXlU0XAOT55eu0KqpTorfQUhTWcVQPwagGyO4ENjbZppV75oksRbpLl1GTNPrXDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:45:15.474421Z","signed_message":"canonical_sha256_bytes"},"source_id":"1805.04514","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0fbc9145b19a33454afa39915d8b98d3510471934878263e7548190d0e74bee5","sha256:a0f87ab8482aac39e5f29fe172035ae4a2e799201dd675aa1b7f4cb9cff0db8d"],"state_sha256":"8c80455d785c55fd16b7e2f427c89025e0f084a5223769fa24fe3d8723dcba83"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vysbqk3jxaIZ98pMC9YK/b2+ZdQTD43ETgYc+CjsLLMuRPgV4Rsp/UJmQJq1ET6PC74wY7BUtGNHAla7jnmfAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T02:56:46.521481Z","bundle_sha256":"1ec8dde79d72fede606366d2d73151425b509829831c11cf794eb37baea889ac"}}