{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:6H2SVD67XIYZBD3UOCZXL7TGWM","short_pith_number":"pith:6H2SVD67","canonical_record":{"source":{"id":"1905.12654","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T18:05:01Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9c01ac2a529d206c99a36e788a58b8f1068fc586bb779904ea0305c071d5c6fd","abstract_canon_sha256":"0cfe51085a76f9a658ead2110a052711028cf4034a973446e477898b14148bc6"},"schema_version":"1.0"},"canonical_sha256":"f1f52a8fdfba31908f7470b375fe66b33935d6e954ac6e2ef14063839170c0f8","source":{"kind":"arxiv","id":"1905.12654","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.12654","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"arxiv_version","alias_value":"1905.12654v1","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.12654","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"pith_short_12","alias_value":"6H2SVD67XIYZ","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6H2SVD67XIYZBD3U","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6H2SVD67","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:6H2SVD67XIYZBD3UOCZXL7TGWM","target":"record","payload":{"canonical_record":{"source":{"id":"1905.12654","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T18:05:01Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9c01ac2a529d206c99a36e788a58b8f1068fc586bb779904ea0305c071d5c6fd","abstract_canon_sha256":"0cfe51085a76f9a658ead2110a052711028cf4034a973446e477898b14148bc6"},"schema_version":"1.0"},"canonical_sha256":"f1f52a8fdfba31908f7470b375fe66b33935d6e954ac6e2ef14063839170c0f8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:41.842975Z","signature_b64":"EElX5cH8CI+6FXIWMtHByOu6DY+7BGwvyPol2UF0SOa3uEAJoOf9C1s5wQX56OiQtYWQz7O6dwk9/Qt5cNrwDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f1f52a8fdfba31908f7470b375fe66b33935d6e954ac6e2ef14063839170c0f8","last_reissued_at":"2026-05-17T23:44:41.842314Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:41.842314Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1905.12654","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jOMmSu18w44r7jqpZxh/7FRPuqjri8NjEY0zISl/0QxOTcg8jjYeZD2T2kMxaqHDhV/GSbbkuOiVbiJcZfL8BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T18:06:43.330805Z"},"content_sha256":"ff6df8b80dd0ba71d82a4280cd2ea17128653fd91822f87ea1ebce97e09db4a1","schema_version":"1.0","event_id":"sha256:ff6df8b80dd0ba71d82a4280cd2ea17128653fd91822f87ea1ebce97e09db4a1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:6H2SVD67XIYZBD3UOCZXL7TGWM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On the Generalization Gap in Reparameterizable Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Caiming Xiong, Huan Wang, Richard Socher, Stephan Zheng","submitted_at":"2019-05-29T18:05:01Z","abstract_excerpt":"Understanding generalization in reinforcement learning (RL) is a significant challenge, as many common assumptions of traditional supervised learning theory do not apply. We focus on the special class of reparameterizable RL problems, where the trajectory distribution can be decomposed using the reparametrization trick. For this problem class, estimating the expected return is efficient and the trajectory can be computed deterministically given peripheral random variables, which enables us to study reparametrizable RL using supervised learning and transfer learning theory. Through these relati"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.12654","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9i++Sqm9mv/P/BTbHRIH/RK6wXAOUsjqx/wt1q/dP/BnVb4ngbe5e+Br5kZBuXJanxjoxMJLR5fVCaOdjC0lBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T18:06:43.331180Z"},"content_sha256":"bb27c79b2fbdd9fff6a14709cf14b2a88c96b01ad50ef02ca96b88dc5992ea55","schema_version":"1.0","event_id":"sha256:bb27c79b2fbdd9fff6a14709cf14b2a88c96b01ad50ef02ca96b88dc5992ea55"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/bundle.json","state_url":"https://pith.science/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T18:06:43Z","links":{"resolver":"https://pith.science/pith/6H2SVD67XIYZBD3UOCZXL7TGWM","bundle":"https://pith.science/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/bundle.json","state":"https://pith.science/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6H2SVD67XIYZBD3UOCZXL7TGWM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:6H2SVD67XIYZBD3UOCZXL7TGWM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0cfe51085a76f9a658ead2110a052711028cf4034a973446e477898b14148bc6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T18:05:01Z","title_canon_sha256":"9c01ac2a529d206c99a36e788a58b8f1068fc586bb779904ea0305c071d5c6fd"},"schema_version":"1.0","source":{"id":"1905.12654","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1905.12654","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"arxiv_version","alias_value":"1905.12654v1","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.12654","created_at":"2026-05-17T23:44:41Z"},{"alias_kind":"pith_short_12","alias_value":"6H2SVD67XIYZ","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"6H2SVD67XIYZBD3U","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"6H2SVD67","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:bb27c79b2fbdd9fff6a14709cf14b2a88c96b01ad50ef02ca96b88dc5992ea55","target":"graph","created_at":"2026-05-17T23:44:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Understanding generalization in reinforcement learning (RL) is a significant challenge, as many common assumptions of traditional supervised learning theory do not apply. We focus on the special class of reparameterizable RL problems, where the trajectory distribution can be decomposed using the reparametrization trick. For this problem class, estimating the expected return is efficient and the trajectory can be computed deterministically given peripheral random variables, which enables us to study reparametrizable RL using supervised learning and transfer learning theory. Through these relati","authors_text":"Caiming Xiong, Huan Wang, Richard Socher, Stephan Zheng","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T18:05:01Z","title":"On the Generalization Gap in Reparameterizable Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.12654","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ff6df8b80dd0ba71d82a4280cd2ea17128653fd91822f87ea1ebce97e09db4a1","target":"record","created_at":"2026-05-17T23:44:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0cfe51085a76f9a658ead2110a052711028cf4034a973446e477898b14148bc6","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-29T18:05:01Z","title_canon_sha256":"9c01ac2a529d206c99a36e788a58b8f1068fc586bb779904ea0305c071d5c6fd"},"schema_version":"1.0","source":{"id":"1905.12654","kind":"arxiv","version":1}},"canonical_sha256":"f1f52a8fdfba31908f7470b375fe66b33935d6e954ac6e2ef14063839170c0f8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f1f52a8fdfba31908f7470b375fe66b33935d6e954ac6e2ef14063839170c0f8","first_computed_at":"2026-05-17T23:44:41.842314Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:41.842314Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EElX5cH8CI+6FXIWMtHByOu6DY+7BGwvyPol2UF0SOa3uEAJoOf9C1s5wQX56OiQtYWQz7O6dwk9/Qt5cNrwDg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:41.842975Z","signed_message":"canonical_sha256_bytes"},"source_id":"1905.12654","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ff6df8b80dd0ba71d82a4280cd2ea17128653fd91822f87ea1ebce97e09db4a1","sha256:bb27c79b2fbdd9fff6a14709cf14b2a88c96b01ad50ef02ca96b88dc5992ea55"],"state_sha256":"6c24d164b28dabe315ecc5615c2ef604d8df56df6955c7c9fb1a9bc6e7c59b4e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"usgfsM7y+H0IOXJlM/izxiJq4lRBuzBPYruUmPMx0pGEgo2Hy8Qly+76X7SAOWvsCq1gmG8+m2ZJdQD198vCDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T18:06:43.333732Z","bundle_sha256":"ebdd0680188beca35501ecaa59872bb31b349cce9c41dee9c5b8e59b81df0876"}}