{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:D77MXYZZJH6VSOBBA7BW2RZD4F","short_pith_number":"pith:D77MXYZZ","canonical_record":{"source":{"id":"1906.04355","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-11T02:31:38Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"17ff3bb60a48cae1c96f790faf15743953ca56fef3f4321f9a663b266d0f110e","abstract_canon_sha256":"2dea3d69fcc5be717b7b3bc436ff66be6791296895a9ab37f7b676f3b98c5158"},"schema_version":"1.0"},"canonical_sha256":"1ffecbe33949fd59382107c36d4723e169b61128a11da62648960370b8521c1f","source":{"kind":"arxiv","id":"1906.04355","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.04355","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"arxiv_version","alias_value":"1906.04355v1","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.04355","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"pith_short_12","alias_value":"D77MXYZZJH6V","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"D77MXYZZJH6VSOBB","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"D77MXYZZ","created_at":"2026-05-18T12:33:15Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:D77MXYZZJH6VSOBBA7BW2RZD4F","target":"record","payload":{"canonical_record":{"source":{"id":"1906.04355","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-11T02:31:38Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"17ff3bb60a48cae1c96f790faf15743953ca56fef3f4321f9a663b266d0f110e","abstract_canon_sha256":"2dea3d69fcc5be717b7b3bc436ff66be6791296895a9ab37f7b676f3b98c5158"},"schema_version":"1.0"},"canonical_sha256":"1ffecbe33949fd59382107c36d4723e169b61128a11da62648960370b8521c1f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:39.570781Z","signature_b64":"DY93wwn7SFz6PrqOf9ptMYSE71VjAcMhU+3hSBsNVF1YIcGhgAbMOOKYY2cn5vbfra0b88Izul7hv04nsonvAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1ffecbe33949fd59382107c36d4723e169b61128a11da62648960370b8521c1f","last_reissued_at":"2026-05-17T23:43:39.570221Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:39.570221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.04355","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"o6DSlYcdvn0I8H+MZzudfhZQUxqETyS3QvYNUN5O6LMXPb9dycQsnNXVzNQnlRMNEGyjIRdrhn0q9hvCINn6DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T06:12:35.485939Z"},"content_sha256":"51045fa203d39083ae568baee0fe6feae2c410b34ee74f2f73a7892073204c96","schema_version":"1.0","event_id":"sha256:51045fa203d39083ae568baee0fe6feae2c410b34ee74f2f73a7892073204c96"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:D77MXYZZJH6VSOBBA7BW2RZD4F","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Powerful Policies by Using Consistent Dynamics Model","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Anirudh Goyal, Jian Tang, Sergey Levine, Shagun Sodhani, Tristan Deleu, Yoshua Bengio","submitted_at":"2019-06-11T02:31:38Z","abstract_excerpt":"Model-based Reinforcement Learning approaches have the promise of being sample efficient. Much of the progress in learning dynamics models in RL has been made by learning models via supervised learning. But traditional model-based approaches lead to `compounding errors' when the model is unrolled step by step. Essentially, the state transitions that the learner predicts (by unrolling the model for multiple steps) and the state transitions that the learner experiences (by acting in the environment) may not be consistent. There is enough evidence that humans build a model of the environment, not"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.04355","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UepN3lZ2qQyK+bQOi+Mp56/7bXwVPX2galwcVu8Nmr9xzB1EEnsU/u9kCmX4REh3Yfh1JKX2skUMekzb1XCzBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T06:12:35.486639Z"},"content_sha256":"9829652ab869e3aa55474edc3316ed34a1a9b796f218a8be0b819d64dbed4368","schema_version":"1.0","event_id":"sha256:9829652ab869e3aa55474edc3316ed34a1a9b796f218a8be0b819d64dbed4368"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/bundle.json","state_url":"https://pith.science/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T06:12:35Z","links":{"resolver":"https://pith.science/pith/D77MXYZZJH6VSOBBA7BW2RZD4F","bundle":"https://pith.science/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/bundle.json","state":"https://pith.science/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/state.json","well_known_bundle":"https://pith.science/.well-known/pith/D77MXYZZJH6VSOBBA7BW2RZD4F/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:D77MXYZZJH6VSOBBA7BW2RZD4F","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2dea3d69fcc5be717b7b3bc436ff66be6791296895a9ab37f7b676f3b98c5158","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-11T02:31:38Z","title_canon_sha256":"17ff3bb60a48cae1c96f790faf15743953ca56fef3f4321f9a663b266d0f110e"},"schema_version":"1.0","source":{"id":"1906.04355","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.04355","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"arxiv_version","alias_value":"1906.04355v1","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.04355","created_at":"2026-05-17T23:43:39Z"},{"alias_kind":"pith_short_12","alias_value":"D77MXYZZJH6V","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"D77MXYZZJH6VSOBB","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"D77MXYZZ","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:9829652ab869e3aa55474edc3316ed34a1a9b796f218a8be0b819d64dbed4368","target":"graph","created_at":"2026-05-17T23:43:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Model-based Reinforcement Learning approaches have the promise of being sample efficient. Much of the progress in learning dynamics models in RL has been made by learning models via supervised learning. But traditional model-based approaches lead to `compounding errors' when the model is unrolled step by step. Essentially, the state transitions that the learner predicts (by unrolling the model for multiple steps) and the state transitions that the learner experiences (by acting in the environment) may not be consistent. There is enough evidence that humans build a model of the environment, not","authors_text":"Anirudh Goyal, Jian Tang, Sergey Levine, Shagun Sodhani, Tristan Deleu, Yoshua Bengio","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-11T02:31:38Z","title":"Learning Powerful Policies by Using Consistent Dynamics Model"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.04355","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:51045fa203d39083ae568baee0fe6feae2c410b34ee74f2f73a7892073204c96","target":"record","created_at":"2026-05-17T23:43:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2dea3d69fcc5be717b7b3bc436ff66be6791296895a9ab37f7b676f3b98c5158","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-11T02:31:38Z","title_canon_sha256":"17ff3bb60a48cae1c96f790faf15743953ca56fef3f4321f9a663b266d0f110e"},"schema_version":"1.0","source":{"id":"1906.04355","kind":"arxiv","version":1}},"canonical_sha256":"1ffecbe33949fd59382107c36d4723e169b61128a11da62648960370b8521c1f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1ffecbe33949fd59382107c36d4723e169b61128a11da62648960370b8521c1f","first_computed_at":"2026-05-17T23:43:39.570221Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:43:39.570221Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DY93wwn7SFz6PrqOf9ptMYSE71VjAcMhU+3hSBsNVF1YIcGhgAbMOOKYY2cn5vbfra0b88Izul7hv04nsonvAQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:43:39.570781Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.04355","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:51045fa203d39083ae568baee0fe6feae2c410b34ee74f2f73a7892073204c96","sha256:9829652ab869e3aa55474edc3316ed34a1a9b796f218a8be0b819d64dbed4368"],"state_sha256":"c23ce515a135252f99758a30cbcb35bb8287d815f172a372bdc3041748f4e454"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZrIR6Z937YBastK82dye8aOKRRx2WWR9fxprAZf8TcgC9FN4OIla4TXb19A8gbyMsqD7B6JQ4JJAS41at4oZAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T06:12:35.490080Z","bundle_sha256":"18540e446f34978bcd5cfadd807a9c4080f0770dd62e15727ef549eec5494995"}}