{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:HUTWIJFKICAD3FHCLOBUS42XA7","short_pith_number":"pith:HUTWIJFK","schema_version":"1.0","canonical_sha256":"3d276424aa40803d94e25b8349735707dafc629de098bbd281307c4ea0134a7a","source":{"kind":"arxiv","id":"1808.09105","version":4},"attestation_state":"computed","paper":{"title":"SOLAR: Deep Structured Representations for Model-Based Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Laura Smith, Marvin Zhang, Matthew J. Johnson, Pieter Abbeel, Sergey Levine, Sharad Vikram","submitted_at":"2018-08-28T03:48:25Z","abstract_excerpt":"Model-based reinforcement learning (RL) has proven to be a data efficient approach for learning control tasks but is difficult to utilize in domains with complex observations such as images. In this paper, we present a method for learning representations that are suitable for iterative model-based policy improvement, even when the underlying dynamical system has complex dynamics and image observations, in that these representations are optimized for inferring simple dynamics and cost models given data from the current policy. This enables a model-based RL method based on the linear-quadratic r"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1808.09105","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-08-28T03:48:25Z","cross_cats_sorted":["cs.RO","stat.ML"],"title_canon_sha256":"d115f76c98523f73e25969f6f8666f719428fc44bf48d81ebc5de4554312e5b5","abstract_canon_sha256":"5529a9da1a07c49fee023fae29957d9793f73f6d2faf4c41b9eb3dcd105d265c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:43.081884Z","signature_b64":"TLNrvwFYAGZ22M0jtsEzreIlx2KUu4kpIZVwFUVQRASBDA0bvllR5wRdKu9LhKSxSgcsouBa7YeAR11CEYfvBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3d276424aa40803d94e25b8349735707dafc629de098bbd281307c4ea0134a7a","last_reissued_at":"2026-05-17T23:42:43.081245Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:43.081245Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SOLAR: Deep Structured Representations for Model-Based Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.RO","stat.ML"],"primary_cat":"cs.LG","authors_text":"Laura Smith, Marvin Zhang, Matthew J. Johnson, Pieter Abbeel, Sergey Levine, Sharad Vikram","submitted_at":"2018-08-28T03:48:25Z","abstract_excerpt":"Model-based reinforcement learning (RL) has proven to be a data efficient approach for learning control tasks but is difficult to utilize in domains with complex observations such as images. In this paper, we present a method for learning representations that are suitable for iterative model-based policy improvement, even when the underlying dynamical system has complex dynamics and image observations, in that these representations are optimized for inferring simple dynamics and cost models given data from the current policy. This enables a model-based RL method based on the linear-quadratic r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1808.09105","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1808.09105","created_at":"2026-05-17T23:42:43.081354+00:00"},{"alias_kind":"arxiv_version","alias_value":"1808.09105v4","created_at":"2026-05-17T23:42:43.081354+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1808.09105","created_at":"2026-05-17T23:42:43.081354+00:00"},{"alias_kind":"pith_short_12","alias_value":"HUTWIJFKICAD","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_16","alias_value":"HUTWIJFKICAD3FHC","created_at":"2026-05-18T12:32:28.185984+00:00"},{"alias_kind":"pith_short_8","alias_value":"HUTWIJFK","created_at":"2026-05-18T12:32:28.185984+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"1906.08649","citing_title":"Exploring Model-based Planning with Policy Networks","ref_index":43,"is_internal_anchor":true},{"citing_arxiv_id":"1910.07113","citing_title":"Solving Rubik's Cube with a Robot Hand","ref_index":121,"is_internal_anchor":true},{"citing_arxiv_id":"2005.01643","citing_title":"Offline Reinforcement Learning: Tutorial, Review, and Perspectives on Open Problems","ref_index":157,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7","json":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7.json","graph_json":"https://pith.science/api/pith-number/HUTWIJFKICAD3FHCLOBUS42XA7/graph.json","events_json":"https://pith.science/api/pith-number/HUTWIJFKICAD3FHCLOBUS42XA7/events.json","paper":"https://pith.science/paper/HUTWIJFK"},"agent_actions":{"view_html":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7","download_json":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7.json","view_paper":"https://pith.science/paper/HUTWIJFK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1808.09105&json=true","fetch_graph":"https://pith.science/api/pith-number/HUTWIJFKICAD3FHCLOBUS42XA7/graph.json","fetch_events":"https://pith.science/api/pith-number/HUTWIJFKICAD3FHCLOBUS42XA7/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7/action/storage_attestation","attest_author":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7/action/author_attestation","sign_citation":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7/action/citation_signature","submit_replication":"https://pith.science/pith/HUTWIJFKICAD3FHCLOBUS42XA7/action/replication_record"}},"created_at":"2026-05-17T23:42:43.081354+00:00","updated_at":"2026-05-17T23:42:43.081354+00:00"}