{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:FW3P5GL42MOCZ54XSH7MRVAFFZ","short_pith_number":"pith:FW3P5GL4","schema_version":"1.0","canonical_sha256":"2db6fe997cd31c2cf79791fec8d4052e565bc2a29990bb68a979ae1e960a22cf","source":{"kind":"arxiv","id":"1705.07177","version":2},"attestation_state":"computed","paper":{"title":"Model-Based Planning with Discrete and Continuous Actions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mikael Henaff, William F. Whitney, Yann LeCun","submitted_at":"2017-05-19T20:38:49Z","abstract_excerpt":"Action planning using learned and differentiable forward models of the world is a general approach which has a number of desirable properties, including improved sample complexity over model-free RL methods, reuse of learned models across different tasks, and the ability to perform efficient gradient-based optimization in continuous action spaces. However, this approach does not apply straightforwardly when the action space is discrete. In this work, we show that it is in fact possible to effectively perform planning via backprop in discrete action spaces, using a simple paramaterization of th"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1705.07177","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2017-05-19T20:38:49Z","cross_cats_sorted":[],"title_canon_sha256":"121b4c9d571688c9b8f371bbfbd970c9d21af36b2a9d64ecc2796e6bdae9eb83","abstract_canon_sha256":"3db628a2f9d67e7a89e608da875adcc7cfac1665afacddbffdade88a47e03ea3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:19:17.230618Z","signature_b64":"IWV2mW7tBcpuNSMpNti/y4oWZPrydguUoa9k1H1f21cfrQuIgTYXX719nRHBtuf4BC6GCRfPT3sDbq1svb9qBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2db6fe997cd31c2cf79791fec8d4052e565bc2a29990bb68a979ae1e960a22cf","last_reissued_at":"2026-05-18T00:19:17.230049Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:19:17.230049Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Model-Based Planning with Discrete and Continuous Actions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Mikael Henaff, William F. Whitney, Yann LeCun","submitted_at":"2017-05-19T20:38:49Z","abstract_excerpt":"Action planning using learned and differentiable forward models of the world is a general approach which has a number of desirable properties, including improved sample complexity over model-free RL methods, reuse of learned models across different tasks, and the ability to perform efficient gradient-based optimization in continuous action spaces. However, this approach does not apply straightforwardly when the action space is discrete. In this work, we show that it is in fact possible to effectively perform planning via backprop in discrete action spaces, using a simple paramaterization of th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.07177","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1705.07177","created_at":"2026-05-18T00:19:17.230147+00:00"},{"alias_kind":"arxiv_version","alias_value":"1705.07177v2","created_at":"2026-05-18T00:19:17.230147+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.07177","created_at":"2026-05-18T00:19:17.230147+00:00"},{"alias_kind":"pith_short_12","alias_value":"FW3P5GL42MOC","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_16","alias_value":"FW3P5GL42MOCZ54X","created_at":"2026-05-18T12:31:15.632608+00:00"},{"alias_kind":"pith_short_8","alias_value":"FW3P5GL4","created_at":"2026-05-18T12:31:15.632608+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2605.04568","citing_title":"Dream-MPC: Gradient-Based Model Predictive Control with Latent Imagination","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21800","citing_title":"stable-worldmodel: A Platform for Reproducible World Modeling Research and Evaluation","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2305.14992","citing_title":"Reasoning with Language Model is Planning with World Model","ref_index":82,"is_internal_anchor":true},{"citing_arxiv_id":"2010.02193","citing_title":"Mastering Atari with Discrete World Models","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"1912.01603","citing_title":"Dream to Control: Learning Behaviors by Latent Imagination","ref_index":22,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04568","citing_title":"Dream-MPC: Gradient-Based Model Predictive Control with Latent Imagination","ref_index":3,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ","json":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ.json","graph_json":"https://pith.science/api/pith-number/FW3P5GL42MOCZ54XSH7MRVAFFZ/graph.json","events_json":"https://pith.science/api/pith-number/FW3P5GL42MOCZ54XSH7MRVAFFZ/events.json","paper":"https://pith.science/paper/FW3P5GL4"},"agent_actions":{"view_html":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ","download_json":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ.json","view_paper":"https://pith.science/paper/FW3P5GL4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1705.07177&json=true","fetch_graph":"https://pith.science/api/pith-number/FW3P5GL42MOCZ54XSH7MRVAFFZ/graph.json","fetch_events":"https://pith.science/api/pith-number/FW3P5GL42MOCZ54XSH7MRVAFFZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ/action/storage_attestation","attest_author":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ/action/author_attestation","sign_citation":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ/action/citation_signature","submit_replication":"https://pith.science/pith/FW3P5GL42MOCZ54XSH7MRVAFFZ/action/replication_record"}},"created_at":"2026-05-18T00:19:17.230147+00:00","updated_at":"2026-05-18T00:19:17.230147+00:00"}