{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:B7EP4CEHNWEP6DGCPN37SO53FD","short_pith_number":"pith:B7EP4CEH","schema_version":"1.0","canonical_sha256":"0fc8fe08876d88ff0cc27b77f93bbb28d405c92d7c6a7d51d0f3ab4f0e12c267","source":{"kind":"arxiv","id":"1711.00267","version":2},"attestation_state":"computed","paper":{"title":"Acquiring Target Stacking Skills by Goal-Parameterized Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.LG"],"primary_cat":"cs.RO","authors_text":"Jeannette Bohg, Mario Fritz, Wenbin Li","submitted_at":"2017-11-01T10:04:29Z","abstract_excerpt":"Understanding physical phenomena is a key component of human intelligence and enables physical interaction with previously unseen environments. In this paper, we study how an artificial agent can autonomously acquire this intuition through interaction with the environment. We created a synthetic block stacking environment with physics simulation in which the agent can learn a policy end-to-end through trial and error. Thereby, we bypass to explicitly model physical knowledge within the policy. We are specifically interested in tasks that require the agent to reach a given goal state that may b"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1711.00267","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2017-11-01T10:04:29Z","cross_cats_sorted":["cs.AI","cs.CV","cs.LG"],"title_canon_sha256":"f3645b8781abc3ec8d7ee4e4faafa8bc03732e09ec4a9f8dff63bc28b78dd10b","abstract_canon_sha256":"65776d14b7498272f98d8a894799e305bae4a850437dd32d333774eb0eb34345"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:29:51.136113Z","signature_b64":"zGyklw9X6ExcyuNB0Vrqm2flT8DLoplFAPe9O3G1Hy3pQ7S0LRReYQ22mVgOQW6r2xic7j6suwa2F4VHtBNCAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0fc8fe08876d88ff0cc27b77f93bbb28d405c92d7c6a7d51d0f3ab4f0e12c267","last_reissued_at":"2026-05-18T00:29:51.135572Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:29:51.135572Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Acquiring Target Stacking Skills by Goal-Parameterized Deep Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.LG"],"primary_cat":"cs.RO","authors_text":"Jeannette Bohg, Mario Fritz, Wenbin Li","submitted_at":"2017-11-01T10:04:29Z","abstract_excerpt":"Understanding physical phenomena is a key component of human intelligence and enables physical interaction with previously unseen environments. In this paper, we study how an artificial agent can autonomously acquire this intuition through interaction with the environment. We created a synthetic block stacking environment with physics simulation in which the agent can learn a policy end-to-end through trial and error. Thereby, we bypass to explicitly model physical knowledge within the policy. We are specifically interested in tasks that require the agent to reach a given goal state that may b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.00267","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1711.00267","created_at":"2026-05-18T00:29:51.135646+00:00"},{"alias_kind":"arxiv_version","alias_value":"1711.00267v2","created_at":"2026-05-18T00:29:51.135646+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.00267","created_at":"2026-05-18T00:29:51.135646+00:00"},{"alias_kind":"pith_short_12","alias_value":"B7EP4CEHNWEP","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_16","alias_value":"B7EP4CEHNWEP6DGC","created_at":"2026-05-18T12:31:08.081275+00:00"},{"alias_kind":"pith_short_8","alias_value":"B7EP4CEH","created_at":"2026-05-18T12:31:08.081275+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD","json":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD.json","graph_json":"https://pith.science/api/pith-number/B7EP4CEHNWEP6DGCPN37SO53FD/graph.json","events_json":"https://pith.science/api/pith-number/B7EP4CEHNWEP6DGCPN37SO53FD/events.json","paper":"https://pith.science/paper/B7EP4CEH"},"agent_actions":{"view_html":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD","download_json":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD.json","view_paper":"https://pith.science/paper/B7EP4CEH","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1711.00267&json=true","fetch_graph":"https://pith.science/api/pith-number/B7EP4CEHNWEP6DGCPN37SO53FD/graph.json","fetch_events":"https://pith.science/api/pith-number/B7EP4CEHNWEP6DGCPN37SO53FD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD/action/storage_attestation","attest_author":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD/action/author_attestation","sign_citation":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD/action/citation_signature","submit_replication":"https://pith.science/pith/B7EP4CEHNWEP6DGCPN37SO53FD/action/replication_record"}},"created_at":"2026-05-18T00:29:51.135646+00:00","updated_at":"2026-05-18T00:29:51.135646+00:00"}