{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:GJAER5VRE2YKI5FC4YVVMI42HS","short_pith_number":"pith:GJAER5VR","schema_version":"1.0","canonical_sha256":"324048f6b126b0a474a2e62b56239a3ca3b51bbebddafe64eef62b2442995e05","source":{"kind":"arxiv","id":"1809.00786","version":2},"attestation_state":"computed","paper":{"title":"Mapping Instructions to Actions in 3D Environments with Visual Goal Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Andrew Bennett, Dipendra Misra, Eyvind Niklasson, Max Shatkhin, Valts Blukis, Yoav Artzi","submitted_at":"2018-09-04T03:36:21Z","abstract_excerpt":"We propose to decompose instruction execution to goal prediction and action generation. We design a model that maps raw visual observations to goals using LINGUNET, a language-conditioned image generation network, and then generates the actions required to complete them. Our model is trained from demonstration only without external resources. To evaluate our approach, we introduce two benchmarks for instruction following: LANI, a navigation task; and CHAI, where an agent executes household instructions. Our evaluation demonstrates the advantages of our model decomposition, and illustrates the "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1809.00786","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-09-04T03:36:21Z","cross_cats_sorted":[],"title_canon_sha256":"55d5a819d39a47440c85a6473e045711ac527c327e7301be528cedba59ae7662","abstract_canon_sha256":"d92ab2b09e86bb1208497a02b22851b4531a0d01cb550234a4862bd0faabd150"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:51:05.506835Z","signature_b64":"CRPTzZSCDzmn3QEIHMKAlH5i6X+x5Uwnn4iQf/vLhP/qx+wV2e3LVLRgGESYiB7n7mVa4miCtaIcHlQ2+HRLCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"324048f6b126b0a474a2e62b56239a3ca3b51bbebddafe64eef62b2442995e05","last_reissued_at":"2026-05-17T23:51:05.506294Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:51:05.506294Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mapping Instructions to Actions in 3D Environments with Visual Goal Prediction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Andrew Bennett, Dipendra Misra, Eyvind Niklasson, Max Shatkhin, Valts Blukis, Yoav Artzi","submitted_at":"2018-09-04T03:36:21Z","abstract_excerpt":"We propose to decompose instruction execution to goal prediction and action generation. We design a model that maps raw visual observations to goals using LINGUNET, a language-conditioned image generation network, and then generates the actions required to complete them. Our model is trained from demonstration only without external resources. To evaluate our approach, we introduce two benchmarks for instruction following: LANI, a navigation task; and CHAI, where an agent executes household instructions. Our evaluation demonstrates the advantages of our model decomposition, and illustrates the "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.00786","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1809.00786","created_at":"2026-05-17T23:51:05.506379+00:00"},{"alias_kind":"arxiv_version","alias_value":"1809.00786v2","created_at":"2026-05-17T23:51:05.506379+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.00786","created_at":"2026-05-17T23:51:05.506379+00:00"},{"alias_kind":"pith_short_12","alias_value":"GJAER5VRE2YK","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_16","alias_value":"GJAER5VRE2YKI5FC","created_at":"2026-05-18T12:32:25.280505+00:00"},{"alias_kind":"pith_short_8","alias_value":"GJAER5VR","created_at":"2026-05-18T12:32:25.280505+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS","json":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS.json","graph_json":"https://pith.science/api/pith-number/GJAER5VRE2YKI5FC4YVVMI42HS/graph.json","events_json":"https://pith.science/api/pith-number/GJAER5VRE2YKI5FC4YVVMI42HS/events.json","paper":"https://pith.science/paper/GJAER5VR"},"agent_actions":{"view_html":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS","download_json":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS.json","view_paper":"https://pith.science/paper/GJAER5VR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1809.00786&json=true","fetch_graph":"https://pith.science/api/pith-number/GJAER5VRE2YKI5FC4YVVMI42HS/graph.json","fetch_events":"https://pith.science/api/pith-number/GJAER5VRE2YKI5FC4YVVMI42HS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS/action/storage_attestation","attest_author":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS/action/author_attestation","sign_citation":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS/action/citation_signature","submit_replication":"https://pith.science/pith/GJAER5VRE2YKI5FC4YVVMI42HS/action/replication_record"}},"created_at":"2026-05-17T23:51:05.506379+00:00","updated_at":"2026-05-17T23:51:05.506379+00:00"}