{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:KOCTRMCC2MKUCTKZ43RAXR653I","short_pith_number":"pith:KOCTRMCC","schema_version":"1.0","canonical_sha256":"538538b042d315414d59e6e20bc7ddda37f0f95b0a04c413f7c4b78b8f1ae65a","source":{"kind":"arxiv","id":"1903.01959","version":1},"attestation_state":"computed","paper":{"title":"Learning Exploration Policies for Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Abhinav Gupta, Saurabh Gupta, Tao Chen","submitted_at":"2019-03-05T18:03:47Z","abstract_excerpt":"Numerous past works have tackled the problem of task-driven navigation. But, how to effectively explore a new environment to enable a variety of down-stream tasks has received much less attention. In this work, we study how agents can autonomously explore realistic and complex 3D environments without the context of task-rewards. We propose a learning-based approach and investigate different policy architectures, reward functions, and training paradigms. We find that the use of policies with spatial memory that are bootstrapped with imitation learning and finally finetuned with coverage rewards"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.01959","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.RO","submitted_at":"2019-03-05T18:03:47Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"e301779562f72f981a70f1d83308b15c51d2e63d2282ef9b45bc4576f2315ca3","abstract_canon_sha256":"21febad9cad5cd9717013f8f3e29dc28d5e6f82a1034bb6c214e4b94ae9111c8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:51:59.367126Z","signature_b64":"JaFVeeAkzzXw1bIq1aQw4wi61lbdSQiACcolhJw09BgiN7uW9RkR1Hqx2+awSXSFYlFXD1B4B9tbkExmvYn3Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"538538b042d315414d59e6e20bc7ddda37f0f95b0a04c413f7c4b78b8f1ae65a","last_reissued_at":"2026-05-17T23:51:59.366714Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:51:59.366714Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Exploration Policies for Navigation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.RO","authors_text":"Abhinav Gupta, Saurabh Gupta, Tao Chen","submitted_at":"2019-03-05T18:03:47Z","abstract_excerpt":"Numerous past works have tackled the problem of task-driven navigation. But, how to effectively explore a new environment to enable a variety of down-stream tasks has received much less attention. In this work, we study how agents can autonomously explore realistic and complex 3D environments without the context of task-rewards. We propose a learning-based approach and investigate different policy architectures, reward functions, and training paradigms. We find that the use of policies with spatial memory that are bootstrapped with imitation learning and finally finetuned with coverage rewards"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.01959","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.01959","created_at":"2026-05-17T23:51:59.366781+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.01959v1","created_at":"2026-05-17T23:51:59.366781+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.01959","created_at":"2026-05-17T23:51:59.366781+00:00"},{"alias_kind":"pith_short_12","alias_value":"KOCTRMCC2MKU","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_16","alias_value":"KOCTRMCC2MKUCTKZ","created_at":"2026-05-18T12:33:21.387695+00:00"},{"alias_kind":"pith_short_8","alias_value":"KOCTRMCC","created_at":"2026-05-18T12:33:21.387695+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I","json":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I.json","graph_json":"https://pith.science/api/pith-number/KOCTRMCC2MKUCTKZ43RAXR653I/graph.json","events_json":"https://pith.science/api/pith-number/KOCTRMCC2MKUCTKZ43RAXR653I/events.json","paper":"https://pith.science/paper/KOCTRMCC"},"agent_actions":{"view_html":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I","download_json":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I.json","view_paper":"https://pith.science/paper/KOCTRMCC","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.01959&json=true","fetch_graph":"https://pith.science/api/pith-number/KOCTRMCC2MKUCTKZ43RAXR653I/graph.json","fetch_events":"https://pith.science/api/pith-number/KOCTRMCC2MKUCTKZ43RAXR653I/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I/action/timestamp_anchor","attest_storage":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I/action/storage_attestation","attest_author":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I/action/author_attestation","sign_citation":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I/action/citation_signature","submit_replication":"https://pith.science/pith/KOCTRMCC2MKUCTKZ43RAXR653I/action/replication_record"}},"created_at":"2026-05-17T23:51:59.366781+00:00","updated_at":"2026-05-17T23:51:59.366781+00:00"}