{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:GCLR5BUPG53JTRPDK75QJHPFPQ","short_pith_number":"pith:GCLR5BUP","schema_version":"1.0","canonical_sha256":"30971e868f377699c5e357fb049de57c26a79d882f84a3d72259b968ae27041c","source":{"kind":"arxiv","id":"1901.03035","version":1},"attestation_state":"computed","paper":{"title":"Self-Monitoring Navigation Agent via Auxiliary Progress Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.CV","cs.RO"],"primary_cat":"cs.AI","authors_text":"Caiming Xiong, Chih-Yao Ma, Ghassan AlRegib, Jiasen Lu, Richard Socher, Zsolt Kira, Zuxuan Wu","submitted_at":"2019-01-10T06:46:50Z","abstract_excerpt":"The Vision-and-Language Navigation (VLN) task entails an agent following navigational instruction in photo-realistic unknown environments. This challenging task demands that the agent be aware of which instruction was completed, which instruction is needed next, which way to go, and its navigation progress towards the goal. In this paper, we introduce a self-monitoring agent with two complementary components: (1) visual-textual co-grounding module to locate the instruction completed in the past, the instruction required for the next action, and the next moving direction from surrounding images"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.03035","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2019-01-10T06:46:50Z","cross_cats_sorted":["cs.CL","cs.CV","cs.RO"],"title_canon_sha256":"35e1bd6f2dbecac2960501ff88e4275d9350fd90948e3f1c63afb2d3d157ac5e","abstract_canon_sha256":"b6a609ebcb6930aa93c6bd2737eb3f409350adc16ecba1fb62435672d7e0a0c8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:56:35.746646Z","signature_b64":"Rfw5mVWK4eO64kOCzhqIS7+iq12WwOKllo+vaDmMcNC9DZQRxz7XYTLbNR+/3F2sjx7eOb29gLw8C+xKznKDCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"30971e868f377699c5e357fb049de57c26a79d882f84a3d72259b968ae27041c","last_reissued_at":"2026-05-17T23:56:35.746046Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:56:35.746046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Self-Monitoring Navigation Agent via Auxiliary Progress Estimation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.CV","cs.RO"],"primary_cat":"cs.AI","authors_text":"Caiming Xiong, Chih-Yao Ma, Ghassan AlRegib, Jiasen Lu, Richard Socher, Zsolt Kira, Zuxuan Wu","submitted_at":"2019-01-10T06:46:50Z","abstract_excerpt":"The Vision-and-Language Navigation (VLN) task entails an agent following navigational instruction in photo-realistic unknown environments. This challenging task demands that the agent be aware of which instruction was completed, which instruction is needed next, which way to go, and its navigation progress towards the goal. In this paper, we introduce a self-monitoring agent with two complementary components: (1) visual-textual co-grounding module to locate the instruction completed in the past, the instruction required for the next action, and the next moving direction from surrounding images"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.03035","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.03035","created_at":"2026-05-17T23:56:35.746126+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.03035v1","created_at":"2026-05-17T23:56:35.746126+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.03035","created_at":"2026-05-17T23:56:35.746126+00:00"},{"alias_kind":"pith_short_12","alias_value":"GCLR5BUPG53J","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"GCLR5BUPG53JTRPD","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"GCLR5BUP","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.16899","citing_title":"LASAR: Towards Spatio-temporal Reasoning with Latent Cognitive Map","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2402.15852","citing_title":"NaVid: Video-based VLM Plans the Next Step for Vision-and-Language Navigation","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27620","citing_title":"SpaAct: Spatially-Activated Transition Learning with Curriculum Adaptation for Vision-Language Navigation","ref_index":49,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ","json":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ.json","graph_json":"https://pith.science/api/pith-number/GCLR5BUPG53JTRPDK75QJHPFPQ/graph.json","events_json":"https://pith.science/api/pith-number/GCLR5BUPG53JTRPDK75QJHPFPQ/events.json","paper":"https://pith.science/paper/GCLR5BUP"},"agent_actions":{"view_html":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ","download_json":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ.json","view_paper":"https://pith.science/paper/GCLR5BUP","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.03035&json=true","fetch_graph":"https://pith.science/api/pith-number/GCLR5BUPG53JTRPDK75QJHPFPQ/graph.json","fetch_events":"https://pith.science/api/pith-number/GCLR5BUPG53JTRPDK75QJHPFPQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ/action/storage_attestation","attest_author":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ/action/author_attestation","sign_citation":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ/action/citation_signature","submit_replication":"https://pith.science/pith/GCLR5BUPG53JTRPDK75QJHPFPQ/action/replication_record"}},"created_at":"2026-05-17T23:56:35.746126+00:00","updated_at":"2026-05-17T23:56:35.746126+00:00"}