{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:IS6N23UMGZFTRNNME7H34CEVJ6","short_pith_number":"pith:IS6N23UM","schema_version":"1.0","canonical_sha256":"44bcdd6e8c364b38b5ac27cfbe08954fb45bc62c7c51c2724545558b03620304","source":{"kind":"arxiv","id":"1904.04404","version":1},"attestation_state":"computed","paper":{"title":"Embodied Visual Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.RO"],"primary_cat":"cs.CV","authors_text":"David Crandall, Devi Parikh, Dhruv Batra, Jianwei Yang, Mingze Xu, Xinlei Chen, Zhile Ren","submitted_at":"2019-04-09T00:33:17Z","abstract_excerpt":"Passive visual systems typically fail to recognize objects in the amodal setting where they are heavily occluded. In contrast, humans and other embodied agents have the ability to move in the environment, and actively control the viewing angle to better understand object shapes and semantics. In this work, we introduce the task of Embodied Visual Recognition (EVR): An agent is instantiated in a 3D environment close to an occluded target object, and is free to move in the environment to perform object classification, amodal object localization, and amodal object segmentation. To address this, w"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.04404","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-04-09T00:33:17Z","cross_cats_sorted":["cs.AI","cs.LG","cs.RO"],"title_canon_sha256":"efbf825d9e1416886fcae7dd58124cc3336f4f613e57130afef27b8627614af0","abstract_canon_sha256":"d82b54de8133fa5ef4b5f8dbad50b3387dc73f6dbe646e7477c4057b1da6e610"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:48:59.084930Z","signature_b64":"dxe7C1DZ9SD6RV9y6sdBNqHT3LvacnaW2ScvN2dgr/SCb08LRCHOSmiA83tyLjuX90ftq068HZOq+pM/tX9vAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"44bcdd6e8c364b38b5ac27cfbe08954fb45bc62c7c51c2724545558b03620304","last_reissued_at":"2026-05-17T23:48:59.084560Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:48:59.084560Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Embodied Visual Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.RO"],"primary_cat":"cs.CV","authors_text":"David Crandall, Devi Parikh, Dhruv Batra, Jianwei Yang, Mingze Xu, Xinlei Chen, Zhile Ren","submitted_at":"2019-04-09T00:33:17Z","abstract_excerpt":"Passive visual systems typically fail to recognize objects in the amodal setting where they are heavily occluded. In contrast, humans and other embodied agents have the ability to move in the environment, and actively control the viewing angle to better understand object shapes and semantics. In this work, we introduce the task of Embodied Visual Recognition (EVR): An agent is instantiated in a 3D environment close to an occluded target object, and is free to move in the environment to perform object classification, amodal object localization, and amodal object segmentation. To address this, w"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.04404","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.04404","created_at":"2026-05-17T23:48:59.084610+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.04404v1","created_at":"2026-05-17T23:48:59.084610+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.04404","created_at":"2026-05-17T23:48:59.084610+00:00"},{"alias_kind":"pith_short_12","alias_value":"IS6N23UMGZFT","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"IS6N23UMGZFTRNNM","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"IS6N23UM","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2605.08816","citing_title":"Mirror, Mirror on the Wall: Can VLM Agents Tell Who They Are at All?","ref_index":36,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6","json":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6.json","graph_json":"https://pith.science/api/pith-number/IS6N23UMGZFTRNNME7H34CEVJ6/graph.json","events_json":"https://pith.science/api/pith-number/IS6N23UMGZFTRNNME7H34CEVJ6/events.json","paper":"https://pith.science/paper/IS6N23UM"},"agent_actions":{"view_html":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6","download_json":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6.json","view_paper":"https://pith.science/paper/IS6N23UM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.04404&json=true","fetch_graph":"https://pith.science/api/pith-number/IS6N23UMGZFTRNNME7H34CEVJ6/graph.json","fetch_events":"https://pith.science/api/pith-number/IS6N23UMGZFTRNNME7H34CEVJ6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6/action/storage_attestation","attest_author":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6/action/author_attestation","sign_citation":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6/action/citation_signature","submit_replication":"https://pith.science/pith/IS6N23UMGZFTRNNME7H34CEVJ6/action/replication_record"}},"created_at":"2026-05-17T23:48:59.084610+00:00","updated_at":"2026-05-17T23:48:59.084610+00:00"}