{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:FEZ2K7FDEH255PUEYP4VUAELEH","short_pith_number":"pith:FEZ2K7FD","schema_version":"1.0","canonical_sha256":"2933a57ca321f5debe84c3f95a008b21d31e96c5e208b57cdbc1a239e5996e7d","source":{"kind":"arxiv","id":"2412.09991","version":1},"attestation_state":"computed","paper":{"title":"Visual Object Tracking across Diverse Data Modalities: A Review","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Guang Dai, Jiazheng Xing, Jingdong Wang, Mengmeng Wang, Shuo Xin, Teli Ma, Xiaojun Hou, Yong Liu","submitted_at":"2024-12-13T09:25:18Z","abstract_excerpt":"Visual Object Tracking (VOT) is an attractive and significant research area in computer vision, which aims to recognize and track specific targets in video sequences where the target objects are arbitrary and class-agnostic. The VOT technology could be applied in various scenarios, processing data of diverse modalities such as RGB, thermal infrared and point cloud. Besides, since no one sensor could handle all the dynamic and varying environments, multi-modal VOT is also investigated. This paper presents a comprehensive survey of the recent progress of both single-modal and multi-modal VOT, es"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2412.09991","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2024-12-13T09:25:18Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a16c7a911a709bd3cd1bd2162b32cd28abaf5cbaceef84a6f04964a8e799fb84","abstract_canon_sha256":"b77e64a11c393d9eede92400f6c67191790d9b54fee80656cfbbca77bb44352e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T09:48:45.129954Z","signature_b64":"yJSPLAtcD5h3JjbbGZlUgquDeze2GGAsXa7H/fFZ8MzIGdcRkc01hJxIyPvsQV+kmMetFgWwMEsdsJAvHBgoCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2933a57ca321f5debe84c3f95a008b21d31e96c5e208b57cdbc1a239e5996e7d","last_reissued_at":"2026-07-05T09:48:45.129460Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T09:48:45.129460Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Visual Object Tracking across Diverse Data Modalities: A Review","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Guang Dai, Jiazheng Xing, Jingdong Wang, Mengmeng Wang, Shuo Xin, Teli Ma, Xiaojun Hou, Yong Liu","submitted_at":"2024-12-13T09:25:18Z","abstract_excerpt":"Visual Object Tracking (VOT) is an attractive and significant research area in computer vision, which aims to recognize and track specific targets in video sequences where the target objects are arbitrary and class-agnostic. The VOT technology could be applied in various scenarios, processing data of diverse modalities such as RGB, thermal infrared and point cloud. Besides, since no one sensor could handle all the dynamic and varying environments, multi-modal VOT is also investigated. This paper presents a comprehensive survey of the recent progress of both single-modal and multi-modal VOT, es"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2412.09991","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2412.09991/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2412.09991","created_at":"2026-07-05T09:48:45.129513+00:00"},{"alias_kind":"arxiv_version","alias_value":"2412.09991v1","created_at":"2026-07-05T09:48:45.129513+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2412.09991","created_at":"2026-07-05T09:48:45.129513+00:00"},{"alias_kind":"pith_short_12","alias_value":"FEZ2K7FDEH25","created_at":"2026-07-05T09:48:45.129513+00:00"},{"alias_kind":"pith_short_16","alias_value":"FEZ2K7FDEH255PUE","created_at":"2026-07-05T09:48:45.129513+00:00"},{"alias_kind":"pith_short_8","alias_value":"FEZ2K7FD","created_at":"2026-07-05T09:48:45.129513+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH","json":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH.json","graph_json":"https://pith.science/api/pith-number/FEZ2K7FDEH255PUEYP4VUAELEH/graph.json","events_json":"https://pith.science/api/pith-number/FEZ2K7FDEH255PUEYP4VUAELEH/events.json","paper":"https://pith.science/paper/FEZ2K7FD"},"agent_actions":{"view_html":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH","download_json":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH.json","view_paper":"https://pith.science/paper/FEZ2K7FD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2412.09991&json=true","fetch_graph":"https://pith.science/api/pith-number/FEZ2K7FDEH255PUEYP4VUAELEH/graph.json","fetch_events":"https://pith.science/api/pith-number/FEZ2K7FDEH255PUEYP4VUAELEH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH/action/storage_attestation","attest_author":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH/action/author_attestation","sign_citation":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH/action/citation_signature","submit_replication":"https://pith.science/pith/FEZ2K7FDEH255PUEYP4VUAELEH/action/replication_record"}},"created_at":"2026-07-05T09:48:45.129513+00:00","updated_at":"2026-07-05T09:48:45.129513+00:00"}