{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:ZEMSOJBUS6GSI6PNC7NETZ2M42","short_pith_number":"pith:ZEMSOJBU","schema_version":"1.0","canonical_sha256":"c919272434978d2479ed17da49e74ce6b3f1afc5277b18bc4da443972cde08d2","source":{"kind":"arxiv","id":"1804.07437","version":2},"attestation_state":"computed","paper":{"title":"Vision Meets Drones: A Challenge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haibin Ling, Longyin Wen, Pengfei Zhu, Qinghua Hu, Xiao Bian","submitted_at":"2018-04-20T03:19:21Z","abstract_excerpt":"In this paper we present a large-scale visual object detection and tracking benchmark, named VisDrone2018, aiming at advancing visual understanding tasks on the drone platform. The images and video sequences in the benchmark were captured over various urban/suburban areas of 14 different cities across China from north to south. Specifically, VisDrone2018 consists of 263 video clips and 10,209 images (no overlap with video clips) with rich annotations, including object bounding boxes, object categories, occlusion, truncation ratios, etc. With intensive amount of effort, our benchmark has more t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1804.07437","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-04-20T03:19:21Z","cross_cats_sorted":[],"title_canon_sha256":"ae30626cf1dfb6d7abd26703f3531222bc23f09e93ed45f9949779c7ef73cbd1","abstract_canon_sha256":"6fd314bbfe5929b919efe5a7d0265de327a178c59a5bb6c319f5f1df02f7e415"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:17:52.914652Z","signature_b64":"z8muc1p1qyotkcX0OmKeeqx6WZsg8x5nWPMQP2dQO7UmcS0ZudDLOi+S3ooHLoX0AWy8H9tkk2nvs5h6EQQWAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c919272434978d2479ed17da49e74ce6b3f1afc5277b18bc4da443972cde08d2","last_reissued_at":"2026-05-18T00:17:52.914045Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:17:52.914045Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Vision Meets Drones: A Challenge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haibin Ling, Longyin Wen, Pengfei Zhu, Qinghua Hu, Xiao Bian","submitted_at":"2018-04-20T03:19:21Z","abstract_excerpt":"In this paper we present a large-scale visual object detection and tracking benchmark, named VisDrone2018, aiming at advancing visual understanding tasks on the drone platform. The images and video sequences in the benchmark were captured over various urban/suburban areas of 14 different cities across China from north to south. Specifically, VisDrone2018 consists of 263 video clips and 10,209 images (no overlap with video clips) with rich annotations, including object bounding boxes, object categories, occlusion, truncation ratios, etc. With intensive amount of effort, our benchmark has more t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1804.07437","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1804.07437","created_at":"2026-05-18T00:17:52.914126+00:00"},{"alias_kind":"arxiv_version","alias_value":"1804.07437v2","created_at":"2026-05-18T00:17:52.914126+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1804.07437","created_at":"2026-05-18T00:17:52.914126+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZEMSOJBUS6GS","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZEMSOJBUS6GSI6PN","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZEMSOJBU","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.13131","citing_title":"ERPPO: Entropy Regularization-based Proximal Policy Optimization","ref_index":235,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07991","citing_title":"MotionScape: A Large-Scale Real-World Highly Dynamic UAV Video Dataset for World Models","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2604.15670","citing_title":"PixDLM: A Dual-Path Multimodal Language Model for UAV Reasoning Segmentation","ref_index":63,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42","json":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42.json","graph_json":"https://pith.science/api/pith-number/ZEMSOJBUS6GSI6PNC7NETZ2M42/graph.json","events_json":"https://pith.science/api/pith-number/ZEMSOJBUS6GSI6PNC7NETZ2M42/events.json","paper":"https://pith.science/paper/ZEMSOJBU"},"agent_actions":{"view_html":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42","download_json":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42.json","view_paper":"https://pith.science/paper/ZEMSOJBU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1804.07437&json=true","fetch_graph":"https://pith.science/api/pith-number/ZEMSOJBUS6GSI6PNC7NETZ2M42/graph.json","fetch_events":"https://pith.science/api/pith-number/ZEMSOJBUS6GSI6PNC7NETZ2M42/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42/action/storage_attestation","attest_author":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42/action/author_attestation","sign_citation":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42/action/citation_signature","submit_replication":"https://pith.science/pith/ZEMSOJBUS6GSI6PNC7NETZ2M42/action/replication_record"}},"created_at":"2026-05-18T00:17:52.914126+00:00","updated_at":"2026-05-18T00:17:52.914126+00:00"}