{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:7AXBKQKIOCJS5OZJVQZBPCUJ5Z","short_pith_number":"pith:7AXBKQKI","schema_version":"1.0","canonical_sha256":"f82e15414870932ebb29ac32178a89ee7b25dc2e17288bf249ef23e621e47fe4","source":{"kind":"arxiv","id":"1506.02640","version":5},"attestation_state":"computed","paper":{"title":"You Only Look Once: Unified, Real-Time Object Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ali Farhadi, Joseph Redmon, Ross Girshick, Santosh Divvala","submitted_at":"2015-06-08T19:52:52Z","abstract_excerpt":"We present YOLO, a new approach to object detection. Prior work on object detection repurposes classifiers to perform detection. Instead, we frame object detection as a regression problem to spatially separated bounding boxes and associated class probabilities. A single neural network predicts bounding boxes and class probabilities directly from full images in one evaluation. Since the whole detection pipeline is a single network, it can be optimized end-to-end directly on detection performance.\n  Our unified architecture is extremely fast. Our base YOLO model processes images in real-time at "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1506.02640","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-06-08T19:52:52Z","cross_cats_sorted":[],"title_canon_sha256":"a4a232b1bbdd4dd0dbdd7e517fe2e02af0d833a4435c2b7d699ebac2e088c8f8","abstract_canon_sha256":"08c804081fdc34f9f23fdfb064ce0a9203194a89a9739f967780a0ff7a25d04b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:15:15.185235Z","signature_b64":"31rGmF0yGAuE6mTFLZ9AGXh5Y5QkOtuOegOpravkkrElOrvJGOrtMBCFJYRCKBRnfhimu37uLPw/kz5/26KEDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f82e15414870932ebb29ac32178a89ee7b25dc2e17288bf249ef23e621e47fe4","last_reissued_at":"2026-05-18T01:15:15.184583Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:15:15.184583Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"You Only Look Once: Unified, Real-Time Object Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Ali Farhadi, Joseph Redmon, Ross Girshick, Santosh Divvala","submitted_at":"2015-06-08T19:52:52Z","abstract_excerpt":"We present YOLO, a new approach to object detection. Prior work on object detection repurposes classifiers to perform detection. Instead, we frame object detection as a regression problem to spatially separated bounding boxes and associated class probabilities. A single neural network predicts bounding boxes and class probabilities directly from full images in one evaluation. Since the whole detection pipeline is a single network, it can be optimized end-to-end directly on detection performance.\n  Our unified architecture is extremely fast. Our base YOLO model processes images in real-time at "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.02640","kind":"arxiv","version":5},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1506.02640","created_at":"2026-05-18T01:15:15.184701+00:00"},{"alias_kind":"arxiv_version","alias_value":"1506.02640v5","created_at":"2026-05-18T01:15:15.184701+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.02640","created_at":"2026-05-18T01:15:15.184701+00:00"},{"alias_kind":"pith_short_12","alias_value":"7AXBKQKIOCJS","created_at":"2026-05-18T12:29:07.941421+00:00"},{"alias_kind":"pith_short_16","alias_value":"7AXBKQKIOCJS5OZJ","created_at":"2026-05-18T12:29:07.941421+00:00"},{"alias_kind":"pith_short_8","alias_value":"7AXBKQKI","created_at":"2026-05-18T12:29:07.941421+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":19,"internal_anchor_count":9,"sample":[{"citing_arxiv_id":"1906.09266","citing_title":"A Multitask Network for Localization and Recognition of Text in Images","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"1907.00408","citing_title":"GarmNet: Improving Global with Local Perception for Robotic Laundry Folding","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"1907.08752","citing_title":"RobustTP: End-to-End Trajectory Prediction for Heterogeneous Road-Agents in Dense Traffic with Noisy Sensor Inputs","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22200","citing_title":"OSS: Open Suturing Skills Vision-Based Assessment Challenge 2024-2025","ref_index":64,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15551","citing_title":"Characterizing Learning in Deep Neural Networks using Tractable Algorithmic Complexity Analysis","ref_index":77,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18173","citing_title":"Do You Need Text Rectification? Soft Attention Mask Embedding for Rectification-Free Scene Text Spotting","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19866","citing_title":"Structured Layout Priors for Robust Out-of-Distribution Visual Document Understanding","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18310","citing_title":"Comparative blobs and holes dynamics in a tokamak plasma: deep learning analysis of fast imaging data","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2510.01433","citing_title":"AFFORD2ACT: Affordance-Guided Automatic Keypoint Selection for Generalizable and Lightweight Robotic Manipulation","ref_index":39,"is_internal_anchor":true},{"citing_arxiv_id":"2604.01044","citing_title":"A global dataset of continuous urban dashcam driving","ref_index":58,"is_internal_anchor":false},{"citing_arxiv_id":"2604.03401","citing_title":"Can LLMs Reason About Attention? Towards Zero-Shot Analysis of Multimodal Classroom Behavior","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12503","citing_title":"Unveiling Hidden Lyman Alpha Emitters in the DESI DR1 Data","ref_index":65,"is_internal_anchor":false},{"citing_arxiv_id":"2604.00313","citing_title":"Label-efficient underwater species classification with logistic regression on frozen foundation model embeddings","ref_index":21,"is_internal_anchor":false},{"citing_arxiv_id":"2604.13278","citing_title":"DroneScan-YOLO: Redundancy-Aware Lightweight Detection for Tiny Objects in UAV Imagery","ref_index":13,"is_internal_anchor":false},{"citing_arxiv_id":"2604.11042","citing_title":"Improving Layout Representation Learning Across Inconsistently Annotated Datasets via Agentic Harmonization","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08456","citing_title":"Entropy-Gradient Grounding: Training-Free Evidence Retrieval in Vision-Language Models","ref_index":25,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08722","citing_title":"AI Driven Soccer Analysis Using Computer Vision","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.12618","citing_title":"CODO: An Automated Compiler for Comprehensive Dataflow Optimization","ref_index":36,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17530","citing_title":"Real-Time Cellist Postural Evaluation With On-Device Computer Vision","ref_index":14,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z","json":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z.json","graph_json":"https://pith.science/api/pith-number/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/graph.json","events_json":"https://pith.science/api/pith-number/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/events.json","paper":"https://pith.science/paper/7AXBKQKI"},"agent_actions":{"view_html":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z","download_json":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z.json","view_paper":"https://pith.science/paper/7AXBKQKI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1506.02640&json=true","fetch_graph":"https://pith.science/api/pith-number/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/graph.json","fetch_events":"https://pith.science/api/pith-number/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/action/storage_attestation","attest_author":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/action/author_attestation","sign_citation":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/action/citation_signature","submit_replication":"https://pith.science/pith/7AXBKQKIOCJS5OZJVQZBPCUJ5Z/action/replication_record"}},"created_at":"2026-05-18T01:15:15.184701+00:00","updated_at":"2026-05-18T01:15:15.184701+00:00"}