{"work":{"id":"4e842f69-fa99-4dde-b8a7-b5a558d4c80b","openalex_id":null,"doi":null,"arxiv_id":"2203.03605","raw_key":null,"title":"DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection","authors":null,"authors_text":"Hao Zhang, Feng Li, Shilong Liu, Lei Zhang, Hang Su, Jun Zhu","year":2022,"venue":"cs.CV","abstract":"We present DINO (\\textbf{D}ETR with \\textbf{I}mproved de\\textbf{N}oising anch\\textbf{O}r boxes), a state-of-the-art end-to-end object detector. % in this paper. DINO improves over previous DETR-like models in performance and efficiency by using a contrastive way for denoising training, a mixed query selection method for anchor initialization, and a look forward twice scheme for box prediction. DINO achieves $49.4$AP in $12$ epochs and $51.3$AP in $24$ epochs on COCO with a ResNet-50 backbone and multi-scale features, yielding a significant improvement of $\\textbf{+6.0}$\\textbf{AP} and $\\textbf{+2.7}$\\textbf{AP}, respectively, compared to DN-DETR, the previous best DETR-like model. DINO scales well in both model size and data size. Without bells and whistles, after pre-training on the Objects365 dataset with a SwinL backbone, DINO obtains the best results on both COCO \\texttt{val2017} ($\\textbf{63.2}$\\textbf{AP}) and \\texttt{test-dev} (\\textbf{$\\textbf{63.3}$AP}). Compared to other models on the leaderboard, DINO significantly reduces its model size and pre-training data size while achieving better results. Our code will be available at \\url{https://github.com/IDEACVR/DINO}.","external_url":"https://arxiv.org/abs/2203.03605","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-23T19:05:47.041825+00:00","pith_arxiv_id":"2203.03605","created_at":"2026-05-09T06:25:46.342722+00:00","updated_at":"2026-05-23T19:05:47.041825+00:00","title_quality_ok":true,"display_title":"DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection","render_title":"DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection"},"hub":{"state":{"work_id":"4e842f69-fa99-4dde-b8a7-b5a558d4c80b","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":52,"external_cited_by_count":null,"distinct_field_count":5,"first_pith_cited_at":"2023-06-23T15:21:52+00:00","last_pith_cited_at":"2026-05-20T09:49:04+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-24T03:24:41.923067+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"method","n":6},{"context_role":"background","n":4},{"context_role":"baseline","n":1}],"polarity_counts":[{"context_polarity":"background","n":5},{"context_polarity":"use_method","n":5},{"context_polarity":"baseline","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}