{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:ZKUCKW7RUB6BDQIVXZWSPXKUSJ","short_pith_number":"pith:ZKUCKW7R","schema_version":"1.0","canonical_sha256":"caa8255bf1a07c11c115be6d27dd54926a42ce8404039ca9728347695b74beb1","source":{"kind":"arxiv","id":"1905.11946","version":5},"attestation_state":"computed","paper":{"title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Scaling depth, width, and resolution together with one compound coefficient produces more accurate and efficient convolutional networks than scaling any single dimension.","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Mingxing Tan, Quoc V. Le","submitted_at":"2019-05-28T17:05:32Z","abstract_excerpt":"Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet.\n  To go eve"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"1905.11946","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-05-28T17:05:32Z","cross_cats_sorted":["cs.CV","stat.ML"],"title_canon_sha256":"5d57ed40bf99234b9f21dec3ed51799244e63fef2bb7a81fafadb22a131f417e","abstract_canon_sha256":"ed3605569195a209763a2b9cfe22d6d49f16a66c354514def888be6f55d12b8d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:47.894555Z","signature_b64":"tkIx2K+tOb2zTRH+t7yanVktIosCwbYOr9X8ZpzVhu8+5CoZef7PEYBD6QutmNsQovpW9junqDpDoem7MGcjBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"caa8255bf1a07c11c115be6d27dd54926a42ce8404039ca9728347695b74beb1","last_reissued_at":"2026-05-17T23:38:47.894023Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:47.894023Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Scaling depth, width, and resolution together with one compound coefficient produces more accurate and efficient convolutional networks than scaling any single dimension.","cross_cats":["cs.CV","stat.ML"],"primary_cat":"cs.LG","authors_text":"Mingxing Tan, Quoc V. Le","submitted_at":"2019-05-28T17:05:32Z","abstract_excerpt":"Convolutional Neural Networks (ConvNets) are commonly developed at a fixed resource budget, and then scaled up for better accuracy if more resources are available. In this paper, we systematically study model scaling and identify that carefully balancing network depth, width, and resolution can lead to better performance. Based on this observation, we propose a new scaling method that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound coefficient. We demonstrate the effectiveness of this method on scaling up MobileNets and ResNet.\n  To go eve"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"our EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The scaling ratios found by grid search on the small baseline network remain near-optimal when applied to much larger models and across different datasets and tasks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"EfficientNet scales network depth, width, and resolution uniformly via a compound coefficient to deliver state-of-the-art accuracy and efficiency on image classification.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Scaling depth, width, and resolution together with one compound coefficient produces more accurate and efficient convolutional networks than scaling any single dimension.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"c600afcf4b0f88b48bdcaaf76357800f3408f65170f7dd1bdc29dc279b4192e9"},"source":{"id":"1905.11946","kind":"arxiv","version":5},"verdict":{"id":"7580ba5b-e30e-4b61-9d1a-f4a6fee8f472","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T12:15:52.068716Z","strongest_claim":"our EfficientNet-B7 achieves state-of-the-art 84.3% top-1 accuracy on ImageNet, while being 8.4x smaller and 6.1x faster on inference than the best existing ConvNet.","one_line_summary":"EfficientNet scales network depth, width, and resolution uniformly via a compound coefficient to deliver state-of-the-art accuracy and efficiency on image classification.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The scaling ratios found by grid search on the small baseline network remain near-optimal when applied to much larger models and across different datasets and tasks.","pith_extraction_headline":"Scaling depth, width, and resolution together with one compound coefficient produces more accurate and efficient convolutional networks than scaling any single dimension."},"references":{"count":52,"sample":[{"doi":"","year":2011,"title":"Berg, T., Liu, J., Woo Lee, S., Alexander, M. L., Jacobs, D. W., and Belhumeur, P. N. Birdsnap: Large-scale fine-grained visual categorization of birds. CVPR, pp.\\ 2011--2018, 2014","work_id":"7e0ed155-6002-40fe-845a-3c5d2e0cb48b","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2014,"title":"Food-101--mining discriminative components with random forests","work_id":"0c135045-d9c9-42d0-a750-3b3b57618704","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2019,"title":"Proxylessnas: Direct neural architecture search on target task and hardware","work_id":"08465e82-fa22-4794-9994-bef8a30900a0","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Xception: Deep learning with depthwise separable convolutions","work_id":"4b286089-2fcf-408c-a148-9b7c981be9d0","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2019,"title":"D., Zoph, B., Mane, D., Vasudevan, V., and Le, Q","work_id":"09a7ad71-285d-47e0-a077-67b96e9c1bb7","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":52,"snapshot_sha256":"acecf4bebb971b1c5af10f33105892479730e53011cad04443c4e95d20692619","internal_anchors":7},"formal_canon":{"evidence_count":3,"snapshot_sha256":"d3b5a4cb994d6c2dbfa4802ba6c71f853ceaa677ebb4320e7c8b4b3836fb1a71"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.11946","created_at":"2026-05-17T23:38:47.894125+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.11946v5","created_at":"2026-05-17T23:38:47.894125+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.11946","created_at":"2026-05-17T23:38:47.894125+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZKUCKW7RUB6B","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZKUCKW7RUB6BDQIV","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZKUCKW7R","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":30,"internal_anchor_count":30,"sample":[{"citing_arxiv_id":"1907.05272","citing_title":"Introduction to Camera Pose Estimation with Deep Learning","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2411.13311","citing_title":"A Resource Efficient Fusion Network for Object Detection in Bird's-Eye View using Camera and Raw Radar Data","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2504.05679","citing_title":"Event-based Civil Infrastructure Visual Defect Detection: ev-CIVIL Dataset and Benchmark","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"2605.21789","citing_title":"Patch Hierarchical Attention Transformer for Efficient Particle Jet Tagging","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2507.10236","citing_title":"Navigating the Challenges of AI-Generated Image Detection in the Wild: What Truly Matters?","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2509.14165","citing_title":"Where Do Tokens Go? Understanding Pruning Behaviors in STEP at High Resolutions","ref_index":61,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15551","citing_title":"Characterizing Learning in Deep Neural Networks using Tractable Algorithmic Complexity Analysis","ref_index":51,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19520","citing_title":"A Value-added Physical Properties Catalog for Low-redshift Galaxies from DESI Legacy Imaging Surveys DR10","ref_index":110,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15484","citing_title":"When Does Sparse MoE Help in Vision? The Role of Backbone Compute Leverage in Sparse Routing","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2510.21833","citing_title":"Towards Accurate and Efficient Waste Image Classification: A Hybrid Deep Learning and Machine Learning Approach","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2102.01293","citing_title":"Scaling Laws for Transfer","ref_index":80,"is_internal_anchor":true},{"citing_arxiv_id":"2010.01412","citing_title":"Sharpness-Aware Minimization for Efficiently Improving Generalization","ref_index":42,"is_internal_anchor":true},{"citing_arxiv_id":"2601.01041","citing_title":"Generalizable Deepfake Detection Based on Forgery-aware Layer Masking and Multi-artifact Subspace Decomposition","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.04086","citing_title":"LAA-X: Unified Localized Artifact Attention for Quality-Agnostic and Generalizable Face Forgery Detection","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2006.07397","citing_title":"The DeepFake Detection Challenge (DFDC) Dataset","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2309.16588","citing_title":"Vision Transformers Need Registers","ref_index":90,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25188","citing_title":"Image Classification via Random Dilated Convolution with Multi-Branch Feature Extraction and Context Excitation","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24426","citing_title":"DYMAPIA: A Multi-Domain Framework for Detecting AI-based Video Manipulation","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2604.24163","citing_title":"Robust Deepfake Detection, NTIRE 2026 Challenge: Report","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04904","citing_title":"Exploring Clustering Capability of Inpainting Model Embeddings for Pattern-based Individual Identification","ref_index":78,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01563","citing_title":"Multi-Dataset Cross-Domain Knowledge Distillation for Unified Medical Image Segmentation, Classification, and Detection","ref_index":79,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01989","citing_title":"DBLP: Phase-Aware Bounded-Loss Transport for Burst-Resilient Distributed ML Training","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2112.00861","citing_title":"A General Language Assistant as a Laboratory for Alignment","ref_index":110,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06847","citing_title":"SMCNet: Supervised Surface Material Classification Using mmWave Radar IQ Signals and Complex-valued CNNs","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07254","citing_title":"Non-identifiability of Explanations from Model Behavior in Deep Networks of Image Authenticity Judgments","ref_index":8,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":3,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ","json":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ.json","graph_json":"https://pith.science/api/pith-number/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/graph.json","events_json":"https://pith.science/api/pith-number/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/events.json","paper":"https://pith.science/paper/ZKUCKW7R"},"agent_actions":{"view_html":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ","download_json":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ.json","view_paper":"https://pith.science/paper/ZKUCKW7R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.11946&json=true","fetch_graph":"https://pith.science/api/pith-number/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/graph.json","fetch_events":"https://pith.science/api/pith-number/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/action/storage_attestation","attest_author":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/action/author_attestation","sign_citation":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/action/citation_signature","submit_replication":"https://pith.science/pith/ZKUCKW7RUB6BDQIVXZWSPXKUSJ/action/replication_record"}},"created_at":"2026-05-17T23:38:47.894125+00:00","updated_at":"2026-05-17T23:38:47.894125+00:00"}