{"work":{"id":"5c6b13d6-e704-4bf4-9df7-3a3a4d3b6950","openalex_id":null,"doi":null,"arxiv_id":"1505.04597","raw_key":null,"title":"U-Net: Convolutional Networks for Biomedical Image Segmentation","authors":null,"authors_text":"Olaf Ronneberger, Philipp Fischer, Thomas Brox","year":2015,"venue":"cs.CV","abstract":"There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net .","external_url":"https://arxiv.org/abs/1505.04597","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T11:16:55.521402+00:00","pith_arxiv_id":"1505.04597","created_at":"2026-05-08T18:08:52.955307+00:00","updated_at":"2026-05-25T11:16:55.521402+00:00","title_quality_ok":true,"display_title":"U-Net: Convolutional Networks for Biomedical Image Segmentation","render_title":"U-Net: Convolutional Networks for Biomedical Image Segmentation"},"hub":{"state":{"work_id":"5c6b13d6-e704-4bf4-9df7-3a3a4d3b6950","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":85,"external_cited_by_count":null,"distinct_field_count":19,"first_pith_cited_at":"2019-07-02T13:17:14+00:00","last_pith_cited_at":"2026-05-21T13:43:46+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-05-30T17:21:22.134404+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":11},{"context_role":"method","n":6},{"context_role":"baseline","n":3},{"context_role":"dataset","n":1}],"polarity_counts":[{"context_polarity":"background","n":9},{"context_polarity":"use_method","n":6},{"context_polarity":"baseline","n":3},{"context_polarity":"unclear","n":2},{"context_polarity":"use_dataset","n":1}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T17:46:22.067285+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","work_id":"e96730e3-129b-4db6-b981-15ab7932e297","shared_citers":9},{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":5},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":5},{"title":"Deep Residual Learning for Image Recognition","work_id":"ae9e5671-23e8-4853-82a4-699b5b8dd639","shared_citers":5},{"title":"Denoising Diffusion Implicit Models","work_id":"8fa2128b-d18c-405c-ac92-0e669cf89ac0","shared_citers":5},{"title":"Denoising Diffusion Probabilistic Models","work_id":"dc023f4e-7c79-471c-b713-deeb559ba010","shared_citers":5},{"title":"DINOv2: Learning Robust Visual Features without Supervision","work_id":"26b304e5-b54a-4f26-be7e-83299eca52e4","shared_citers":5},{"title":"Score-Based Generative Modeling through Stochastic Differential Equations","work_id":"d9110e53-a5d4-4794-a4c5-a575e91c31ad","shared_citers":5},{"title":"Segment Anything","work_id":"2bbf46ca-720a-45a1-8e9c-10c33fbeada0","shared_citers":5},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":4},{"title":"Improved techniques for training gans","work_id":"dfc09c9b-e1b2-4d18-8405-6d4ee87a2dd6","shared_citers":4},{"title":"Learning Transferable Visual Models From Natural Language Supervision","work_id":"6de86bb5-27bd-4d5c-8b89-967ebfc52659","shared_citers":4},{"title":"Classifier-Free Diffusion Guidance","work_id":"acf2c588-c088-4a6c-938e-150ad7c666d7","shared_citers":3},{"title":"Deep Unsupervised Learning using Nonequilibrium Thermodynamics","work_id":"986277c3-5997-4593-942c-17cdec737a72","shared_citers":3},{"title":"Group normalization","work_id":"19a9604a-9e6c-4e87-a353-712030317c6d","shared_citers":3},{"title":"Hierarchical Text-Conditional Image Generation with CLIP Latents","work_id":"0c6a768b-70b8-4242-bb0e-459f1008c9fc","shared_citers":3},{"title":"High-Resolution Image Synthesis with Latent Diffusion Models","work_id":"f0270d36-2952-47fb-84c1-95e3ec341126","shared_citers":3},{"title":"Make-A-Video: Text-to-Video Generation without Text-Video Data","work_id":"52a801fc-a707-45a1-a8cd-0d6702f124ab","shared_citers":3},{"title":"Progressive Distillation for Fast Sampling of Diffusion Models","work_id":"fd04f498-ff85-4de3-bcc7-31ef072b2ceb","shared_citers":3},{"title":"URLhttps://doi.org/10.48550/arXiv","work_id":"5c2060c6-427c-4321-be22-49ccae439d80","shared_citers":3},{"title":"","work_id":"eda661aa-a949-4605-b5c1-b915b5ab2c3b","shared_citers":2},{"title":"$\\pi_0$: A Vision-Language-Action Flow Model for General Robot Control","work_id":"f790abdc-a796-482f-a40d-f8ee035ecfc2","shared_citers":2},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":2},{"title":"A Generalist Agent","work_id":"4b0a87cd-8d54-4abc-9698-c4cb20995600","shared_citers":2}],"time_series":[{"n":2,"year":2023},{"n":40,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T17:46:19.556389+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T17:46:14.619370+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"U-Net: Convolutional Networks for Biomedical Image Segmentation","claims":[{"claim_text":"There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segme","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks U-Net: Convolutional Networks for Biomedical Image Segmentation because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T17:46:17.034434+00:00"}},"summary":{"title":"U-Net: Convolutional Networks for Biomedical Image Segmentation","claims":[{"claim_text":"There is large consent that successful training of deep networks requires many thousand annotated training samples. In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segme","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks U-Net: Convolutional Networks for Biomedical Image Segmentation because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","work_id":"e96730e3-129b-4db6-b981-15ab7932e297","shared_citers":9},{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":5},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":5},{"title":"Deep Residual Learning for Image Recognition","work_id":"ae9e5671-23e8-4853-82a4-699b5b8dd639","shared_citers":5},{"title":"Denoising Diffusion Implicit Models","work_id":"8fa2128b-d18c-405c-ac92-0e669cf89ac0","shared_citers":5},{"title":"Denoising Diffusion Probabilistic Models","work_id":"dc023f4e-7c79-471c-b713-deeb559ba010","shared_citers":5},{"title":"DINOv2: Learning Robust Visual Features without Supervision","work_id":"26b304e5-b54a-4f26-be7e-83299eca52e4","shared_citers":5},{"title":"Score-Based Generative Modeling through Stochastic Differential Equations","work_id":"d9110e53-a5d4-4794-a4c5-a575e91c31ad","shared_citers":5},{"title":"Segment Anything","work_id":"2bbf46ca-720a-45a1-8e9c-10c33fbeada0","shared_citers":5},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":4},{"title":"Improved techniques for training gans","work_id":"dfc09c9b-e1b2-4d18-8405-6d4ee87a2dd6","shared_citers":4},{"title":"Learning Transferable Visual Models From Natural Language Supervision","work_id":"6de86bb5-27bd-4d5c-8b89-967ebfc52659","shared_citers":4},{"title":"Classifier-Free Diffusion Guidance","work_id":"acf2c588-c088-4a6c-938e-150ad7c666d7","shared_citers":3},{"title":"Deep Unsupervised Learning using Nonequilibrium Thermodynamics","work_id":"986277c3-5997-4593-942c-17cdec737a72","shared_citers":3},{"title":"Group normalization","work_id":"19a9604a-9e6c-4e87-a353-712030317c6d","shared_citers":3},{"title":"Hierarchical Text-Conditional Image Generation with CLIP Latents","work_id":"0c6a768b-70b8-4242-bb0e-459f1008c9fc","shared_citers":3},{"title":"High-Resolution Image Synthesis with Latent Diffusion Models","work_id":"f0270d36-2952-47fb-84c1-95e3ec341126","shared_citers":3},{"title":"Make-A-Video: Text-to-Video Generation without Text-Video Data","work_id":"52a801fc-a707-45a1-a8cd-0d6702f124ab","shared_citers":3},{"title":"Progressive Distillation for Fast Sampling of Diffusion Models","work_id":"fd04f498-ff85-4de3-bcc7-31ef072b2ceb","shared_citers":3},{"title":"URLhttps://doi.org/10.48550/arXiv","work_id":"5c2060c6-427c-4321-be22-49ccae439d80","shared_citers":3},{"title":"","work_id":"eda661aa-a949-4605-b5c1-b915b5ab2c3b","shared_citers":2},{"title":"$\\pi_0$: A Vision-Language-Action Flow Model for General Robot Control","work_id":"f790abdc-a796-482f-a40d-f8ee035ecfc2","shared_citers":2},{"title":"$\\pi_{0.5}$: a Vision-Language-Action Model with Open-World Generalization","work_id":"d1ad7304-d09a-49bc-809e-846439f6aff9","shared_citers":2},{"title":"A Generalist Agent","work_id":"4b0a87cd-8d54-4abc-9698-c4cb20995600","shared_citers":2}],"time_series":[{"n":2,"year":2023},{"n":40,"year":2026}],"dependency_candidates":[]},"authors":[]}}