{"work":{"id":"4b5c8148-382f-461b-8caf-6da2a9acbef5","openalex_id":null,"doi":null,"arxiv_id":"1312.6034","raw_key":null,"title":"Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps","authors":null,"authors_text":"Karen Simonyan, Andrea Vedaldi, Andrew Zisserman","year":2013,"venue":"cs.CV","abstract":"This paper addresses the visualisation of image classification models, learnt using deep Convolutional Networks (ConvNets). We consider two visualisation techniques, based on computing the gradient of the class score with respect to the input image. The first one generates an image, which maximises the class score [Erhan et al., 2009], thus visualising the notion of the class, captured by a ConvNet. The second technique computes a class saliency map, specific to a given image and class. We show that such maps can be employed for weakly supervised object segmentation using classification ConvNets. Finally, we establish the connection between the gradient-based ConvNet visualisation methods and deconvolutional networks [Zeiler et al., 2013].","external_url":"https://arxiv.org/abs/1312.6034","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T17:17:05.374242+00:00","pith_arxiv_id":"1312.6034","created_at":"2026-05-09T06:00:36.987234+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":true,"display_title":"Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps","render_title":"Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps"},"hub":{"state":{"work_id":"4b5c8148-382f-461b-8caf-6da2a9acbef5","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":86,"external_cited_by_count":null,"distinct_field_count":20,"first_pith_cited_at":"2019-06-24T23:08:03+00:00","last_pith_cited_at":"2026-05-21T17:34:39+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-11T09:07:37.683175+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":10},{"context_role":"method","n":1}],"polarity_counts":[{"context_polarity":"background","n":9},{"context_polarity":"unclear","n":1},{"context_polarity":"use_method","n":1}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T18:10:18.127604+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"& Wattenberg, M","work_id":"9fe7734e-4726-441f-abc1-88bbce75815c","shared_citers":8},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":4},{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":3},{"title":"Distilling the Knowledge in a Neural Network","work_id":"d927ab1f-17b8-4002-9d09-c3d55764fbad","shared_citers":3},{"title":"Towards A Rigorous Science of Interpretable Machine Learning","work_id":"45958f3f-1e35-4e8a-8ed0-e3989a6c8be5","shared_citers":3},{"title":"Toy Models of Superposition","work_id":"43875dbe-bc2d-4ab5-af63-744411533ff7","shared_citers":3},{"title":"Use hirescam instead of grad-cam for faithful explanations of convolutional neural networks","work_id":"d66a0052-0057-4828-89d9-d6d16e8281c8","shared_citers":3},{"title":"A consistent and efficient evaluation strategy for attribution methods","work_id":"4f27f376-9170-406c-87f9-1f9ebaa52c5c","shared_citers":2},{"title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","work_id":"e96730e3-129b-4db6-b981-15ab7932e297","shared_citers":2},{"title":"A primer on the inner workings of transformer-based language models","work_id":"e2a1ac9c-a52e-4b96-8aea-1eb401e09698","shared_citers":2},{"title":"Archetypal sae: Adaptive and stable dictionary learn- ing for concept extraction in large vision models","work_id":"c2a41b7d-f4da-4559-b164-4d502a9ee236","shared_citers":2},{"title":"Clip-dissect: Automatic description of neuron representations in deep vision networks","work_id":"92d91c32-30ac-4b08-a8d5-69aa181bb306","shared_citers":2},{"title":"Conditional Generative Adversarial Nets","work_id":"1d3bbf10-6268-4cc1-9702-27df9208c6af","shared_citers":2},{"title":"Improved Regularization of Convolutional Neural Networks with Cutout","work_id":"a3bf8477-f913-4f6a-8e36-125767300d1f","shared_citers":2},{"title":"& Kundaje, A","work_id":"6dde6c17-f037-4ba4-ba7c-ef8c7475057b","shared_citers":2},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":2},{"title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","work_id":"4ee75248-1199-492c-a52f-6661e0f4adff","shared_citers":2},{"title":"Not just a black box: Learning important features through propagating activation differences","work_id":"c07fa0c5-030e-4dbc-9e9d-c62241570ede","shared_citers":2},{"title":"On Pixel-Wise Explanations for Non-Linear Classifier Decisions by Layer- Wise Relevance Propagation","work_id":"a2492a93-db2c-4bde-8872-6eaf0d7e310a","shared_citers":2},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":2},{"title":"Sanity checks for saliency maps","work_id":"3f6fe9ef-69cf-4151-b638-015435654acf","shared_citers":2},{"title":"Scaling and evaluating sparse autoencoders","work_id":"f3faddeb-36ed-42bc-a7c9-9e764dc9b368","shared_citers":2},{"title":"Sparse Autoencoders Find Highly Interpretable Features in Language Models","work_id":"51960d72-c69f-4db8-8efd-e90e8b4d9524","shared_citers":2},{"title":"Sparse Feature Circuits: Discovering and Editing Interpretable Causal Graphs in Language Models","work_id":"fb24e7e7-f336-4706-bc2d-62d656b28d74","shared_citers":2}],"time_series":[{"n":1,"year":2024},{"n":34,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T18:10:02.477063+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T18:10:12.580878+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps","claims":[{"claim_text":"This paper addresses the visualisation of image classification models, learnt using deep Convolutional Networks (ConvNets). We consider two visualisation techniques, based on computing the gradient of the class score with respect to the input image. The first one generates an image, which maximises the class score [Erhan et al., 2009], thus visualising the notion of the class, captured by a ConvNet. The second technique computes a class saliency map, specific to a given image and class. We show that such maps can be employed for weakly supervised object segmentation using classification ConvNe","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T18:09:53.267823+00:00"}},"summary":{"title":"Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps","claims":[{"claim_text":"This paper addresses the visualisation of image classification models, learnt using deep Convolutional Networks (ConvNets). We consider two visualisation techniques, based on computing the gradient of the class score with respect to the input image. The first one generates an image, which maximises the class score [Erhan et al., 2009], thus visualising the notion of the class, captured by a ConvNet. The second technique computes a class saliency map, specific to a given image and class. We show that such maps can be employed for weakly supervised object segmentation using classification ConvNe","claim_type":"abstract","evidence_strength":"source_metadata"}],"why_cited":"Pith tracks Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"& Wattenberg, M","work_id":"9fe7734e-4726-441f-abc1-88bbce75815c","shared_citers":8},{"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","shared_citers":4},{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":3},{"title":"Distilling the Knowledge in a Neural Network","work_id":"d927ab1f-17b8-4002-9d09-c3d55764fbad","shared_citers":3},{"title":"Towards A Rigorous Science of Interpretable Machine Learning","work_id":"45958f3f-1e35-4e8a-8ed0-e3989a6c8be5","shared_citers":3},{"title":"Toy Models of Superposition","work_id":"43875dbe-bc2d-4ab5-af63-744411533ff7","shared_citers":3},{"title":"Use hirescam instead of grad-cam for faithful explanations of convolutional neural networks","work_id":"d66a0052-0057-4828-89d9-d6d16e8281c8","shared_citers":3},{"title":"A consistent and efficient evaluation strategy for attribution methods","work_id":"4f27f376-9170-406c-87f9-1f9ebaa52c5c","shared_citers":2},{"title":"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale","work_id":"e96730e3-129b-4db6-b981-15ab7932e297","shared_citers":2},{"title":"A primer on the inner workings of transformer-based language models","work_id":"e2a1ac9c-a52e-4b96-8aea-1eb401e09698","shared_citers":2},{"title":"Archetypal sae: Adaptive and stable dictionary learn- ing for concept extraction in large vision models","work_id":"c2a41b7d-f4da-4559-b164-4d502a9ee236","shared_citers":2},{"title":"Clip-dissect: Automatic description of neuron representations in deep vision networks","work_id":"92d91c32-30ac-4b08-a8d5-69aa181bb306","shared_citers":2},{"title":"Conditional Generative Adversarial Nets","work_id":"1d3bbf10-6268-4cc1-9702-27df9208c6af","shared_citers":2},{"title":"Improved Regularization of Convolutional Neural Networks with Cutout","work_id":"a3bf8477-f913-4f6a-8e36-125767300d1f","shared_citers":2},{"title":"& Kundaje, A","work_id":"6dde6c17-f037-4ba4-ba7c-ef8c7475057b","shared_citers":2},{"title":"LLaMA: Open and Efficient Foundation Language Models","work_id":"c018fc23-6f3f-4035-9d02-28a2173b2b9d","shared_citers":2},{"title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","work_id":"4ee75248-1199-492c-a52f-6661e0f4adff","shared_citers":2},{"title":"Not just a black box: Learning important features through propagating activation differences","work_id":"c07fa0c5-030e-4dbc-9e9d-c62241570ede","shared_citers":2},{"title":"On Pixel-Wise Explanations for Non-Linear Classifier Decisions by Layer- Wise Relevance Propagation","work_id":"a2492a93-db2c-4bde-8872-6eaf0d7e310a","shared_citers":2},{"title":"Qwen3 Technical Report","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","shared_citers":2},{"title":"Sanity checks for saliency maps","work_id":"3f6fe9ef-69cf-4151-b638-015435654acf","shared_citers":2},{"title":"Scaling and evaluating sparse autoencoders","work_id":"f3faddeb-36ed-42bc-a7c9-9e764dc9b368","shared_citers":2},{"title":"Sparse Autoencoders Find Highly Interpretable Features in Language Models","work_id":"51960d72-c69f-4db8-8efd-e90e8b4d9524","shared_citers":2},{"title":"Sparse Feature Circuits: Discovering and Editing Interpretable Causal Graphs in Language Models","work_id":"fb24e7e7-f336-4706-bc2d-62d656b28d74","shared_citers":2}],"time_series":[{"n":1,"year":2024},{"n":34,"year":2026}],"dependency_candidates":[]},"authors":[]}}