{"work":{"id":"d76c6842-b84d-44ec-bcea-b80cd8d07981","openalex_id":null,"doi":null,"arxiv_id":"1907.02893","raw_key":null,"title":"Invariant Risk Minimization","authors":null,"authors_text":"Martin Arjovsky, L\\'eon Bottou, Ishaan Gulrajani, David Lopez-Paz","year":2019,"venue":"stat.ML","abstract":"We introduce Invariant Risk Minimization (IRM), a learning paradigm to estimate invariant correlations across multiple training distributions. To achieve this goal, IRM learns a data representation such that the optimal classifier, on top of that data representation, matches for all training distributions. Through theory and experiments, we show how the invariances learned by IRM relate to the causal structures governing the data and enable out-of-distribution generalization.","external_url":"https://arxiv.org/abs/1907.02893","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-23T21:55:50.847921+00:00","pith_arxiv_id":"1907.02893","created_at":"2026-05-09T05:50:26.714036+00:00","updated_at":"2026-05-23T21:55:50.847921+00:00","title_quality_ok":false,"display_title":"Invariant Risk Minimization","render_title":"Invariant Risk Minimization"},"hub":{"state":{"work_id":"d76c6842-b84d-44ec-bcea-b80cd8d07981","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":75,"external_cited_by_count":null,"distinct_field_count":11,"first_pith_cited_at":"2019-11-20T06:43:41+00:00","last_pith_cited_at":"2026-05-21T17:53:28+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-03T03:35:34.710523+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":12},{"context_role":"method","n":1},{"context_role":"other","n":1}],"polarity_counts":[{"context_polarity":"background","n":10},{"context_polarity":"unclear","n":3},{"context_polarity":"use_method","n":1}],"runs":{"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T17:49:35.176827+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Distributionally Robust Neural Networks for Group Shifts: On the Importance of Regularization for Worst-Case Generalization","work_id":"b9385d0d-bafd-43d3-8948-4d2da8ee27a0","shared_citers":6},{"title":"mixup: Beyond Empirical Risk Minimization","work_id":"7f61d5d0-82cd-471c-9ea6-17c31d56d24e","shared_citers":5},{"title":"arXiv preprint arXiv:1912.02178 , year=","work_id":"06c7469e-b1d3-402e-9575-be0c7c02bd83","shared_citers":3},{"title":"Benchmarking Neural Network Robustness to Common Corruptions and Perturbations","work_id":"e28eaed9-7f6b-46ed-ba19-fba407571ba7","shared_citers":3},{"title":"Certifying","work_id":"f13c7954-0712-439e-8efc-0f9b560646d0","shared_citers":3},{"title":"The information bottleneck method","work_id":"72655a80-0724-45ad-a330-1f4ed7aa613b","shared_citers":3},{"title":"Towards out-of-distribution generalization: A survey","work_id":"4fef1f90-5867-4c65-9809-7c3153413bf3","shared_citers":3},{"title":"2016 , month = jul, journal =","work_id":"c79f68c5-5cde-4062-a71a-4d20f08dc8f7","shared_citers":2},{"title":"2020 , month = nov, journal =","work_id":"d6c3d284-997a-46fa-b978-a5dd28d7658b","shared_citers":2},{"title":"//arxiv.org/abs/1811.04551","work_id":"146fc4a4-6db2-43a9-a57f-4dd133c0d315","shared_citers":2},{"title":"arXiv preprint arXiv:1807.09356 , Title =","work_id":"6d9c70a0-097e-417b-b92f-7502cff02794","shared_citers":2},{"title":"arXiv preprint arXiv:2006.10726 (2020) 2, 3, 12, 13, 36","work_id":"7f343a70-32a2-42dc-945a-66a6feb179fb","shared_citers":2},{"title":"arXiv preprint arXiv:2010.05761 (2020) 4, 8","work_id":"9f465ee9-fbd1-4205-aee5-7ca66f1e7cb9","shared_citers":2},{"title":"arXiv preprint arXiv:2010.09670 , year=","work_id":"8ae4b2b2-a2da-4900-9021-ad64ae1b860f","shared_citers":2},{"title":"arXiv preprint arXiv:2104.02008 (2021) 1, 2, 3, 9, 35","work_id":"8c9f5fdc-22d9-4212-8c56-7365298efa57","shared_citers":2},{"title":"arXiv preprint arXiv:2502.15657 , year=","work_id":"2ef0154a-8714-429a-b9b2-84fce35ee5c8","shared_citers":2},{"title":"Auto-Encoding Variational Bayes","work_id":"97d95295-30e1-42b4-bbf6-85f0fa4edb44","shared_citers":2},{"title":"Discovering invariant rationales for graph neural networks","work_id":"224904f2-e0e8-4c30-b710-f2efd033d19f","shared_citers":2},{"title":"Explaining and Harnessing Adversarial Examples","work_id":"2cedf8f6-7539-4c49-8136-f42a20487146","shared_citers":2},{"title":"IEEE transactions on pattern analysis and machine intelligence45(4), 4396–4415 (2023) https: //doi.org/10.1109/TPAMI.2022.3195549","work_id":"0ae6e0c8-f79b-40c8-bc12-7d9078d27f92","shared_citers":2},{"title":"Improved Regularization of Convolutional Neural Networks with Cutout","work_id":"a3bf8477-f913-4f6a-8e36-125767300d1f","shared_citers":2},{"title":"In search of lost domain generalization.arXiv preprint arXiv:2007.01434","work_id":"17342d04-23a8-4954-a070-13791b5f5fac","shared_citers":2},{"title":"Intriguing properties of neural networks","work_id":"7bcd9f41-780c-4b4b-9a08-830d4177cdd8","shared_citers":2},{"title":"Kaiser, M","work_id":"edc1a23e-c421-4569-ab9e-83b204eeb0fa","shared_citers":2}],"time_series":[{"n":1,"year":2019},{"n":1,"year":2020},{"n":1,"year":2021},{"n":1,"year":2023},{"n":35,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T17:56:22.314827+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T17:49:52.517666+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Invariant Risk Minimization","claims":[{"claim_text":"We introduce Invariant Risk Minimization (IRM), a learning paradigm to estimate invariant correlations across multiple training distributions. To achieve this goal, IRM learns a data representation such that the optimal classifier, on top of that data representation, matches for all training distributions. Through theory and experiments, we show how the invariances learned by IRM relate to the causal structures governing the data and enable out-of-distribution generalization.","claim_type":"abstract","evidence_strength":"source_metadata"},{"claim_text":"Θ ⊆ Rd are convex and compact, and letθ∗ ∈ Θ be a minimizer of the worst-group objectiveR(θ). Then there exists a distributionQ∗ ∈ Q such thatθ∗ ∈ arg minθ Ez∼Q∗[ℓ(θ;z)]. However, this equivalence breaks down when the lossℓ is non-convex: Counterexample 1. Consider a uniform data distributionP supported on two points Z = {z1,z 2}, and letℓ(θ;z) be as in Figure 4, withΘ = [0, 1]. The DRO solutionθ∗ achieves a worst-case loss of R(θ∗) = 0.6. Now consider any weights (w1,w 2) ∈ ∆2 and w.l.o.g. letw","claim_type":"background","confidence":0.6,"evidence_strength":"citation_context"}],"why_cited":"Pith tracks Invariant Risk Minimization because it crossed a citation-hub threshold. Current citing contexts most often use it as background evidence (1 contexts).","role_counts":[{"n":1,"context_role":"background"}]},"error":null,"updated_at":"2026-05-14T17:56:34.224461+00:00"}},"summary":{"title":"Invariant Risk Minimization","claims":[{"claim_text":"We introduce Invariant Risk Minimization (IRM), a learning paradigm to estimate invariant correlations across multiple training distributions. To achieve this goal, IRM learns a data representation such that the optimal classifier, on top of that data representation, matches for all training distributions. Through theory and experiments, we show how the invariances learned by IRM relate to the causal structures governing the data and enable out-of-distribution generalization.","claim_type":"abstract","evidence_strength":"source_metadata"},{"claim_text":"Θ ⊆ Rd are convex and compact, and letθ∗ ∈ Θ be a minimizer of the worst-group objectiveR(θ). Then there exists a distributionQ∗ ∈ Q such thatθ∗ ∈ arg minθ Ez∼Q∗[ℓ(θ;z)]. However, this equivalence breaks down when the lossℓ is non-convex: Counterexample 1. Consider a uniform data distributionP supported on two points Z = {z1,z 2}, and letℓ(θ;z) be as in Figure 4, withΘ = [0, 1]. The DRO solutionθ∗ achieves a worst-case loss of R(θ∗) = 0.6. Now consider any weights (w1,w 2) ∈ ∆2 and w.l.o.g. letw","claim_type":"background","confidence":0.6,"evidence_strength":"citation_context"}],"why_cited":"Pith tracks Invariant Risk Minimization because it crossed a citation-hub threshold. Current citing contexts most often use it as background evidence (1 contexts).","role_counts":[{"n":1,"context_role":"background"}]},"graph":{"co_cited":[{"title":"Distributionally Robust Neural Networks for Group Shifts: On the Importance of Regularization for Worst-Case Generalization","work_id":"b9385d0d-bafd-43d3-8948-4d2da8ee27a0","shared_citers":6},{"title":"mixup: Beyond Empirical Risk Minimization","work_id":"7f61d5d0-82cd-471c-9ea6-17c31d56d24e","shared_citers":5},{"title":"arXiv preprint arXiv:1912.02178 , year=","work_id":"06c7469e-b1d3-402e-9575-be0c7c02bd83","shared_citers":3},{"title":"Benchmarking Neural Network Robustness to Common Corruptions and Perturbations","work_id":"e28eaed9-7f6b-46ed-ba19-fba407571ba7","shared_citers":3},{"title":"Certifying","work_id":"f13c7954-0712-439e-8efc-0f9b560646d0","shared_citers":3},{"title":"The information bottleneck method","work_id":"72655a80-0724-45ad-a330-1f4ed7aa613b","shared_citers":3},{"title":"Towards out-of-distribution generalization: A survey","work_id":"4fef1f90-5867-4c65-9809-7c3153413bf3","shared_citers":3},{"title":"2016 , month = jul, journal =","work_id":"c79f68c5-5cde-4062-a71a-4d20f08dc8f7","shared_citers":2},{"title":"2020 , month = nov, journal =","work_id":"d6c3d284-997a-46fa-b978-a5dd28d7658b","shared_citers":2},{"title":"//arxiv.org/abs/1811.04551","work_id":"146fc4a4-6db2-43a9-a57f-4dd133c0d315","shared_citers":2},{"title":"arXiv preprint arXiv:1807.09356 , Title =","work_id":"6d9c70a0-097e-417b-b92f-7502cff02794","shared_citers":2},{"title":"arXiv preprint arXiv:2006.10726 (2020) 2, 3, 12, 13, 36","work_id":"7f343a70-32a2-42dc-945a-66a6feb179fb","shared_citers":2},{"title":"arXiv preprint arXiv:2010.05761 (2020) 4, 8","work_id":"9f465ee9-fbd1-4205-aee5-7ca66f1e7cb9","shared_citers":2},{"title":"arXiv preprint arXiv:2010.09670 , year=","work_id":"8ae4b2b2-a2da-4900-9021-ad64ae1b860f","shared_citers":2},{"title":"arXiv preprint arXiv:2104.02008 (2021) 1, 2, 3, 9, 35","work_id":"8c9f5fdc-22d9-4212-8c56-7365298efa57","shared_citers":2},{"title":"arXiv preprint arXiv:2502.15657 , year=","work_id":"2ef0154a-8714-429a-b9b2-84fce35ee5c8","shared_citers":2},{"title":"Auto-Encoding Variational Bayes","work_id":"97d95295-30e1-42b4-bbf6-85f0fa4edb44","shared_citers":2},{"title":"Discovering invariant rationales for graph neural networks","work_id":"224904f2-e0e8-4c30-b710-f2efd033d19f","shared_citers":2},{"title":"Explaining and Harnessing Adversarial Examples","work_id":"2cedf8f6-7539-4c49-8136-f42a20487146","shared_citers":2},{"title":"IEEE transactions on pattern analysis and machine intelligence45(4), 4396–4415 (2023) https: //doi.org/10.1109/TPAMI.2022.3195549","work_id":"0ae6e0c8-f79b-40c8-bc12-7d9078d27f92","shared_citers":2},{"title":"Improved Regularization of Convolutional Neural Networks with Cutout","work_id":"a3bf8477-f913-4f6a-8e36-125767300d1f","shared_citers":2},{"title":"In search of lost domain generalization.arXiv preprint arXiv:2007.01434","work_id":"17342d04-23a8-4954-a070-13791b5f5fac","shared_citers":2},{"title":"Intriguing properties of neural networks","work_id":"7bcd9f41-780c-4b4b-9a08-830d4177cdd8","shared_citers":2},{"title":"Kaiser, M","work_id":"edc1a23e-c421-4569-ab9e-83b204eeb0fa","shared_citers":2}],"time_series":[{"n":1,"year":2019},{"n":1,"year":2020},{"n":1,"year":2021},{"n":1,"year":2023},{"n":35,"year":2026}],"dependency_candidates":[]},"authors":[]}}