{"total":40,"items":[{"citing_arxiv_id":"2606.03148","ref_index":30,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"$A^2$: Smaller Self-Supervised ViTs Localize Better than Larger Ones","primary_cat":"cs.CV","submitted_at":"2026-06-02T04:45:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Smaller self-supervised ViTs localize objects better via attention than larger ViTs, enabling A² to decouple localization from feature extraction for competitive performance on distribution-shifted benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31164","ref_index":22,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"D$^3$: Dynamic Directional Graph-Constrained Data Scheduling for LLM Training","primary_cat":"cs.CL","submitted_at":"2026-05-29T11:13:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"D³ introduces a dynamic directional graph-constrained framework that models sample interactions via loss dependencies to derive an optimized training sequence for LLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07596","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Shortcuts in the Tail: Debiasing via Post-Hoc Spectral Compression of Fine-Tuning Updates","primary_cat":"cs.LG","submitted_at":"2026-05-29T05:18:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Post-hoc truncation of the tail of the SVD of ΔW reduces spurious-group gaps by up to 5× with <2 pp accuracy loss across 0.5B–7B models and four benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30089","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Distributionally Robust Set Representation Learning Under Inference-Time Element Corruption","primary_cat":"cs.LG","submitted_at":"2026-05-28T15:35:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SW-DRSO optimizes a tractable surrogate of worst-case expected loss over plausible inference-time corruptions using a barycentric adversary approximated via simplex weights.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29836","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CB-SLICE: Concept-Based Interpretable Error Slice Discovery","primary_cat":"cs.LG","submitted_at":"2026-05-28T12:16:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CB-SLICE uses concept mispredictions from CBMs to discover and explain error slices, claiming better performance than existing methods on benchmarks for bias detection.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22644","ref_index":121,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Why SGD is not Brownian Motion: A New Perspective on Stochastic Dynamics","primary_cat":"cs.LG","submitted_at":"2026-05-21T15:50:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SGD is reformulated via a master equation from discrete updates, producing a discrete Fokker-Planck equation that predicts non-stationary variance growth proportional to learning rate in flat Hessian directions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18579","ref_index":28,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"S2Aligner: Pair-Efficient and Transferable Pre-Training for Sparse Text-Attributed Graphs","primary_cat":"cs.LG","submitted_at":"2026-05-18T15:56:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"S2Aligner decouples semantic and structural components in LLM-as-Aligner pre-training for sparse TAGs and uses structure-oriented reconstruction plus domain risk balancing to improve transferability and reduce generalization gaps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15775","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Continual Learning of Domain-Invariant Representations","primary_cat":"cs.LG","submitted_at":"2026-05-15T09:31:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces replay-based continual learning with sequential invariance alignment to learn domain-invariant representations, outperforming baselines on generalization to unseen domains across six datasets in vision, medicine, manufacturing, and ecology.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14654","ref_index":180,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Beyond Instance-Level Self-Supervision in 3D Multi-Modal Medical Imaging","primary_cat":"cs.CV","submitted_at":"2026-05-14T10:10:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A self-supervised approach uses consistent spatial relationships of anatomical structures across patients to improve 3D multi-modal medical image representations, yielding modest gains on segmentation and classification tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14350","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Distributionally Robust Multi-Task Reinforcement Learning via Adaptive Task Sampling","primary_cat":"cs.LG","submitted_at":"2026-05-14T04:22:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DRATS derives a minimax objective from a feasibility formulation of MTRL to adaptively sample tasks with the largest return gaps, leading to better worst-task performance on MetaWorld benchmarks.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"We derive a closed- form solution to the inner maximization of Eq. 4 following a procedure similar to Peters et al. [2010], Abdolmaleki et al. [2018], Peng et al. [2019]. We provide a brief sketch here and a complete derivation in Appendix A. We first form the unconstrained Lagrangian relaxation of Eq. 4: min θ max q∈∆k \u001a Ei∼q[gi(θ)]− 1 η KL(q∥p0) \u001b ,(5) where η >0 is the inverse Lagrange multiplier. By strong duality, Eq. 4 and Eq. 5 have the same solution for an appropriate choice of η. Differentiating the objective in Eq. 5 with respect to q, setting to zero, and solving for the optimalq ∗ yields: q∗ i = exp (ηgi(θ)) Pk j=1 exp (ηgj(θ)) ,or equivalently,q ∗ =softmax(ηg(θ))(6) Here, η acts as an inverse temperature controlling the sharpness of q∗: as η→ ∞ , q∗ concentrates all"},{"citing_arxiv_id":"2605.11107","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Birds of a Feather Flock Together: Background-Invariant Representations via Linear Structure in VLMs","primary_cat":"cs.CV","submitted_at":"2026-05-11T18:13:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Exploiting linear structure in VLM embeddings, a synthetic-data pre-training method yields background-invariant representations that exceed 90% worst-group accuracy on Waterbirds even under 100% spurious correlation with no minority examples in training.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"method achieves, to our knowledge, the first worst-group accuracy exceeding90% on Waterbirds under perfect (100%) spurious correlation (i.e., no minority-group examples in the training data). Furthermore, it demonstrates strong sim-to-real transfer and requires no access to real-world debiased data, making it practical for real-world deployment. 1 Introduction Vision-language models (VLMs), such as CLIP [30] and SigLIP 2 [34], have become widely adopted for image classification, both in zero-shot settings [30, 34] and as frozen feature extractors paired with lightweight heads or adapters [10]. Their flexibility and strong transfer performance have led to their use in a wide range of downstream tasks such as recognition, detection, retrieval, and multimodal reasoning [15, 45]."},{"citing_arxiv_id":"2605.10521","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"DuetFair: Coupling Inter- and Intra-Subgroup Robustness for Fair Medical Image Segmentation","primary_cat":"cs.CV","submitted_at":"2026-05-11T13:08:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DuetFair couples inter-subgroup adaptation with intra-subgroup robustness via FairDRO (dMoE plus subgroup-conditioned DRO) to boost worst-case and equity-scaled performance on medical segmentation benchmarks.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"Group- and sample-wise robust learning.Beyond fairness-specific methods in medical imaging, robust learning has been widely studied in classification and natural language processing, where the main goal is often to improve group-wise or worst-group performance. A representative approach is GroupDRO, which minimizes the worst average risk over predefined groups [10]. Later methods reduce the reliance on explicit group annotations by using misclassified examples as proxies for hard groups, rebalancing classes or groups, or discovering hidden subclasses before applying robust optimization [13, 14, 15]. Despite these differences, the robustness is still applied at the level of groups, classes, or discovered subclasses: the objective compares aggregate risks across groups, and"},{"citing_arxiv_id":"2605.09946","ref_index":37,"ref_count":4,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Structure from Strategic Interaction & Uncertainty: Risk Sensitive Games for Robust Preference Learning","primary_cat":"cs.GT","submitted_at":"2026-05-11T03:50:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Risk-sensitive preference games using convex risk measures produce policies that are robust across data strata and match or exceed standard Nash learning performance without added cost.","context_count":2,"top_context_role":"background","top_context_polarity":"background","context_text":"FR(θ) =β(θ−θ ref)− P R(πθ), withP R(π)y :=R Y ′′∼π[P(y≻Y ′′)]. Let ( bPR,m(π))y denote the sample-based estimator of (P R(π))y using mi.i.d. samplesY ′′ 1 , . . . , Y′′ m ∼π(the same samples shared across componentsy). The empirical operator is bFR,m(θ) :=β(θ−θ ref)− bPR,m(πθ).(36) 44 Define bm(θ) :=E \u0002bFR,m(θ) \u0003 −F R(θ), ζ m(θ) := bFR,m(θ)−E \u0002bFR,m(θ) \u0003 ,(37) sob m(θ) is deterministic givenθandE[ζ m(θ)|θ] = 0. By construction, we therefore have that bm(θ) =P R(πθ)−E[ bPR,m(πθ)] and ζm(θ) =E[ bPR,m(πθ)]− bPR,m(πθ). Abstract bias and variance assumptions.The convergence theorems in Section H rely on the following uniform bias and variance bounds. (O1)Uniform bias bound.There existsB m ≥0 (non-increasing inm) such that for allθ∈θ ref +W,"},{"citing_arxiv_id":"2605.09330","ref_index":32,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The Trap of Trajectory: Towards Understanding and Mitigating Spurious Correlations in Agentic Memory","primary_cat":"cs.LG","submitted_at":"2026-05-10T05:04:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Agentic memory improves clean reasoning but worsens performance when spurious patterns are present in stored trajectories; CAMEL calibration reduces this reliance while preserving clean performance.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[30] Jonas Peters, Peter Bühlmann, and Nicolai Meinshausen. Causal inference by using invariant prediction: identification and confidence intervals.Journal of the Royal Statistical Society Series B: Statistical Methodology, 78(5):947-1012, 2016. [31] Jonas Peters, Joris M Mooij, Dominik Janzing, and Bernhard Schölkopf. Causal discovery with continuous additive noise models. 2014. [32] Shiori Sagawa, Pang Wei Koh, Tatsunori B Hashimoto, and Percy Liang. Distributionally robust neural networks for group shifts: On the importance of regularization for worst-case generalization.arXiv preprint arXiv:1911.08731, 2019. [33] Timo Schick, Jane Dwivedi-Yu, Roberto Dessì, Roberta Raileanu, Maria Lomeli, Eric Hambro, Luke Zettlemoyer, Nicola Cancedda, and Thomas Scialom."},{"citing_arxiv_id":"2605.06643","ref_index":38,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Are We Making Progress in Multimodal Domain Generalization? A Comprehensive Benchmark Study","primary_cat":"cs.CV","submitted_at":"2026-05-07T17:51:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A large-scale benchmark finds that recent multimodal domain generalization methods give only marginal gains over a plain ERM baseline, with no method winning consistently and all degrading sharply under corruption or missing modalities.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06522","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Agentic AIs Are the Missing Paradigm for Out-of-Distribution Generalization in Foundation Models","primary_cat":"cs.LG","submitted_at":"2026-05-07T16:29:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Agentic AI systems are required to overcome the parameter coverage ceiling that prevents foundation models from handling certain out-of-distribution cases.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06368","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"eXplaining to Learn (eX2L): Regularization Using Contrastive Visual Explanation Pairs for Distribution Shifts","primary_cat":"cs.CV","submitted_at":"2026-05-07T14:46:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"eX2L improves robustness to distribution shifts by penalizing similarity between Grad-CAM maps of a label classifier and a confounder classifier, reaching new SOTA average and worst-group accuracy on the Spawrious benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01868","ref_index":27,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Robust Conditional Conformal Prediction via Branched Normalizing Flow","primary_cat":"cs.LG","submitted_at":"2026-05-03T13:29:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Branched Normalizing Flow improves conditional coverage robustness of conformal prediction under distribution shift by normalizing test inputs to the calibration distribution and mapping prediction sets back.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26676","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"A Toolkit for Detecting Spurious Correlations in Speech Datasets","primary_cat":"cs.SD","submitted_at":"2026-04-29T13:47:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A toolkit flags spurious correlations in speech datasets by checking if non-speech regions predict the target class better than chance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26301","ref_index":75,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Cheeger--Hodge Contrastive Learning for Structurally Robust Graph Representation Learning","primary_cat":"cs.LG","submitted_at":"2026-04-29T05:04:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CHCL aligns a Cheeger-Hodge joint signature across graph augmentations to produce embeddings that remain stable under local structural changes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26024","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Correcting Performance Estimation Bias in Imbalanced Classification with Minority Subconcepts","primary_cat":"cs.LG","submitted_at":"2026-04-28T18:05:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The authors introduce predicted-weighted balanced accuracy (pBA), a utility-weighted evaluation metric that uses predicted subconcept posteriors to reduce bias from within-class heterogeneity in imbalanced data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.24163","ref_index":63,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Robust Deepfake Detection, NTIRE 2026 Challenge: Report","primary_cat":"cs.CV","submitted_at":"2026-04-27T08:19:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"The NTIRE 2026 challenge finds that large foundation models combined with ensembles and degradation-aware training produce the most robust deepfake detectors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20685","ref_index":65,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"MGDA-Decoupled: Geometry-Aware Multi-Objective Optimisation for DPO-based LLM Alignment","primary_cat":"cs.LG","submitted_at":"2026-04-22T15:33:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MGDA-Decoupled applies geometry-based multi-objective optimization within the DPO framework to find shared descent directions that account for each objective's convergence dynamics, yielding higher win rates on UltraFeedback.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18797","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"CrossPan: A Comprehensive Benchmark for Cross-Sequence Pancreas MRI Segmentation and Generalization","primary_cat":"cs.CV","submitted_at":"2026-04-20T20:00:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CrossPan benchmark shows cross-sequence MRI domain shifts cause pancreas segmentation models to fail catastrophically, establishing sequence generalization as the primary barrier to clinical deployment over center variability or architecture choices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16892","ref_index":36,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"CrossFlowDG: Bridging the Modality Gap with Cross-modal Flow Matching for Domain Generalization","primary_cat":"cs.CV","submitted_at":"2026-04-18T07:53:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CrossFlowDG bridges the modality gap in domain generalization by learning a continuous transformation that moves image embeddings to matching text embeddings using noise-free cross-modal flow matching.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.13326","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Right Regions, Wrong Labels: Semantic Label Flips in Segmentation under Correlation Shift","primary_cat":"cs.CV","submitted_at":"2026-04-14T22:15:17+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12303","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Labeled TrustSet Guided: Batch Active Learning with Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-04-14T05:22:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BRAL-T uses TrustSet-guided reinforcement learning for batch active learning and reports state-of-the-art results on 10 image classification benchmarks plus 2 fine-tuning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11507","ref_index":110,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Deep Learning for Sequential Decision Making under Uncertainty: Foundations, Frameworks, and Frontiers","primary_cat":"math.OC","submitted_at":"2026-04-13T14:11:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A tutorial framing deep learning as a complement to optimization for sequential decision-making under uncertainty, with applications in supply chains, healthcare, and energy.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[108] Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors.Nature 323(6088):533-536, URLhttp://dx.doi.org/10.1038/323533a0. [109] Sadana U, Chenreddy A, Delage E, Forel A, Frejinger E, Vidal T (2025) A survey of contextual optimization methods for decision-making under uncertainty.European Journal of Operational Research320(2):271-289, URLhttp://dx.doi.org/10.1016/j.ejor.2024.03.020. [110] Sagawa S, Koh PW, Hashimoto TB, Liang P (2020) Distributionally robust neural networks for group shifts: On the importance of regularization for worst-case generalization.arXiv preprint arXiv:1911.08731URL http://dx.doi.org/10.48550/arXiv.1911.08731. [111] Saghafian S (2024) Ambiguous dynamic treatment regimes: A reinforcement learning approach.Management"},{"citing_arxiv_id":"2604.08404","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Adversarial Label Invariant Graph Data Augmentations for Out-of-Distribution Generalization","primary_cat":"cs.LG","submitted_at":"2026-04-09T16:02:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RIA uses adversarial exploration of counterfactual graph environments via label-invariant augmentations to improve OoD generalization in graph classification tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08192","ref_index":61,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings","primary_cat":"cs.LG","submitted_at":"2026-04-09T12:44:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Circuit-based metrics from Vision Transformer internals provide better label-free proxies for generalization under distribution shift than existing methods like model confidence.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[59] Achyuta Rajaram, Neil Chowdhury, Antonio Torralba, Jacob Andreas, and Sarah Schwettmann. Automatic discovery of visual circuits.arXiv preprint arXiv:2404.14349, 2024. 8 [60] Benjamin Recht, Rebecca Roelofs, Ludwig Schmidt, and Vaishaal Shankar. Do imagenet classifiers generalize to im- agenet? InInternational conference on machine learning, pages 5389-5400. PMLR, 2019. 7, 1 [61] Shiori Sagawa, Pang Wei Koh, Tatsunori B Hashimoto, and Percy Liang. Distributionally robust neural networks for group shifts: On the importance of regularization for worst- case generalization.arXiv preprint arXiv:1911.08731, 2019. 4 [62] Rahul Saxena, Taeyoun Kim, Aman Mehra, Christina Baek, J Zico Kolter, and Aditi Raghunathan. Predicting the per-"},{"citing_arxiv_id":"2604.06440","ref_index":74,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Visual prompting reimagined: The power of the Activation Prompts","primary_cat":"cs.CV","submitted_at":"2026-04-07T20:28:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Activation prompts on intermediate layers outperform input-level visual prompting and parameter-efficient fine-tuning in accuracy and efficiency across 29 datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05285","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Robust Learning of Heterogeneous Dynamic Systems","primary_cat":"stat.ME","submitted_at":"2026-04-07T00:40:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A distributionally robust ODE learning framework for heterogeneous systems that uses worst-case optimization over convex derivative combinations to produce a stabilized weighted estimator with theoretical guarantees.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.02946","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learning from Synthetic Data via Provenance-Based Input Gradient Guidance","primary_cat":"cs.CV","submitted_at":"2026-04-03T10:28:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A framework that applies provenance-based guidance to input gradients during synthetic data training to promote learning from target regions only.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"iWildCamiWildCam [20] is an image classification dataset composed of images of wild animals from around the world, annotated with species labels. Because the cap- ture environment, animal species, and imaging devices vary substantially across images, there exists a significant do- main shift between the training and evaluation subsets. WaterbirdsWaterbirds [31] is a two-class bird image classification dataset consisting of landbirds and waterbirds, in which the bird class and background environment are constructed to be strongly correlated, making it easy for models to make predictions based on the background rather than the foreground. It is therefore widely used as a bench- mark for evaluating model robustness to spurious correla-"},{"citing_arxiv_id":"2602.23580","ref_index":22,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BRIDGE the Gap: Mitigating Bias Amplification in Automated Scoring of English Language Learners via Inter-group Data Augmentation","primary_cat":"cs.CL","submitted_at":"2026-02-27T01:11:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"BRIDGE reduces bias against high-scoring ELL students in automated scoring by generating synthetic samples via inter-group content pasting and quality discrimination, achieving fairness gains comparable to additional real data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.18502","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Mitigating Shortcut Learning via Feature Disentanglement in Medical Imaging: A Benchmark Study","primary_cat":"cs.CV","submitted_at":"2026-02-17T10:51:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Benchmark shows that combining data rebalancing with feature disentanglement mitigates shortcut learning more effectively than rebalancing alone in medical imaging models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.08813","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Robust Policy Optimization to Prevent Catastrophic Forgetting","primary_cat":"cs.LG","submitted_at":"2026-02-09T15:50:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FRPO applies a max-min robust optimization over KL-bounded policy neighborhoods during RLHF to reduce catastrophic forgetting of safety and accuracy under subsequent SFT or RL fine-tuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.08666","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Privacy Beyond Pixels: Latent Anonymization for Privacy-Preserving Video Understanding","primary_cat":"cs.CV","submitted_at":"2025-11-11T18:56:27+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A plug-and-play Anonymizing Adapter Module removes private information from video latent features using self-supervised privacy objectives and consistency losses while retaining utility on action recognition, temporal detection, and anomaly tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.15640","ref_index":27,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Data Mixing Agent: Learning to Re-weight Domains for Continual Pre-training","primary_cat":"cs.LG","submitted_at":"2025-07-21T14:01:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"An RL agent learns domain re-weighting policies from evaluation feedback to improve balanced performance in continual pre-training of LLMs across source and target domains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2412.11136","ref_index":57,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Minimax Regret Estimation for Generalizing Heterogeneous Treatment Effects with Multisite Data","primary_cat":"stat.ME","submitted_at":"2024-12-15T10:00:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Proposes a minimax-regret framework for learning generalizable CATE models from multisite data by minimizing worst-case regret over convex combinations of site-specific CATEs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2411.04696","ref_index":6,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"The Pragmatic Frames of Spurious Correlations in Machine Learning: Interpreting How and Why They Matter","primary_cat":"cs.LG","submitted_at":"2024-11-07T13:29:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ML researchers assess spurious correlations via four pragmatic frames (relevance, generalizability, human-likeness, harmfulness) rather than a fixed statistical definition.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}