{"total":15,"items":[{"citing_arxiv_id":"2607.02435","ref_index":65,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MARVEL: Margin-Aware Robust von Mises-Fischer Expert Learning for Long-Tailed Out-of-Distribution Detection","primary_cat":"cs.CV","submitted_at":"2026-07-02T17:06:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MARVEL introduces a multi-expert NvMF-based system with an outlier expert that reduces FPR95 in OOD detection on medical datasets by 8-37%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.28556","ref_index":17,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"IMCBench: A benchmark for multimodal LLMs in Image-grounded Medical Conversations","primary_cat":"cs.AI","submitted_at":"2026-06-26T19:18:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"IMCBench is a new benchmark for image-grounded multi-turn medical conversations that evaluates eight multimodal LLMs on safety, accuracy, and uncertainty, finding Claude Opus highest overall but safety drops for malignant and rare conditions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22892","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"IViT: A Novel Interpretable Visual Transformer for Skin Disease Detection","primary_cat":"eess.IV","submitted_at":"2026-06-22T06:06:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"IViT applies quadratic programming to a pre-trained Vision Transformer with a multi-objective loss, achieving 93.80% accuracy on six skin disease datasets (0.21% below baseline) while reducing feature redundancy by 29.5% and producing clinically consistent activations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.13135","ref_index":4,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Cascade Classification of Dermoscopic Images of Skin Neoplasms with Controllable Sensitivity and External Clinical Validation","primary_cat":"cs.CV","submitted_at":"2026-06-11T09:55:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Cascade classification improves macro F1 over single-stage for some models by allowing sensitivity control but reveals a large generalization gap on external clinical data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.25168","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Methodology for Creating a Clinically Verified Dermoscopic Image Dataset","primary_cat":"cs.CV","submitted_at":"2026-05-24T16:56:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Describes a methodology and the resulting dataset of 1,026 dermoscopic images with structured metadata and verified diagnostic labels for medical informatics research.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14403","ref_index":25,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"DermAgent: A Self-Reflective Agentic System for Dermatological Image Analysis with Multi-Tool Reasoning and Traceable Decision-Making","primary_cat":"cs.CV","submitted_at":"2026-05-14T05:41:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DermAgent orchestrates seven vision-language tools in a Plan-Execute-Reflect loop with dual-modality retrieval from 413k cases and a critic module to outperform GPT-4o by 17.6% in zero-shot dermatological diagnosis accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13688","ref_index":46,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MedCore: Boundary-Preserving Medical Core Pruning for MedSAM","primary_cat":"cs.CV","submitted_at":"2026-05-13T15:42:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MedCore achieves 60% parameter and 58.4% FLOP reduction on MedSAM with Dice 0.9549 and preserved boundary metrics via dual-intervention pruning and a new boundary leverage principle.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26324","ref_index":30,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Federated Medical Image Classification under Class and Domain Imbalance exploiting Synthetic Sample Generation","primary_cat":"cs.CV","submitted_at":"2026-04-29T06:12:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FedSSG generates and shares synthetic samples within a federated setup to reduce class imbalance and domain shift problems in medical image classification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26024","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Correcting Performance Estimation Bias in Imbalanced Classification with Minority Subconcepts","primary_cat":"cs.LG","submitted_at":"2026-04-28T18:05:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The authors introduce predicted-weighted balanced accuracy (pBA), a utility-weighted evaluation metric that uses predicted subconcept posteriors to reduce bias from within-class heterogeneity in imbalanced data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19323","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Concept Inconsistency in Dermoscopic Concept Bottleneck Models: A Rough-Set Analysis of the Derm7pt Dataset","primary_cat":"cs.LG","submitted_at":"2026-04-21T10:45:50+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Rough-set analysis finds 16.4% of 305 concept profiles in Derm7pt inconsistent (306 images), capping hard CBM accuracy at 92.1%; symmetric filtering produces a 705-image consistent benchmark where EfficientNet-B5 reaches 0.90 label accuracy.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"concept heads are updated solely by the concept lossL C, preserving the semantic purity of the bottleneck. The total training loss combines a concept prediction term and a label prediction term: L= 1 N N ∑ i=1 \u0002 λ1 LC(xi,v i) +λ2 LY (xi,y i) \u0003 ,(14) whereλ 1,λ 2 ≥0 are balancing coefficients. The concept loss is the mean of per-concept weighted cross-entropies: LC(xi,v i) = 1 |C| ∑ c∈C CE \u0010 l(i) c ,v (i) c ,w c \u0011 ,(15) where v(i) c ∈V c is the ground-truth value for concept c andw c ∈R |Vc| is the vector of class weights computed via Equation (11) for conceptc. The label loss is a weighted cross-entropy: LY (xi,y i) =CE \u0010 ˆy(i),y i,w y \u0011 , (16) withw y = (w0,w 1)computed from Equation (11). 4.3 Empirical Results Nineteen backbone architectures are evaluated: EfficientNet-B0 through B7 (8 models)33, DenseNet-121, 161, 169, and 201 (4"},{"citing_arxiv_id":"2604.05081","ref_index":17,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MedGemma 1.5 Technical Report","primary_cat":"cs.AI","submitted_at":"2026-04-06T18:35:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MedGemma 1.5 4B reports absolute gains of 11% on 3D MRI classification, 3% on 3D CT, 47% macro F1 on pathology slides, 35% IoU on anatomical localization, and 5-22% on clinical QA tasks over MedGemma 1.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.05880","ref_index":17,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Contour Refinement using Discrete Diffusion in Low Data Regime","primary_cat":"cs.CV","submitted_at":"2026-02-05T16:55:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A CNN-based discrete diffusion method refines sparse contours from segmentation masks using simplified denoising steps and minimal post-processing, outperforming baselines on small medical and environmental datasets while running 3.5 times faster.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.23304","ref_index":28,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"MedGemma vs GPT-4: Open-Source and Proprietary Zero-shot Medical Disease Classification from Images","primary_cat":"cs.CV","submitted_at":"2025-12-29T08:48:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Fine-tuned MedGemma outperforms untuned GPT-4 in zero-shot medical image disease classification, achieving 80.37% versus 69.58% mean test accuracy with higher sensitivity for cancer and pneumonia.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.01299","ref_index":59,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Cross-Domain Few-Shot Segmentation via Ordinary Differential Equations over Time Intervals","primary_cat":"cs.CV","submitted_at":"2025-09-01T09:35:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FSS-TIs models cross-domain few-shot segmentation as an ODE process with Fourier-based spectral perturbations to create domain-agnostic features and enable effective fine-tuning on limited support samples.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.11638","ref_index":51,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Safeguarding AI in Medical Imaging: Post-Hoc Out-of-Distribution Detection with Normalizing Flows","primary_cat":"cs.CV","submitted_at":"2025-02-17T10:31:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Post-hoc normalizing flows for OOD detection in medical imaging achieve 84.61% AUROC on MedOOD and 93.8% on MedMNIST, outperforming ViM, MDS, and ReAct.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}