{"total":14,"items":[{"citing_arxiv_id":"2606.31811","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MuSViT: A Foundation Vision Model for Sheet Music Representation","primary_cat":"cs.CV","submitted_at":"2026-06-30T15:27:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MuSViT is the first foundation vision model for sheet music, pre-trained on 9.7M IMSLP pages, that outperforms general encoders on recognition, detection, and classification tasks while encoding symbolic structure in its embeddings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2607.00052","ref_index":42,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AGE: Adaptive-masking for Graph Embedding in Graph Retrieval-Augmented Generation","primary_cat":"cs.IR","submitted_at":"2026-06-30T01:23:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AGE applies adaptive masking via a learnable sampler in Transformer-based SSL to align graph and text embeddings, yielding higher accuracy on four GraphQA benchmarks for non-parametric GraphRAG.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03802","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Template Collapse and Information-Theoretic Limits in Camera rPPG Pulse Morphology Restoration","primary_cat":"cs.CV","submitted_at":"2026-06-02T15:50:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Empirical tests of 16 architectures on 153 subjects show camera rPPG signals contain no recoverable subject-specific pulse morphology, with all models exhibiting template collapse.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03338","ref_index":76,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"IdEst: Assessing Self-Supervised Learning Representations via Intrinsic Dimension","primary_cat":"cs.LG","submitted_at":"2026-06-02T08:47:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"IdEst estimates intrinsic dimension of SSL representations via dim_MST and reports strong correlation with linear probe accuracy across datasets and objectives.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01443","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"UR-JEPA: Uniform Rectifiability as a Regularizer for Joint-Embedding Predictive Architectures","primary_cat":"cs.LG","submitted_at":"2026-05-31T20:26:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"UR-JEPA applies uniform rectifiability regularization via a smoothed Carleson square function to JEPA training, producing embeddings with 4-5 order PCA spectral drop at dimension 20-25 and lower seed variance than Gaussian regularization on Inet10, Galaxy10, and EuroSAT.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31068","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"HQ-JEPA: Hybrid Quantum Joint-Embedding Predictive Architecture for Cross-Modal Remote Sensing Representation Learning","primary_cat":"cs.CV","submitted_at":"2026-05-29T09:37:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"HQ-JEPA combines JEPA-style predictive self-supervision with cross-modal alignment and a SWAP-test-based quantum fidelity loss for learning representations from paired remote sensing imagery, reporting competitive results on GeoBench tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17854","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Learning over Positive and Negative Edges with Contrastive Message Passing","primary_cat":"cs.LG","submitted_at":"2026-05-18T04:52:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Contrastive Message Passing lets GNNs apply similarity-preserving transforms to positive edges and dissimilarity-inducing transforms to negative edges via soft positive semidefinite constraints on weights, yielding gains in low-label high-homophily regimes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10790","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Elucidating Representation Degradation Problem in Diffusion Model Training","primary_cat":"cs.LG","submitted_at":"2026-05-11T16:21:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Diffusion models suffer representation degradation at high noise due to recoverability mismatch; ERD mitigates this by dynamic optimization reallocation, accelerating convergence across backbones.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Only a small fraction of the target remains effectively recoverable within finite training time, while standard training may still allocate substantial optimization effort to these weakly recoverable regions [3]. This mismatch is consistent with spectral degeneration [44], effective low-rank behavior, and contributes to representation degradation [21] in the extreme-noise regime. These findings identify representation degradation as an intrinsic optimization bottleneck rather than a purely architectural limitation. To address it, we proposeElucidated Representation Diffusion (ERD), a training framework that reallocates optimization effort according to target recoverability without relying on external alignment networks."},{"citing_arxiv_id":"2604.25065","ref_index":45,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ShapeY: A Principled Framework for Measuring Shape Recognition Capacity via Nearest-Neighbor Matching","primary_cat":"cs.CV","submitted_at":"2026-04-27T23:42:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ShapeY is a benchmark dataset and nearest-neighbor protocol that measures shape-based recognition in vision models, revealing that even state-of-the-art networks fail to generalize consistently across 3D viewpoints and non-shape appearance changes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18804","ref_index":55,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Geometric Decoupling: Diagnosing the Structural Instability of Latent","primary_cat":"cs.CV","submitted_at":"2026-04-20T20:22:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Latent diffusion models exhibit geometric decoupling where curvature in out-of-distribution generation is misallocated to unstable semantic boundaries instead of image details, identifying geometric hotspots as the structural cause of editing instability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.16678","ref_index":8,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"UniCon: Unified Framework for Efficient Contrastive Alignment via Kernels","primary_cat":"cs.LG","submitted_at":"2026-04-17T20:21:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"UniCon unifies contrastive alignment across encoders and alignment types using kernels to enable exact closed-form updates instead of stochastic optimization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.21986","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SpecTran: Spectral-Aware Transformer-based Adapter for LLM-Enhanced Sequential Recommendation","primary_cat":"cs.IR","submitted_at":"2026-01-29T17:00:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SpecTran applies a spectral-aware transformer adapter with learnable position encoding to aggregate informative components across the full spectrum of LLM embeddings, yielding 9.17% average gains on sequential recommendation tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.04847","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Language Models as Semantic Teachers: Post-Training Alignment for Medical Audio Understanding","primary_cat":"cs.SD","submitted_at":"2025-12-04T14:30:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AcuLa aligns audio models with medical language models via contrastive and self-supervised objectives on LLM-generated clinical reports, raising mean AUROC from 0.68 to 0.79 across 18 cardio-respiratory tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.08544","ref_index":78,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LeJEPA: Provable and Scalable Self-Supervised Learning Without the Heuristics","primary_cat":"cs.LG","submitted_at":"2025-11-11T18:21:55+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LeJEPA derives an optimal isotropic Gaussian target for embeddings and enforces it via sketched regularization to deliver scalable, heuristics-free self-supervised pretraining with 79% ImageNet linear accuracy on ViT-H/14.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}