{"total":31,"items":[{"citing_arxiv_id":"2605.23645","ref_index":19,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Learning Through Noise: Why Subliminal Learning Works and When It Fails","primary_cat":"cs.LG","submitted_at":"2026-05-22T13:59:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Subliminal learning occurs via compatible auxiliary and class output heads on task-unrelated inputs, even with random hidden layers or architecture changes, with theory and upper bounds on failure.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23138","ref_index":74,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Classical State Preparation for Variational Quantum Algorithms via Reinforcement Learning","primary_cat":"quant-ph","submitted_at":"2026-05-22T01:24:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CRiSP uses neural-guided MCTS and curriculum learning to insert Clifford prefixes before parameterized rotations in VQAs, yielding mean 3.17x and max 45x gains in energy accuracy on 22-qubit QAOA benchmarks versus prior Clifford initializers.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21800","ref_index":15,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"stable-worldmodel: A Platform for Reproducible World Modeling Research and Evaluation","primary_cat":"cs.LG","submitted_at":"2026-05-20T22:58:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"The paper presents stable-worldmodel (swm), a platform with high-performance data layer, modern world model baselines, planning solvers, and extended environments for reproducible research and generalization evaluation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20624","ref_index":61,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Accelerating Video Inverse Problem Solvers with Autoregressive Diffusion Models","primary_cat":"cs.CV","submitted_at":"2026-05-20T02:16:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AVIS applies autoregressive diffusion models to video inverse problems by streaming restoration with measurement-consistent initialization, reducing latency from 114s to 4s and raising throughput to 1.18 FPS (or 5.91 FPS in the Flash variant).","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18180","ref_index":35,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Canonical Regularisation of Wide Feature-Learning Neural Networks","primary_cat":"stat.ML","submitted_at":"2026-05-18T10:23:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"Derives geodesic ridge regularization and Riemannian Gibbs Process prior for feature-learning wide neural networks, generalizing kernel-regime results via function-space axiomatization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17354","ref_index":67,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"GeoHand: Unlocking Prior Geometry Knowledge for Monocular 3D Hand Reconstruction","primary_cat":"cs.CV","submitted_at":"2026-05-17T09:45:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GeoHand adapts priors from a general-scene geometry estimator via a GeoAdapter, gated fusion, and keypoint-queried refiner to reach SOTA monocular 3D hand reconstruction on FreiHAND, DexYCB, and HO3Dv3 under heavy occlusion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14413","ref_index":26,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"MahaVar: OOD Detection via Class-wise Mahalanobis Distance Variance under Neural Collapse","primary_cat":"cs.LG","submitted_at":"2026-05-14T05:58:19+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MahaVar augments the Mahalanobis OOD score with class-wise distance variance, which is theoretically higher for in-distribution samples under relaxed Neural Collapse geometry.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13835","ref_index":37,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Unlocking Patch-Level Features for CLIP-Based Class-Incremental Learning","primary_cat":"cs.CV","submitted_at":"2026-05-13T17:56:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SPA unlocks patch-level features in CLIP for class-incremental learning via semantic-guided selection and optimal transport alignment with class descriptions, plus projectors and pseudo-feature replay to reduce forgetting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12709","ref_index":12,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Spectral Energy Centroid: a Metric for Improving Performance and Analyzing Spectral Bias in Implicit Neural Representations","primary_cat":"cs.LG","submitted_at":"2026-05-12T20:16:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Spectral Energy Centroid is a new metric that quantifies signal frequency and INR spectral bias, supporting better hyperparameter selection and cross-architecture analysis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12365","ref_index":50,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"QAP-Router: Tackling Qubit Routing as Dynamic Quadratic Assignment with Reinforcement Learning","primary_cat":"quant-ph","submitted_at":"2026-05-12T16:34:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"QAP-Router models qubit routing as dynamic QAP and applies RL with a solution-aware Transformer to cut CNOT counts by 12-30% versus industry compilers on real circuit benchmarks.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"In2018 IEEE Data Science Workshop (DSW), pages 1-5. IEEE, 2018. [49] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [50] Alberto Peruzzo, Jarrod McClean, Peter Shadbolt, Man-Hong Yung, Xiao-Qi Zhou, Peter J Love, Alán Aspuru-Guzik, and Jeremy L O'brien. A variational eigenvalue solver on a photonic quantum processor.Nature communications, 5:4213, 2014. [51] Christopher Portmann and Renato Renner. Security in quantum cryptography.Reviews of Modern Physics, 94(2):025008, 2022."},{"citing_arxiv_id":"2605.12051","ref_index":29,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Learning plug-in surrogate endpoints for randomized experiments","primary_cat":"cs.LG","submitted_at":"2026-05-12T12:31:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Two methods are introduced to learn plug-in composite surrogates that maximize effect predictiveness, with the direct surrogate-effect modeling approach outperforming baselines on synthetic data with known effects and real-world experiment data.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"InBiostatistics in Biopharmaceutical Research and Development: Clinical Trial Analysis, Volume 2, pages 243-286. Springer, 2024. [28] George Papamakarios, Eric Nalisnick, Danilo Jimenez Rezende, Shakir Mohamed, and Balaji Lakshminarayanan. Normalizing flows for probabilistic modeling and inference.Journal of Machine Learning Research, 22(57):1-64, 2021. [29] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [30] Judea Pearl.Causality. Cambridge university press, 2009. [31] Judea Pearl and Elias Bareinboim."},{"citing_arxiv_id":"2605.11093","ref_index":31,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Enabling Performant and Flexible Model-Internal Observability for LLM Inference","primary_cat":"cs.LG","submitted_at":"2026-05-11T18:01:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DMI-Lib delivers 0.4-6.8% overhead for offline batch LLM inference and ~6% for moderate online serving while exposing rich internal signals across backends, cutting latency overhead 2-15x versus prior observability baselines.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"To enable these observability use cases, researchers have been retrofitting existing inference stacks with additional extraction logic. However, these efforts fall into two unsatisfying categories: (1) ad-hocobservation built on extension mechanisms provided by PyTorch, which serves as the standard model implementation framework for many modern LLMs [ 31]; or (2)inference-engine-bound functionality, such as engine-provided APIs or output interfaces, whose access semantics are defined and implemented by the engine itself. Neither approach provides a complete solution. Methods built on PyTorch extensions inherit the limitations of the underlying extension surface: for example, register_forward_hook typically"},{"citing_arxiv_id":"2605.10521","ref_index":25,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"DuetFair: Coupling Inter- and Intra-Subgroup Robustness for Fair Medical Image Segmentation","primary_cat":"cs.CV","submitted_at":"2026-05-11T13:08:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DuetFair couples inter-subgroup adaptation with intra-subgroup robustness via FairDRO (dMoE plus subgroup-conditioned DRO) to boost worst-case and equity-scaled performance on medical segmentation benchmarks.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"reported to perform well on radiotherapy target delineation [3]. Patches of 384×384×128 voxels are sampled at training time with a batch size of 4, and full CT volumes are evaluated via sliding-window inference. The optimizer is run for up to 100 epochs with an initial learning rate of 5×10−5 and early stopping driven by the validation set. All models are implemented in PyTorch [25] on CUDA 11.8 and optimized with AdamW [26] under exponential learning-rate decay. The 2D experiments are run on a single NVIDIA A100 80 GB GPU and the 3D experiments on a single NVIDIA RTX A6000 48 GB GPU. 4.3 Baseline Method and Evaluation Metrics On Harvard-FairSeg, we report the published numbers for the four baselines released with the bench-"},{"citing_arxiv_id":"2605.10115","ref_index":40,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Generating Symmetric Materials using Latent Flow Matching","primary_cat":"cs.LG","submitted_at":"2026-05-11T07:32:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SymADiT generates stable symmetric materials by enforcing Wyckoff-position and space-group constraints inside a latent generative model built on the prior ADiT architecture.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"function approximation in reinforcement learning.Neural networks, 107:3-11, 2018. [39] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [40] Walter Kohn and Lu Jeu Sham. Self-consistent equations including exchange and correlation effects.Phys. Rev., 140(4A):A1133, 1965. [41] Pierre Hohenberg and Walter Kohn. Inhomogeneous electron gas.Phys. Rev., 136(3B):B864, 1964. [42] Georg Kresse and Jürgen Hafner. Ab initio molecular dynamics for liquid metals.Phys. Rev. B, 47(1):558, 1993. [43] Georg Kresse and Jürgen Hafner."},{"citing_arxiv_id":"2605.09685","ref_index":54,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Learning Unified Representations of Normalcy for Time Series Anomaly Detection","primary_cat":"cs.LG","submitted_at":"2026-05-10T18:12:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"U²AD learns unified normal data representations via score-based generative modeling and a novel time-dependent score network to outperform prior methods in accuracy and early anomaly detection for multivariate time series.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"InProceedings of the 32nd ACM International Conference on Information and Knowledge Management, pages 1411-1420, 2023. [53] Guojin Zhong, Jin Yuan, Zhiyong Li, Long Chen, et al. Multi-resolution decomposable diffusion model for non- stationary time series anomaly detection. InThe Thirteenth International Conference on Learning Representations, 2025. [54] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [55] Diederik P Kingma and Jimmy Ba. Adam: A method for stochastic optimization."},{"citing_arxiv_id":"2605.09424","ref_index":64,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Tabular Foundation Model for Generative Modelling","primary_cat":"cs.LG","submitted_at":"2026-05-10T08:52:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"TabFORGE generates high-quality synthetic tabular data by leveraging pretrained causality-aware representations in a two-stage diffusion-decoder architecture that mitigates latent distribution shifts.","context_count":1,"top_context_role":"other","top_context_polarity":"unclear","context_text":"Tabsds: a lightweight, fully non-parametric, and model free approach for generating synthetic tabular data. InForty-second International Conference on Machine Learning, 2025. [63] Dang Nguyen, Sunil Gupta, Kien Do, Thin Nguyen, and Svetha Venkatesh. Generating realistic tabular data with large language models. In2024 IEEE International Conference on Data Mining (ICDM), pages 330-339. IEEE, 2024. [64] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [65] Zhaozhi Qian, Rob Davis, and Mihaela Van Der Schaar. Synthcity: a benchmark framework for"},{"citing_arxiv_id":"2605.07977","ref_index":28,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Self-Play Enhancement via Advantage-Weighted Refinement in Online Federated LLM Fine-Tuning with Real-Time Feedback","primary_cat":"cs.LG","submitted_at":"2026-05-08T16:35:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SPEAR enables online federated LLM fine-tuning by using feedback-guided self-play to create contrastive pairs trained with maximum likelihood on correct completions and confidence-weighted unlikelihood on incorrect ones, outperforming baselines without ground-truth contexts.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"We consider the usage of LoRA [13] as our PEFT method, targeting the attention and projection modules at each layer. For all experiments, the AdamW optimizer is utilized [26], with linear warmup and cosine decay on the learning rate [ 1]. All experiments are conducted on a server with a NVIDIA A100-40GB GPU, utilizing the HuggingFace [15] and PyTorch [28] libraries for implementation. More detailed specifications can be found in Appendix D. Datasets.We consider four benchmark datasets encompassing a diverse range of domains: ARC- Challenge [7] for science-based question answering, HellaSwag [46] for common-sense reasoning sentence completion, MathMCQA for competition-level mathematics [4], and StrategyQA [9] for"},{"citing_arxiv_id":"2605.07456","ref_index":81,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Inference-Time Attribute Distribution Alignment for Unconditional Diffusion","primary_cat":"cs.LG","submitted_at":"2026-05-08T09:02:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An optimal control formulation adds time-dependent perturbations to the reverse diffusion process to match target attribute distributions while preserving sample fidelity.","context_count":1,"top_context_role":"background","top_context_polarity":"unclear","context_text":"[79] Stephen Boyd and Lieven Vandenberghe.Convex Optimization. Cambridge University Press, 2004. [80] Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. Pytorch: An imperative style, high-performance deep learning library.Advances in neural information processing systems, 32, 2019. [81] Yaofo Chen. Pytorch CIFAR models. https://github.com/chenyaofo/ pytorch-cifar-models, 2025. Accessed: 2025-5-17. [82] Diederik P Kingma and Max Welling. Auto-encoding variational Bayes.arXiv preprint arXiv:1312.6114, 2013. 15 Table of Contents for Appendix A Theoretical Derivations 17 B Method Details 18 B.1 PF-ODE Instances . . . . . . . . . . ."},{"citing_arxiv_id":"2605.07029","ref_index":38,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"BGM-IV: an AI-powered Bayesian generative modeling approach for instrumental variable analysis","primary_cat":"stat.ML","submitted_at":"2026-05-07T23:18:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BGM-IV performs nonlinear IV regression by inferring causally structured latent components and replacing the outcome likelihood with an instrument-averaged pseudo-likelihood, showing strongest results in high-dimensional covariate regimes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06809","ref_index":34,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"LookWhen? Fast Video Recognition by Learning When, Where, and What to Compute","primary_cat":"cs.CV","submitted_at":"2026-05-07T18:08:31+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LookWhen factorizes video recognition into learning when, where, and what to compute via uniqueness-based token selection and dual-teacher distillation, achieving better accuracy-FLOPs trade-offs than baselines on multiple datasets.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"cient in practice because of Flash Attention [33] incompatibility for token-merging layers (the first 2-4 layers). IV2+RLT is less efficient in practice because it requires a batch size of 1 or masking (where all tokens are processed and the contributions of some tokens are ignored). Our LookWhen uses standard deep learning operations so is efficient in practice using standard libraries, e.g. PyTorch [34]. Because of its selector, LookWhen has more param- eters than IV2, yet LookWhen ismore memory-efficientbecause it has fewer activations. Please see accuracy-speed plots for all 12 evaluations and memory-use statistics in Appendix §A.1. 4 Ablations and Analysis: Looking Inside We analyze the key design choices behind LookWhen. We first describe the ablation dataset and"},{"citing_arxiv_id":"2605.06501","ref_index":56,"ref_count":2,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Cubit: Token Mixer with Kernel Ridge Regression","primary_cat":"cs.LG","submitted_at":"2026-05-07T16:18:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Cubit replaces Transformer's attention with a closed-form Kernel Ridge Regression token mixer and reports larger gains as training sequence length increases.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.03520","ref_index":25,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Parametrizing Convex Sets Using Sublinear Neural Networks","primary_cat":"math.OC","submitted_at":"2026-05-05T08:57:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Sublinear neural networks parametrize convex sets by learning their support and gauge functions, backed by a universal approximation theorem and tested on shape optimization tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26310","ref_index":8,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Benchmarking PyCaret AutoML Against BiLSTM for Fine-Grained Emotion Classification: A Comparative Study on 20-Class Emotion Detection","primary_cat":"cs.CL","submitted_at":"2026-04-29T05:31:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"BiLSTM achieves 89% accuracy and 0.89 weighted F1 on 20-class emotion detection, marginally outperforming SVM at 88.11% on a 79,595-sentence dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23525","ref_index":34,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Triple Configuration of Brain Networks Based on Recurrent Neural Networks: The Synergistic Effects of Exogenous Stimuli, Task Demands, and Spontaneous Activity","primary_cat":"q-bio.NC","submitted_at":"2026-04-26T04:17:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"RNNs with dynamic constraints applied to EEG data separate brain network activity into three configurations driven by stimuli, tasks, and spontaneous processes, highlighting the parietal network as a central hub.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19343","ref_index":32,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Scalable Memristive-Friendly Reservoir Computing for Time Series Classification","primary_cat":"cs.NE","submitted_at":"2026-04-21T11:26:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MARS parallel reservoirs achieve up to 21x training speedups and outperform LRU, S5, and Mamba on long sequence benchmarks while remaining gradient-free and compact.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06519","ref_index":76,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Multiscale topology optimization of compressible and nearly incompressible anisotropic hyperelastic structures using physics-augmented neural networks","primary_cat":"cs.CE","submitted_at":"2026-04-07T23:22:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Physics-augmented neural networks act as stable, thermodynamically consistent surrogates for microscale problems, enabling simultaneous optimization of macroscale material layout and microscale descriptors in nonlinear finite-strain anisotropic hyperelastic structures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.04539","ref_index":65,"ref_count":2,"confidence":0.55,"is_internal_anchor":false,"paper_title":"FlashSAC: Fast and Stable Off-Policy Reinforcement Learning for High-Dimensional Robot Control","primary_cat":"cs.LG","submitted_at":"2026-04-06T09:03:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FlashSAC improves training speed and final performance of off-policy RL on high-dimensional robot tasks by reducing update frequency, increasing model scale, and bounding norms to limit critic error accumulation.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"The updates-to-data ratio is set to 2/1024, meaning only 2 gradient updates are performed per 1024 new transitions. Although such infrequent updates are typically ineffective in off-policy RL, the combination of large batches, higher learning rates, and increased model capacity enables fast convergence with fewer updates. Code Optimization.FlashSACis implemented in PyTorch [65], with both training and inference JIT-compiled to minimize Python overhead. We use mixed-precision throughout training [52], which reduces wall-clock time by 5-10%. 4.2 Stable Training Scaling data and model accelerates training but does not prevent instability arising from bootstrapped critic updates. In the Bellman backup, estimation errors at next-state action pairs propagate into the current"},{"citing_arxiv_id":"2603.22241","ref_index":26,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"MemDLM: Memory-Enhanced DLM Training","primary_cat":"cs.CL","submitted_at":"2026-03-23T17:39:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MemDLM embeds a simulated denoising trajectory into DLM training via bi-level optimization, creating a parametric memory that improves convergence and long-context performance even when the memory is dropped at test time.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.17467","ref_index":28,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Harnessing Reasoning Trajectories for Hallucination Detection via Answer-agreement Representation Shaping","primary_cat":"cs.LG","submitted_at":"2026-01-24T13:47:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ARS shapes reasoning trace representations by clustering states that produce consistent answers and separating those that produce inconsistent ones via latent perturbations, improving plug-and-play hallucination detection without human annotations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.14577","ref_index":36,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"FBApro: A fast, simple linear transformation for diverse metabolic modeling tasks","primary_cat":"q-bio.QM","submitted_at":"2026-01-21T01:25:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FBApro computes the nearest steady-state flux distribution to a reference vector via a closed-form linear projection derived from orthogonal projections onto affine spaces.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2501.17653","ref_index":42,"ref_count":1,"confidence":0.55,"is_internal_anchor":false,"paper_title":"Drivetrain simulation using variational autoencoders","primary_cat":"cs.LG","submitted_at":"2025-01-29T13:37:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Variational autoencoders generate jerk signals from torque inputs in electric drivetrains and outperform physics-based baselines without detailed parametrization.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}