{"total":1034,"items":[{"citing_arxiv_id":"2606.27627","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"HybridCodec: Modeling Discrete and Continuous Representations for Efficient Speech Language Models","primary_cat":"cs.LG","submitted_at":"2026-06-26T00:53:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"HybridCodec combines discrete tokens with continuous residuals via a focal modulation codec and hybrid Transformer to improve speaker retention and reduce autoregressive steps in speech language models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.27559","ref_index":63,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Sensitivity-Aware Test Collection for Search Among Personal Information","primary_cat":"cs.IR","submitted_at":"2026-06-25T21:24:17+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new sensitivity-labeled test collection is released from Enron emails with crowdsourced queries, relevance judgments, and LLM extensions for evaluating sensitivity-aware search.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26538","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CascadeFormer: Depth-Tapered Transformers Motivated by Gradient Fan-in Asymmetry","primary_cat":"cs.LG","submitted_at":"2026-06-25T02:25:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CascadeFormer tapers Transformer width with depth based on gradient fan-in asymmetry to match uniform baselines in perplexity while cutting latency 8.6%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.23591","ref_index":58,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Quantifying the Agreement Between Data-Influence and Data-Similarity to Understand LLM Behavior","primary_cat":"cs.LG","submitted_at":"2026-06-22T17:00:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Data-similarity and data-influence produce significantly overlapping rankings of training documents for LLM outputs, with asymmetry allowing a favorable cost-accuracy trade-off.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18681","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Moving Beyond Diversity: Visual Token Pruning as Subspace Reconstruction for Efficient VLMs","primary_cat":"cs.CV","submitted_at":"2026-06-17T04:45:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SPARE reformulates visual token pruning as column subset selection to minimize reconstruction error and uses anti-relevance for context-aware selection in VLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18524","ref_index":25,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"On the Residual Scaling of Looped Transformers: Stability and Transferability","primary_cat":"cs.LG","submitted_at":"2026-06-16T22:39:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Looped Transformers require residual scaling ε = 1/N due to correlated updates from weight sharing, unlike standard 1/sqrt(L), enabling learning rate transfer independent of loop count N.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17816","ref_index":47,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Conservation Laws for Modern Neural Architectures","primary_cat":"cs.LG","submitted_at":"2026-06-16T11:44:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Unified framework characterizes conservation laws for gradient flow in feedforward networks with GELU/SiLU/SwiGLU, multihead attention with positional encodings, and MoE models under various gating.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.09131","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Late-Layer Fusion is Enough: Dual-Path Vision Token Routing for Multimodal Large Language Models under Visual Saturation","primary_cat":"cs.AI","submitted_at":"2026-06-08T07:28:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DPVR-LF routes saturated vision tokens into a one-layer side branch after layer 4, runs text-only processing through layers 5-17, and performs late fusion at the final layer to reduce visual computation while preserving multimodal performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.06947","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DREAM: Dynamic Refinement of Early Assignment Mappings","primary_cat":"cs.IR","submitted_at":"2026-06-05T06:21:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DREAM proposes intent-aware tokenization, frozen-model evaluation, and dynamic beams to refine early SID assignments and improve cold-start performance in generative recommenders on Amazon benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04166","ref_index":28,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"End-to-End Text Line Detection and Ordering","primary_cat":"cs.CV","submitted_at":"2026-06-02T19:29:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Orli is an autoregressive image-to-sequence model that jointly detects text lines and determines their reading order on historical documents via chord-frame baselines, trained on 196k pages across ten scripts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03073","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Efficient Hyperparameter Optimization for LLM Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-06-02T03:02:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"JF-HPO jointly adapts model size and training budget as fidelity for efficient HPO in LLM RL, reporting up to 14.9x trial speedup and performance gains of 5.8-111.6% over the VeRL recipe.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02245","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When Knowledge Is Not Free: Cost-Aware Evidence Selection in Retrieval-Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-06-01T13:39:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Defines cost-aware RAG with evidence cost tiers and shows static selectors are brittle while agentic LLM-based selection is promising but model-dependent.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01394","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"UniD$^3$: A Knowledge Graph-Enhanced RAG Framework for Drug-Disease Discovery and Reasoning","primary_cat":"cs.CL","submitted_at":"2026-05-31T18:36:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"UniD³ applies KG-RAG with Llama 3.3-70B to build six knowledge graphs and generate large validated datasets for drug-disease matching, effectiveness assessment, and target analysis from biomedical literature.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01380","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Training-free image inversion for one-step diffusion models","primary_cat":"cs.CV","submitted_at":"2026-05-31T18:10:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TFinv proposes iterative noise alignment and suffix learning to enable training-free inversion and editing for one-step diffusion models, achieving SOTA performance and higher efficiency than multistep methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01155","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"When Data Is Scarce: Scaling Sparse Language Models with Repeated Training","primary_cat":"cs.LG","submitted_at":"2026-05-31T10:51:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Sparse LLMs in data-scarce multi-epoch regimes follow a scaling law based on active parameters, unique tokens, repetition count, and sparsity level that predicts performance and delays data saturation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07630","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Active Learning with Foundation Model Priors: Efficient Learning under Class Imbalance","primary_cat":"cs.LG","submitted_at":"2026-05-30T23:34:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Active learning with foundation model priors achieves over 50% annotation savings on imbalanced noisy datasets across image and text domains while maintaining performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.24894","ref_index":55,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RWGBench: Evaluating Scholarly Positioning in Related Work Generation","primary_cat":"cs.DL","submitted_at":"2026-05-30T16:53:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"RWGBench is a citation-centric benchmark for related work generation built from 40k CS papers and a 100-paper test set, with multi-dimensional metrics that better match human expert judgment than standard similarity scores.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00746","ref_index":105,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Scaling Parallel Sequence Models to Foundation-Scale Vision Encoders","primary_cat":"cs.CV","submitted_at":"2026-05-30T14:29:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"C-GSPN scales 2D spatial propagation to foundation vision encoders via a fast CUDA kernel, compressed blocks, and two-stage distillation, matching ViT performance with 15% fewer parameters and 4x block speedup at 2K resolution.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00511","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Saliency-Aware Model Merging","primary_cat":"cs.LG","submitted_at":"2026-05-30T04:00:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SA-Merging extends SynFlow-style saliency to task vectors, adds merge-aware modulation and iterative pruning, and applies rank-wise decomposition to LoRAs, narrowing the gap to test-time adaptation on vision and language tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00494","ref_index":57,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ProjQ: Project-and-Quantize for Adapter-Aware LLM Compression","primary_cat":"cs.LG","submitted_at":"2026-05-30T02:54:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ProjQ constrains post-training quantization noise to a low-rank manifold through orthogonal subspace projection, enabling better compensation by LoRA adapters and preserving greater model plasticity than standard PTQ.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00359","ref_index":136,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Next-Billion AI Index: The compass for AI utility and adoption in the global majority","primary_cat":"cs.CY","submitted_at":"2026-05-29T21:01:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces nexbax, a diagnostic framework with three themes and 10 dimensions for evaluating AI economic viability, operational practicality, and societal integrity in next-billion-user contexts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00306","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking the Role of Temperature in Large Language Model Distillation","primary_cat":"cs.LG","submitted_at":"2026-05-29T19:32:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Including temperature scaling makes forward KL divergence outperform reverse KL in LLM distillation on instruction benchmarks, overturning the τ=1 preference for reverse KL.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00241","ref_index":210,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"InfoAtlas: A Foundation Model for Zero-Shot Statistical Dependence Estimate","primary_cat":"cs.LG","submitted_at":"2026-05-29T18:16:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"InfoAtlas is a pretrained neural model for zero-shot mutual information estimation that matches state-of-the-art accuracy with 100x speedup and handles varying dimensions via a single model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00230","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Pre-Training Analogue of Grokking in Language Models: Tracing Delayed Grammatical Generalization","primary_cat":"cs.LG","submitted_at":"2026-05-29T18:04:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An exposure-based split on BLiMP data reveals delayed generalization in five grammatical phenomena during LLM pre-training, with post-generalization shifts in concept vector predictiveness and attention patterns.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31371","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Softsign: Smooth Sign in Your Optimizer For Better Parameter Heterogeneity Handling","primary_cat":"cs.LG","submitted_at":"2026-05-29T14:41:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SoftSignum replaces hard sign with soft-sign in optimizers via temperature control and quantile scheduling, extends to SoftMuon, provides a convergence proof for stochastic non-convex settings, and reports better performance than sign-based methods and AdamW on deep learning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07604","ref_index":51,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Contribution Weights: A Geometrical Analysis of Self-Attention Transformers","primary_cat":"cs.LG","submitted_at":"2026-05-29T09:40:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Contribution Weights combine attention, value magnitude, and directional alignment to measure token influence more faithfully than attention alone, and show attention sinks actively suppress information via a convex sink-rate to output-norm relationship.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07603","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MetaEvo: A Meta-Optimization Framework for Experience-Driven Agent Evolution","primary_cat":"cs.LG","submitted_at":"2026-05-29T09:31:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MetaEvo is a two-stage framework using preference optimization for principle abstraction followed by modular reuse to enable continual improvement of LLM agents on reasoning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07599","ref_index":59,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DiffoR: A Unified Continuous Generative Framework for Universal Ordinal Regression","primary_cat":"cs.LG","submitted_at":"2026-05-29T07:38:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiffOR reformulates ordinal regression as continuous generative modeling using diffusion models with dual-decoupling to capture soft semantic transitions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30911","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"What Makes LVLMs Hallucinate Less? Unveiling the Architectural Factors Behind Hallucination Robustness","primary_cat":"cs.CV","submitted_at":"2026-05-29T06:47:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The study links three LVLM architectural dimensions to three hallucination types via a new benchmark, finding that language foundation quality reduces co-occurrence errors, visual encoder strength reduces similarity errors, alignment reduces uncertainty errors, and joint visual-alignment improvement","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30889","ref_index":7,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MLIPilot: LLM-Driven Auto-Research for Machine-Learned Interatomic Potentials","primary_cat":"physics.chem-ph","submitted_at":"2026-05-29T06:25:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"MLIPilot deploys LLM agents to autonomously optimize MACE MLIP training on molecular and periodic datasets by proposing code edits and validating against a domain-specific scorecard.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07597","ref_index":63,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Repetition Mismatch: Why Data Mixture Experiments Don't Scale and How to Fix Them","primary_cat":"cs.LG","submitted_at":"2026-05-29T06:08:57+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Repetition rate mismatch between small-scale proxies and target budgets is the main reason data mixture experiments do not scale; a subsampling procedure that equalizes repetition rates recovers optimal mixtures from 1/16-scale experiments.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11232","ref_index":51,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Every Act Has Its Price: Compressed Moral Composition in Frontier LLMs","primary_cat":"cs.CL","submitted_at":"2026-05-29T02:36:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Moral Trolley Arena shows frontier LLMs produce composite moral preferences that are compressed rather than additive functions of calibrated component act strengths across Moral Foundations Theory.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30325","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Veda: Scalable Video Diffusion via Distilled Sparse Attention","primary_cat":"cs.CV","submitted_at":"2026-05-28T17:57:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Veda formulates tile selection in video diffusion attention as a reconstruction problem from full attention maps, using statistics-aware and head-aware scoring to enable high sparsity with maintained quality and hardware speedups up to 5.1x end-to-end.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30027","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DocRetriever: A Plug-and-Play Framework for Multimodal Document Retrieval with Comprehensive Benchmark","primary_cat":"cs.CV","submitted_at":"2026-05-28T14:50:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DocRetriever introduces a framework using layout-aware sparse embeddings for hybrid encoding without OCR and a generalizable reasoning-augmented reranker for few-shot settings, plus the MultiDocR benchmark for evaluation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30014","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From GPS Points to Travel Patterns: Flexible and Semantic Trajectory Generation with LLMs","primary_cat":"cs.AI","submitted_at":"2026-05-28T14:39:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HTP hierarchically generates travel patterns via RQ-VAE tokenization then uses SFT-tuned LLMs to produce conditioned trajectory sequences, outperforming baselines by 29.78% on two datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29863","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"STAP: A Shuffle-Tokenized App Predictor with Ultra Long Context for Vocabulary-Free Mobile App Prediction","primary_cat":"cs.LG","submitted_at":"2026-05-28T12:44:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A Transformer model with app-identity shuffling and ultra-long context achieves vocabulary-free next-app prediction with cross-dataset zero-shot capability and competitive cold-start performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29860","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ESPO: Early-Stopping Proximal Policy Optimization","primary_cat":"cs.LG","submitted_at":"2026-05-28T12:40:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"ESPO adds on-the-fly early stopping to PPO rollouts for LLM math reasoning using cumulative surrogate regret, improving AIME, AMC, and MATH-500 scores over PPO while cutting over 20% rollout tokens on a 7B model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29705","ref_index":45,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices","primary_cat":"cs.AI","submitted_at":"2026-05-28T10:04:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BitTP applies weight-only 1.58-bit quantization to LLM trajectory predictors, claiming improved ADE/FDE over BF16 baseline with reduced resource demands on edge devices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29659","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Opir: Efficient Multi-Task Safety Classification for Toxicity, Jailbreaks, Hate Speech, and Harmful Content","primary_cat":"cs.LG","submitted_at":"2026-05-28T09:21:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Opir introduces efficient multi-task encoder models trained on a 996-category safety taxonomy that match or exceed larger baselines on most safety benchmarks while using under 100M parameters for edge variants.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29640","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"VikingMem: A Memory Base Management System for Stateful LLM-based Applications","primary_cat":"cs.AI","submitted_at":"2026-05-28T09:07:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"VikingMem implements the Memory Base paradigm via event-centric extraction and entity updates on VikingDB with temporal compression, claiming up to 30% better retrieval effectiveness on long-term memory benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29488","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AnyMo: Scaling Any-Modality Conditional Motion Generation with Masked Modeling","primary_cat":"cs.CV","submitted_at":"2026-05-28T07:15:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AnyMo is a masked-modeling framework for any-modality human motion generation trained on the new OmniHuMo dataset of 5,000+ hours of multimodal motion sequences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29460","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FedSmoothLoRA: Toward Smoother and Faster Convergence in Federated Low-Rank Adaptation","primary_cat":"cs.CV","submitted_at":"2026-05-28T06:53:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FedSmoothLoRA improves federated LoRA fine-tuning by constructing local initializations from a round-matching matrix for cross-round continuity and a gradient-aligned matrix for client-specific guidance, yielding faster convergence than prior methods in image and text tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29421","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learning Design Skills as Memory Policies for Agentic Photonic Inverse Design","primary_cat":"cs.CL","submitted_at":"2026-05-28T06:14:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SkillPCF is a closed-loop agent framework with a physics-guided memory skill bank, reinforcement-learned skill selection, and simulator-grounded evolution that improves design quality and efficiency for photonic crystal fiber inverse design under limited simulation budgets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29247","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DenseSteer: Steering Small Language Models towards Dense Math Reasoning","primary_cat":"cs.AI","submitted_at":"2026-05-28T02:07:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DenseSteer is an inference-time steering framework that improves small LLMs' accuracy on math reasoning by modulating representations toward dense reasoning patterns with fewer but higher-density steps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.28920","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Conf-Gen: Conformal Uncertainty Quantification for Generative Models","primary_cat":"cs.LG","submitted_at":"2026-05-27T18:00:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Conf-Gen adapts conformal risk control to generative tasks by relaxing assumptions, unifying prior CP work on LLMs and extending guarantees to image generators, conversational AI, and AI agent correctness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.28444","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Bilinear Coordinate Alignment for Training-Free Task-Vector Transfer","primary_cat":"cs.LG","submitted_at":"2026-05-27T13:10:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BiCo transfers task vectors across models differing in width, depth, and pre-training by estimating dual-space orthogonal Procrustes mappings from one forward-backward pass on a calibration set.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00110","ref_index":122,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"General Covariant Action Modeling: Constructing Generalized Manifolds via Spatio-Temporal Decoupling","primary_cat":"cs.CV","submitted_at":"2026-05-27T03:38:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"GAM framework uses arc-length parameterization for temporal invariance and schema-affine factorization for geometric invariance to build a covariant action manifold integrated into VLA models for improved generalization from sparse data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27790","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SYNAPSE: Neuro-Symbolic Visual Thought-to-Text Decoding via Topological Semantic Denoising","primary_cat":"cs.LG","submitted_at":"2026-05-27T00:12:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SYNAPSE stabilizes EEG-to-imagined-text decoding via inference-time symbolic regularization with commonsense graphs, achieving gains over baselines without LLM fine-tuning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27786","ref_index":30,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Locality-Aware Redundancy Pruning for LLM Depth Compression","primary_cat":"cs.LG","submitted_at":"2026-05-27T00:09:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LoRP uses a new Representation Locality Score derived from inter-layer hidden-state similarity to cluster layers and prune intra-cluster redundancies in one shot, yielding better perplexity and task accuracy than prior depth-pruning baselines across LLM families.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23780","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Beyond Binary Edits Robust Multimodal Knowledge Editing with Adversarial Subspace Alignment","primary_cat":"cs.AI","submitted_at":"2026-05-22T15:46:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces Latent Adversarial Robustification and Rank-Constrained Subspace Learning to enable robust generalization in multimodal knowledge editing through adversarial subspace alignment.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}