{"total":70,"items":[{"citing_arxiv_id":"2606.24841","ref_index":20,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Matching Tasks to Objectives: Fine-Tuning and Prompt-Tuning Strategies for Encoder-Decoder Pre-trained Language Models","primary_cat":"cs.AI","submitted_at":"2026-06-23T17:21:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Introduces MTO framework for matching tasks to pre-training objectives in encoder-decoder models, achieving over 120% performance gains in few-shot commonsense tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00776","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Latent Diffusion Pretraining for Crystal Property Prediction","primary_cat":"cs.LG","submitted_at":"2026-05-30T15:44:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CrysLDNet combines VAE and latent diffusion pretraining on unlabeled crystals to improve graph encoder performance on property prediction by about 4-5% on JARVIS and MP datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00508","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"V-LynX: Token Interface Alignment for Video+X LLMs","primary_cat":"cs.CV","submitted_at":"2026-05-30T03:54:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"V-LynX integrates novel modalities into frozen Video LLMs by aligning to an internalized continuous token manifold using unpaired unimodal data and attention/statistical matching.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29498","ref_index":34,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Mask the Target: A Plug-and-Play Regularizer Against LoRA Forgetting","primary_cat":"cs.CL","submitted_at":"2026-05-28T07:22:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A plug-and-play KL regularizer that masks the target token and renormalizes probabilities to improve the learning-forgetting trade-off in LoRA adaptation of LLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19623","ref_index":44,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation","primary_cat":"cs.CV","submitted_at":"2026-05-19T10:00:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PrAda adapts text-prompted segmentation models in a few-shot setting by learning and fusing class-specific prototypes from fine-grained and high-level features, yielding significant gains on semantic, instance, and panoptic segmentation across five benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19301","ref_index":25,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"iGSP:Implicit Gradient Subspace Projection for Efficient Continual Learning of Vision-Language Models","primary_cat":"cs.CV","submitted_at":"2026-05-19T03:22:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"iGSP uses implicit gradient subspace projection in two phases to enable efficient continual adaptation of vision-language models, claiming SOTA accuracy with 42.7% fewer trainable parameters and 86.9% less total parameter growth.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16113","ref_index":29,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DebiasRAG: A Tuning-Free Path to Fair Generation in Large Language Models through Retrieval-Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-05-15T15:58:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DebiasRAG uses a three-stage RAG process to generate and rerank query-specific debiasing contexts that act as fairness constraints for LLM outputs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14055","ref_index":56,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PEML: Parameter-efficient Multi-Task Learning with Optimized Continuous Prompts","primary_cat":"cs.CL","submitted_at":"2026-05-13T19:25:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PEML co-optimizes continuous prompts and low-rank adaptations to deliver up to 6.67% average accuracy gains over existing multi-task PEFT methods on GLUE, SuperGLUE, and other benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13421","ref_index":180,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Combining pre-trained models via localized model averaging","primary_cat":"stat.ME","submitted_at":"2026-05-13T12:16:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Localized model averaging with covariate-dependent weights achieves asymptotic optimality and weight consistency for combining pre-trained models under a general loss framework.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08842","ref_index":76,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"XPERT: Expert Knowledge Transfer for Effective Training of Language Models","primary_cat":"cs.CL","submitted_at":"2026-05-09T09:53:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"XPERT extracts and reuses cross-domain expert knowledge from pre-trained MoE LLMs via inference analysis and tensor decomposition to improve performance and convergence in downstream language model training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05776","ref_index":90,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"HEDP: A Hybrid Energy-Distance Prompt-based Framework for Domain Incremental Learning","primary_cat":"cs.AI","submitted_at":"2026-05-07T07:09:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"HEDP uses energy regularization inspired by Helmholtz free energy plus hybrid energy-distance weighting in prompts to improve domain selection and achieve a 2.57% accuracy gain on benchmarks like CORe50 while mitigating catastrophic forgetting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.04651","ref_index":14,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FAAST: Forward-Only Associative Learning via Closed-Form Fast Weights for Test-Time Supervised Adaptation","primary_cat":"cs.LG","submitted_at":"2026-05-06T08:58:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FAAST performs test-time supervised adaptation by analytically deriving fast weights from examples in one forward pass, matching backprop performance with over 90% less adaptation time and up to 95% memory savings versus memory-based methods.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"A key challenge in supervised adaptation is scale: while classification tasks may involve up to 106 input-output pairs, language models may involve the order of 1010 tokens. Storing all key-value pairs explicitly is infeasible. To address it, we propose the incremental update rule of the fast-weight matrix: Wt+1 = Nt Nt+1 Wt + N Nt+1 W ⋆, N t+1 =N t +N,(14) where W ⋆ is computed from a new batch of N key-value pairs and update the new weights Wt+1. This update rule incrementally aggregates associative evidence without re- taining all past data, and its validity is theoretically justified in Appendix B.3. Trade-off Between Underfitting and Overfitting.The generalization behavior of the fast-weight matrixW ⋆ is gov-"},{"citing_arxiv_id":"2605.04447","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Deep Reprogramming Distillation for Medical Foundation Models","primary_cat":"cs.CV","submitted_at":"2026-05-06T03:22:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DRD introduces a reprogramming module and CKA-based distillation to enable efficient, robust adaptation of medical foundation models to downstream 2D/3D classification and segmentation tasks, outperforming prior PEFT and KD methods on 18 tasks.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Song, J. Wang, and P. Luo, \"Adaptformer: Adapting vision transformers for scalable visual recogni- tion,\"Advances in Neural Information Processing Systems, vol. 35, pp. 16 664-16 678, 2022. [10] M. Jia, L. Tang, B.-C. Chen, C. Cardie, S. Belongieet al., \"Visual prompt tuning,\" inEuropean Conference on Computer Vision. Springer, 2022, pp. 709-727. [11] X. L. Li and P. Liang, \"Prefix-tuning: Optimizing continuous prompts for generation,\"arXiv preprint arXiv:2101.00190, 2021. [12] E. J. Hu, Y . Shen, P. Wallis, Z. Allen-Zhu, Y . Li, S. Wang, L. Wang, and W. Chen, \"Lora: Low-rank adaptation of large language models,\" arXiv preprint arXiv:2106.09685, 2021. [13] X. He, C. Li, P. Zhang, J. Yang, and X."},{"citing_arxiv_id":"2605.00650","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"AdaMeZO: Adam-style Zeroth-Order Optimizer for LLM Fine-tuning Without Maintaining the Moments","primary_cat":"cs.LG","submitted_at":"2026-05-01T13:31:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AdaMeZO adapts Adam moment estimates to zeroth-order LLM fine-tuning without extra memory storage, outperforming MeZO with up to 70% fewer forward passes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27415","ref_index":14,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ChipLingo: A Systematic Training Framework for Large Language Models in EDA","primary_cat":"cs.LG","submitted_at":"2026-04-30T04:35:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ChipLingo trains LLMs on EDA data via corpus construction, domain-adaptive pretraining, and RAG scenario alignment, reaching 59.7% accuracy with an 8B model and 70.02% with a 32B model on a new internal EDA benchmark.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"to learn the relationship between knowledge and tasks earlier in training. 2.3 Parameter-Efficient Fine-Tuning Methods As large language model sizes continue to grow, the computational cost of full-parameter fine- tuning has become a significant limiting factor for model deployment [16, 17]. Researchers have proposed a series of parameter-efficient fine-tuning methods, including Adapter [13], Prefix- Tuning [14], and LoRA [15]. However, recent research indicates that parameter-efficient fine-tuning methods may have certain limitations in knowledge-intensive tasks [18]. Due to constraints on the expressiveness of low-rank updates, models may struggle to fully absorb domain knowledge when tasks require learning large amounts of fine-grained knowledge. 4 2."},{"citing_arxiv_id":"2604.26340","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Adaptive and Fine-grained Module-wise Expert Pruning for Efficient LoRA-MoE Fine-Tuning","primary_cat":"cs.LG","submitted_at":"2026-04-29T06:45:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DMEP prunes experts module-by-module in LoRA-MoE and removes load balancing after pruning, cutting trainable parameters 35-43% and raising throughput ~10% while matching or exceeding uniform baselines on reasoning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19087","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"OLLM: Options-based Large Language Models","primary_cat":"cs.AI","submitted_at":"2026-04-21T04:59:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OLLM models next-token generation as a latent-indexed set of options, enabling up to 70% math reasoning correctness versus 51% baselines and structure-based alignment via a compact latent policy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19015","ref_index":66,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"FedProxy: Federated Fine-Tuning of LLMs via Proxy SLMs and Heterogeneity-Aware Fusion","primary_cat":"cs.LG","submitted_at":"2026-04-21T03:06:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FedProxy replaces weak adapters with a proxy SLM for federated LLM fine-tuning, outperforming prior methods and approaching centralized performance via compression, heterogeneity-aware aggregation, and training-free fusion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18559","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ConforNets: Latents-Based Conformational Control in OpenFold3","primary_cat":"q-bio.BM","submitted_at":"2026-04-20T17:47:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ConforNets use channel-wise affine transforms on pre-Pairformer pair latents in OpenFold3 to achieve state-of-the-art unsupervised generation of alternate protein states and supervised conformational transfer across families.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18124","ref_index":27,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"TLoRA: Task-aware Low Rank Adaptation of Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-04-20T11:43:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TLoRA jointly optimizes LoRA initialization via task-data SVD and sensitivity-driven rank allocation, delivering stronger results than standard LoRA across NLU, reasoning, math, code, and chat tasks while using fewer trainable parameters.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17889","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"AeroRAG: Structured Multimodal Retrieval-Augmented LLM for Fine-Grained Aerial Visual Reasoning","primary_cat":"cs.CV","submitted_at":"2026-04-20T07:02:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AeroRAG improves fine-grained aerial visual question answering by converting images to scene graphs and using retrieval-augmented generation to create compact LLM prompts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15795","ref_index":38,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Fed3D: Federated 3D Object Detection","primary_cat":"cs.CV","submitted_at":"2026-04-17T07:55:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Fed3D is a federated 3D object detection system using local-global class-aware loss for heterogeneity and prompt modules for low-bandwidth communication, claiming better performance than prior methods on limited local data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12686","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"BID-LoRA: A Parameter-Efficient Framework for Continual Learning and Unlearning","primary_cat":"cs.LG","submitted_at":"2026-04-14T12:57:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BID-LoRA uses bi-directional low-rank adapters with retain/new/unlearn pathways and escape unlearning to enable continual learning and unlearning while minimizing knowledge leakage and parameter updates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12610","ref_index":44,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Transforming External Knowledge into Triplets for Enhanced Retrieval in RAG of LLMs","primary_cat":"cs.CL","submitted_at":"2026-04-14T11:36:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Tri-RAG turns external knowledge into Condition-Proof-Conclusion triplets and retrieves via the Condition anchor to improve efficiency and quality in LLM RAG.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"Soft Prompt Tuning introduces a small set of trainable continuous prompt vectors prepended to the input sequence while keeping all model parameters frozen. By optimizing only prompt parameters, this approach significantly reduces training cost and memory overhead, and has demonstrated strong perfor- mance and generalization across models such as BERT [44], GPT [45], and T5 [46]. Subsequent work further extends this paradigm to improve expressiveness and adaptability. P-Tuning [47] combines continuous prompts with discrete templates, while Prefix-Tuning [44] injects trainable prefix vectors into Transformer layers to better capture task-specific semantics. Recent studies also show that prompt-based adap-"},{"citing_arxiv_id":"2605.13858","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Hormone-inspired Emotion Layer for Transformer language models (HELT)","primary_cat":"cs.NE","submitted_at":"2026-04-13T11:53:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HormoneT5 augments T5 with a hormone-inspired block that predicts six continuous emotion values and uses them to modulate responses, reporting over 85% per-hormone accuracy and human preference for emotional quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11091","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"LDEPrompt: Layer-importance guided Dual Expandable Prompt Pool for Pre-trained Model-based Class-Incremental Learning","primary_cat":"cs.CV","submitted_at":"2026-04-13T07:11:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LDEPrompt introduces layer-importance guided dual expandable prompt pools to achieve state-of-the-art class-incremental learning by enabling adaptive layer selection and dynamic prompt management.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09034","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The nextAI Solution to the NeurIPS 2023 LLM Efficiency Challenge","primary_cat":"cs.LG","submitted_at":"2026-04-10T06:52:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A competition entry achieved efficient fine-tuning of LLaMa2 70B on one GPU in 24 hours with competitive QA benchmark performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.11810","ref_index":45,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"GRACE: A Dynamic Coreset Selection Framework for Large Language Model Optimization","primary_cat":"cs.DB","submitted_at":"2026-04-09T14:08:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GRACE dynamically constructs and updates coresets for LLM training using representation diversity, gradient-based importance, and k-NN graph propagation to improve efficiency and performance.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"We evaluate the generated output using the ROUGE-L metric [49], which measures the comprehensiveness of answers compared to hu- man references. Typically, we measure the F1 score of the ROUGE-L metric. The higher the F1 score, The better the QA answers or sum- marized text match the reference. 5.1.2 Models and Baselines.We evaluate GRACE on supervised fine-tuning tasks with LoRA [31] using Phi-2 [45], Llama-2-7b [2] and Qwen2.5-7b [63] models. We compare GRACE with several baselines that employ diverse selection methods and strategies. We set up the following selection types: static and dynamic. Static selection: Static selection methods select one fixed coreset for the entire training procedure. We compare with the following static selection baselines:"},{"citing_arxiv_id":"2604.06440","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Visual prompting reimagined: The power of the Activation Prompts","primary_cat":"cs.CV","submitted_at":"2026-04-07T20:28:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Activation prompts on intermediate layers outperform input-level visual prompting and parameter-efficient fine-tuning in accuracy and efficiency across 29 datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06095","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"LLM4CodeRE: Generative AI for Code Decompilation Analysis and Reverse Engineering","primary_cat":"cs.CR","submitted_at":"2026-04-07T17:08:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLM4CodeRE adapts LLMs with multi-adapter and seq2seq fine-tuning for accurate assembly-to-source decompilation and reverse translation in code reverse engineering.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05732","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Graph Topology Information Enhanced Heterogeneous Graph Representation Learning","primary_cat":"cs.LG","submitted_at":"2026-04-07T11:35:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ToGRL learns high-quality graph structures from raw heterogeneous graphs via a two-stage topology extraction process and prompt tuning, outperforming prior methods on five datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.03314","ref_index":6,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CoLA: Cross-Modal Low-rank Adaptation for Multimodal Downstream Tasks","primary_cat":"cs.CV","submitted_at":"2026-04-01T01:37:24+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.19926","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking LoRA for Privacy-Preserving Federated Learning in Large Models","primary_cat":"cs.LG","submitted_at":"2026-02-23T15:05:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LA-LoRA decouples LoRA matrix updates in DPFL settings to improve robustness to privacy noise, delivering up to 16.83% higher accuracy than prior LoRA variants on Swin-B under strict epsilon=1.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.13193","ref_index":68,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Steerable Vision-Language-Action Policies for Embodied Reasoning and Hierarchical Control","primary_cat":"cs.RO","submitted_at":"2026-02-13T18:57:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Steerable VLAs trained on rich synthetic commands at subtask, motion, and pixel levels enable VLMs to steer robot behavior more effectively, outperforming prior hierarchical baselines on real-world manipulation and generalization tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.22655","ref_index":22,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Do Fine-Tuned LLMs Understand Vulnerabilities? An Investigation into the Semantic Trap","primary_cat":"cs.CR","submitted_at":"2026-01-30T07:19:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Fine-tuned decoder-only LLMs fall into a Semantic Trap on vulnerability detection, achieving high scores on unpaired normal code but failing on paired vulnerable-patched code, semantic perturbations, and gap analysis, while reasoning supervision reduces symptoms at the cost of recall.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.14053","ref_index":85,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLMOrbit: A Circular Taxonomy of Large Language Models -From Scaling Walls to Agentic AI Systems","primary_cat":"cs.LG","submitted_at":"2026-01-20T15:06:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"A survey taxonomy of LLMs identifies three scaling crises and six efficiency paradigms while tracing the shift from generation to tool-using agents.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.14004","ref_index":176,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Locate, Steer, and Improve: A Practical Survey of Actionable Mechanistic Interpretability in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-01-20T14:23:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"The survey organizes mechanistic interpretability techniques into a Locate-Steer-Improve framework to enable actionable improvements in LLM alignment, capability, and efficiency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.09448","ref_index":42,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"One Prompt, Many Sounds: Modeling Listener Variability in LLM-Based Equalization","primary_cat":"cs.SD","submitted_at":"2026-01-14T12:51:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs using in-context learning and fine-tuning on listener experiment data generate equalization settings that align better with population preferences than random sampling or static presets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2511.05297","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Building Specialized Software-Assistant ChatBot with Graph-Based Retrieval-Augmented Generation","primary_cat":"cs.SE","submitted_at":"2025-11-07T14:56:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A graph-based RAG framework extracts state-action knowledge graphs from enterprise web apps to ground LLM responses for specialized software assistance in digital adoption platforms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.20505","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RELOOP: Recursive Retrieval with Multi-Hop Reasoner and Planners for Heterogeneous QA","primary_cat":"cs.CL","submitted_at":"2025-10-23T12:48:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RELOOP unifies retrieval across text, tables, and KGs via hierarchical sequences and dual-agent guided iteration, reporting EM/F1 gains over baselines on HotpotQA, HybridQA/TAT-QA, and MetaQA.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.14543","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Exploring Cross-Modal Flows for Few-Shot Learning","primary_cat":"cs.CV","submitted_at":"2025-10-16T10:32:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FMA introduces flow matching for multi-step cross-modal feature alignment in few-shot learning, using fixed coupling, noise augmentation, and early-stopping to outperform one-step PEFT methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.09474","ref_index":1,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Multimodal Policy Internalization for Conversational Agents","primary_cat":"cs.CL","submitted_at":"2025-10-10T15:28:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The paper defines the MPI task and proposes TriMPI, a three-stage training pipeline of continual pretraining, supervised finetuning, and policy-aware reinforcement learning that internalizes multimodal policies into model parameters for improved adherence without prompts at inference.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.21637","ref_index":7,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BoHA: Blockwise Hadamard Product Adaptation for Parameter-Efficient Fine-Tuning","primary_cat":"cs.LG","submitted_at":"2025-09-25T21:54:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BoHA partitions frozen weights into a b by b grid and applies independent low-rank Hadamard factors per block, outperforming LoRA on matched-budget single-task averages while retaining 57.66% first-stage accuracy in a commonsense-to-arithmetic continual-learning test on Llama-3.2-3B.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2508.04227","ref_index":60,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Continual Learning for VLMs: A Survey and Taxonomy Beyond Forgetting","primary_cat":"cs.CV","submitted_at":"2025-08-06T09:03:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The paper offers a comprehensive survey and proposes a new taxonomy for continual learning strategies in VLMs and MLLMs to combat catastrophic forgetting beyond traditional methods.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Continual learning with large pre-trained models (PTMs) [52, 53, 54, 55, 56] leverages parameter-efficient strategies to adapt to new tasks while preserving existing knowledge. These strategies primarily fall into two categories: parameter-efficient fine-tuning (PEFT) [57] and prompt learning [58]. PEFT methods, such as Adapter [59], LoRA [22], and Prefix-tuning [60], focus on updating only a small subset of parameters. A key approach within PEFT is enforcing orthogonality between task-specific parameters. For example, O-LoRA [61] minimizes catastrophic forgetting by constrain- ing the parameters for new tasks to an orthogonal subspace relative to past tasks. This orthogonality between the low-rank adaptation matrices At and Ai for any two distinct tasks t and"},{"citing_arxiv_id":"2506.13674","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PrefixMemory-Tuning: Modernizing Prefix-Tuning by Decoupling the Prefix from Attention","primary_cat":"cs.CL","submitted_at":"2025-06-16T16:30:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PrefixMemory-Tuning decouples the prefix from attention to overcome performance limits of traditional prefix-tuning and reaches competitive results with modern PEFT methods on LLM adaptation benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2505.12741","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Language Model Networks: Supervision-Efficient Learning through Dense Communication","primary_cat":"cs.AI","submitted_at":"2025-05-19T05:56:06+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2505.11237","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Concept Drift Guided LayerNorm Tuning for Efficient Multimodal Metaphor Identification","primary_cat":"cs.MM","submitted_at":"2025-05-16T13:27:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CDGLT achieves SOTA on MET-Meme for multimodal metaphor identification by using SLERP-based concept drift and prompt-adapted LayerNorm tuning with reduced compute.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2504.15214","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Histogram-based Parameter-efficient Tuning for Passive and Active Sonar Classification","primary_cat":"cs.LG","submitted_at":"2025-04-21T16:36:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HPT uses histograms of feature embeddings to modulate pre-trained models for sonar classification, achieving higher accuracy than standard adapters on passive sonar datasets like VTUAD.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2502.18864","ref_index":150,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Towards an AI co-scientist","primary_cat":"cs.AI","submitted_at":"2025-02-26T06:17:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A multi-agent AI system generates novel biomedical hypotheses that show promising experimental validation in drug repurposing for leukemia, new targets for liver fibrosis, and a bacterial gene transfer mechanism.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2501.05465","ref_index":69,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Small Language Models (SLMs) Can Still Pack a Punch: A survey (updated 2026)","primary_cat":"cs.CL","submitted_at":"2025-01-03T19:53:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A literature survey of Small Language Models (1-8B parameters) that can perform comparably or better than larger models, covering general-purpose and task-specific approaches plus creation techniques.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":", Ding, J., Schlesinger, C., Schoelkopf, H., Ebert, J., Dao, T., Mishra, M., Gu, A., Robinson, J., Ander- son, C. J., Dolan-Gavitt, B., Contractor, D., Reddy, S., Fried, D., Bahdanau, D., Jernite, Y., Ferrandis, C. M., Hughes, S. M., Wolf, T., Guha, A., von Werra, L., and de Vries, H. Starcoder: may the source be with you! ArXiv abs/2305.06161 (2023). [69] Li, X. L., and Liang, P. Prefix-tuning: Optimizing continuous prompts for generation. Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) abs/2101.00190 (2021). 34 [70] Li, Y., Bubeck, S., Eldan, R., Giorno, A. D."}],"limit":50,"offset":0}