{"total":87,"items":[{"citing_arxiv_id":"2606.18333","ref_index":58,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Polarisation and Faraday rotation measure imaging at metre wavelengths with sub-arcsecond resolution: a foundational calibration strategy","primary_cat":"astro-ph.IM","submitted_at":"2026-06-16T18:00:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A calibration strategy using full-Jones corrections with an in-field unpolarised calibrator and visibility-based multi-epoch alignment enables sub-arcsecond polarimetric imaging with LOFAR at metre wavelengths.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31192","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"The Regularizing Power of Language-Training Deepfake Detectors","primary_cat":"cs.CV","submitted_at":"2026-05-29T12:01:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A dual-encoder deepfake detector pairs a frozen specialist with a LoRA-tuned MLLM, trained first via binary alignment then via RL to reward explain-then-classify behavior, yielding improved cross-dataset performance and interpretability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30981","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Cognitive Fatigue in Autoregressive Transformers: Formalization and Measurement","primary_cat":"cs.CL","submitted_at":"2026-05-29T08:18:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Autoregressive transformers exhibit measurable cognitive fatigue during extended generation, quantified by the Fatigue Index that predicts degradation (AUROC 0.95) and repetition (rho 0.94).","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30589","ref_index":3,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ImmigrationQA: A Source-Grounded Dataset and Small-Model Adaptation for U.S. Immigration Law","primary_cat":"cs.CL","submitted_at":"2026-05-28T21:36:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A new source-grounded QA dataset for U.S. immigration law is built from official documents and used to fine-tune a 3B model, yielding a 27% mean score improvement over the base model on a held-out sample.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30415","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Domain Adaptation and Reasoning Frameworks in Language Models: A Controlled Experiment with Historical Cosmology","primary_cat":"cs.CL","submitted_at":"2026-05-28T18:00:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Fine-tuning on historical cosmology data reshapes language model explanatory frameworks, leading to stance changes as a secondary effect from regime redistribution.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29400","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Architecture-Sensitive Supervised Fine-Tuning for Screen-Conditioned Action Prediction: A PiSAR Benchmark","primary_cat":"cs.AI","submitted_at":"2026-05-28T05:49:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Fine-tuned Qwen3-VL-8B reaches sem_sim 0.783 on PiSAR held-out set vs 0.46-0.48 for frontier zero-shot, while Gemma-4-26B scores 0.441.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29183","ref_index":1,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"TIMEGATE: Sustainable Time-Boxed Promotion Gates for Continual ML Adaptation Under Resource Constraints","primary_cat":"cs.LG","submitted_at":"2026-05-27T23:41:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"TIMEGATE introduces time-boxed promotion gates and an M signal that delivers 66% evaluation-compute savings in simulation and 89% wall-clock/energy reduction on LLaMA-3.1-8B experiments with no silent mis-promotions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23901","ref_index":7,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLMs as Noisy Channels: A Shannon Perspective on Model Capacity and Scaling Laws","primary_cat":"cs.LG","submitted_at":"2026-05-22T17:59:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The Shannon Scaling Law treats LLM training as noisy-channel transmission and predicts U-shaped performance degradation when signal-to-noise ratio falls below a threshold, outperforming monotonic scaling laws on Pythia and OLMo2 data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23067","ref_index":28,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"What Training Data Teaches RL Memory Agents: An Empirical Study of Curriculum Effects in Memory-Augmented QA","primary_cat":"cs.CL","submitted_at":"2026-05-21T21:58:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Controlled study shows mixed training curricula improve aggregate F1 on memory QA benchmarks while out-of-domain data transfers targeted skills like temporal reasoning, with per-question-type effects exceeding aggregate differences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20706","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Llamas on the Web: Memory-Efficient, Performance-Portable, and Multi-Precision LLM Inference with WebGPU","primary_cat":"cs.DC","submitted_at":"2026-05-20T05:05:10+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LlamaWeb is a WebGPU backend for llama.cpp that uses static memory planning, tunable kernels, and templated multi-precision support to cut memory use by 29-33% and raise decode throughput by 45-69% versus prior browser frameworks on tested hardware.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20368","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Security Document Classification with a Fine-Tuned Local Large Language Model: Benchmark Data and an Open-Source System","primary_cat":"cs.CR","submitted_at":"2026-05-19T18:18:19+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A fine-tuned local Qwen 3.5 27B model achieves 95% category-level accuracy on security document classification, outperforming commercial models on both internal and external test sets while keeping processing local.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16154","ref_index":6,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learn Where Outcomes Diverge: Efficient VLA RL via Probabilistic Chunk Masking","primary_cat":"cs.LG","submitted_at":"2026-05-15T16:33:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PCM uses success-failure action variance to probabilistically select and mask chunks for gradient updates in GRPO, matching standard success rates with 2.38x wall-clock speedup and 60% lower memory on LIBERO benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15104","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"From Text to Voice: A Reproducible and Verifiable Framework for Evaluating Tool Calling LLM Agents","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:22:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A dataset-agnostic framework converts text tool-calling benchmarks to paired audio evaluations via TTS, speaker variation and noise, then evaluates seven omni-modal models showing model- and task-dependent performance with small text-to-voice gaps.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13989","ref_index":16,"ref_count":3,"confidence":0.98,"is_internal_anchor":true,"paper_title":"VectraYX-Nano: A 42M-Parameter Spanish Cybersecurity Language Model with Curriculum Learning and Native Tool Use","primary_cat":"cs.CL","submitted_at":"2026-05-13T18:03:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Trains a 42M-parameter Spanish cybersecurity LLM from scratch with curriculum phases and achieves 0.23 tool-selection accuracy after SFT mixture rebalancing to 1:21 tool-use ratio.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13779","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MinT: Managed Infrastructure for Training and Serving Millions of LLMs","primary_cat":"cs.LG","submitted_at":"2026-05-13T16:59:08+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13053","ref_index":3,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Standardized Re-evaluation of Conversational Recommender Systems on the ReDial Dataset","primary_cat":"cs.IR","submitted_at":"2026-05-13T06:20:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Standardized re-evaluation of CRS methods on ReDial shows nearly 50% of reported accuracy stems from repetition shortcuts absent in novelty-focused tests, with gains driven more by LLM backbone than architectures and recall overstating effectiveness.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12610","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Fine-Tuning Models for Automated Code Review Feedback","primary_cat":"cs.SE","submitted_at":"2026-05-12T18:02:04+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PEFT fine-tuning of Code Llama yields feedback on student Java bugs that students judge equal to ChatGPT and better than prompt engineering, using BLEU/ROUGE/BERTScore plus human ratings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12345","ref_index":23,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Output Composability of QLoRA PEFT Modules for Plug-and-Play Attribute-Controlled Text Generation","primary_cat":"cs.CL","submitted_at":"2026-05-12T16:21:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Summing outputs from separately trained QLoRA PEFT modules provides strong performance for attribute-controlled text generation, often matching or exceeding single-task modules even on single-attribute tests.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.10772","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Towards a Large Language-Vision Question Answering Model for MSTAR Automatic Target Recognition","primary_cat":"cs.CV","submitted_at":"2026-05-11T16:05:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A fine-tuned large language-vision model achieves 98% accuracy on visual question answering for military vehicle identification in SAR imagery from an extended MSTAR benchmark.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"adapter that projects the vision embedding into the language -embedded space with matching dimensions . With this alignment of the language and vision encoders , each dimension of the ir embedding vectors captures similar representations. The published LLaVA model was trained on 8 GPUs, each with 80 GB of VRAM, for 18 hours [14]. This LLaVA architecture can be further fine-tuned using Quantized Low-Rank Adaptation (QLoRA) [16] to reduce the memory footprint while training. QLoRA and similar techniques are known as parameter-efficient fine-tuning (PEFT) methods. 2.4 Remote Sensing Applications Applied research has adapted language models for automated remote sensing tasks, including image captioning and VQA. One of the earliest examples of VQA for electro-optical (EO) satellite imagery was the RSVQA method published in 2020"},{"citing_arxiv_id":"2605.09015","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLiMba: Sardinian on a Single GPU -- Adapting a 3B Language Model to a Vanishing Romance Language","primary_cat":"cs.CL","submitted_at":"2026-05-09T15:54:11+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Qwen2.5-3B was continued-pretrained and then fine-tuned with rsLoRA r256 on Sardinian data to reach 28.5 BLEU into the language, outperforming full fine-tuning and other LoRA variants.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08614","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DiagnosticIQ: A Benchmark for LLM-Based Industrial Maintenance Action Recommendation from Symbolic Rules","primary_cat":"cs.AI","submitted_at":"2026-05-09T02:17:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DiagnosticIQ benchmark shows frontier LLMs perform similarly on standard rule-to-action tasks but lose substantial accuracy under distractor expansion and condition inversion, pointing to calibration as the key deployment issue.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"org/abs/2506.03278. [9] Andrew A Cook, Göksel Mısırlı, and Zhong Fan. Anomaly detection for iot time-series data: A survey.IEEE Internet of Things Journal, 7(7):6481-6494, 2019. [10] Cytiva. Intelligent maintenance powered by iot and ai, Apr 2026. URL https://www. cytivalifesciences.com/en/us/insights/intelligent-equipment-maintenance. Ac- cessed: 2026-05-02. [11] Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. Qlora: Efficient finetuning of quantized llms, 2023. URLhttps://arxiv.org/abs/2305.14314. [12] Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. Measuring massive multitask language understanding.arXiv preprint arXiv:2009.03300, 2020."},{"citing_arxiv_id":"2605.16347","ref_index":36,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"HPC-LLM: Practical Domain Adaptation and Retrieval-Augmented Generation for HPC Support","primary_cat":"cs.LG","submitted_at":"2026-05-08T03:54:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"HPC-LLM fine-tunes Llama 3.1 8B via QLoRA on 9k-24k HPC examples and adds dense retrieval to deliver practical support for job scheduling, MPI, and GPU workflows, approaching the performance of larger general models at lower memory and latency cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02105","ref_index":68,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Sharpness-Aware Pretraining Mitigates Catastrophic Forgetting","primary_cat":"cs.LG","submitted_at":"2026-05-04T00:02:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Sharpness-aware pretraining and related flat-minima interventions reduce catastrophic forgetting by up to 80% after post-training across 20M-150M models and by 31-40% at 1B scale.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01336","ref_index":74,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Multi-View Media Profiling Suite: Resources, Evaluation, and Analysis","primary_cat":"cs.CL","submitted_at":"2026-05-02T09:17:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Presents MBFC-2025 dataset and multi-view embeddings with fusion methods for media bias and factuality, reporting SOTA results on ACL-2020 and new benchmarks on MBFC-2025.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00650","ref_index":27,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AdaMeZO: Adam-style Zeroth-Order Optimizer for LLM Fine-tuning Without Maintaining the Moments","primary_cat":"cs.LG","submitted_at":"2026-05-01T13:31:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"AdaMeZO adapts Adam moment estimates to zeroth-order LLM fine-tuning without extra memory storage, outperforming MeZO with up to 70% fewer forward passes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00206","ref_index":26,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"State Stream Transformer (SST) V2: Parallel Training of Nonlinear Recurrence for Latent Space Reasoning","primary_cat":"cs.LG","submitted_at":"2026-04-30T20:30:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SST V2 introduces parallel-trainable nonlinear recurrence in latent space to let transformers reason continuously across positions, delivering +15 points on GPQA-Diamond and halving remaining GSM8K errors over matched baselines.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"(8) When pass 2 blends this O(α) approximation with weight αl, the error is αl ⊙O(α) =O(α 2). With learned α∈[0.024,0.035] , α2 ∈[5.8×10 −4,1.2×10 −3]. The two-pass blend matches the true sequential computation to first order inα. The full derivation is given in Appendix B.3. Co-adaptation.The O(α2) bound assumes fixed weights. In practice, the LoRA [26] adapters, blend parameters, and state normalisation weights are all trained jointly across both passes. Gradients flow from pass 2's loss through the blend, through the scan, and into pass 1's feedforward outputs. The model learns pass 1 outputs that are maximally useful when propagated and blended into pass 2. The two passes co-adapt rather than one approximating the other,"},{"citing_arxiv_id":"2605.00195","ref_index":53,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Diversity in Large Language Models under Supervised Fine-Tuning","primary_cat":"cs.LG","submitted_at":"2026-04-30T20:20:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TOFU loss mitigates the narrowing of generative diversity in LLMs after supervised fine-tuning by addressing neglect of low-frequency patterns and forgetting of prior knowledge.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27415","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"ChipLingo: A Systematic Training Framework for Large Language Models in EDA","primary_cat":"cs.LG","submitted_at":"2026-04-30T04:35:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ChipLingo trains LLMs on EDA data via corpus construction, domain-adaptive pretraining, and RAG scenario alignment, reaching 59.7% accuracy with an 8B model and 70.02% with a 32B model on a new internal EDA benchmark.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[14] Li X L, Liang P. Prefix-Tuning: Optimizing Continuous Prompts for Generation. arXiv:2101.00190, 2021. [15] Hu E J, Shen Y, Wallis P, et al. LoRA: Low-Rank Adaptation of Large Language Models. arXiv:2106.09685, 2021. [16] Zhang Q, Chen M, Bukharin A, et al. AdaLoRA: Adaptive Budget Allocation for Parameter-Efficient Fine-Tuning. arXiv:2303.10512, 2023. [17] Dettmers T, Pagnoni A, Holtzman A, et al. QLoRA: Efficient Finetuning of Quantized LLMs. arXiv:2305.14314, 2023. [18] Pletenev S, Marina M, Moskovskiy D, et al. How Much Knowledge Can You Pack into a LoRA Adapter without Harming LLM? InFindings of the Association for Computa- tional Linguistics: NAACL 2025, pages 4309-4322, 2025. doi:10.18653/v1/2025."},{"citing_arxiv_id":"2604.26217","ref_index":5,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"OpenSOC-AI: Democratizing Security Operations with Parameter Efficient LLM Log Analysis","primary_cat":"cs.CR","submitted_at":"2026-04-29T01:46:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LoRA fine-tuning of TinyLlama-1.1B on 450 SOC examples produces 68% threat classification accuracy and 58% severity accuracy on 50 held-out logs, with full code, weights, and data released.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.24678","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Leveraging LLMs for Multi-File DSL Code Generation: An Industrial Case Study","primary_cat":"cs.SE","submitted_at":"2026-04-27T16:38:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Fine-tuning 7B code LLMs on a custom multi-file DSL dataset achieves structural fidelity of 1.00, high exact-match accuracy, and practical utility validated by expert survey and execution checks.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[9] Tim Dettmers, Mike Lewis, Younes Belkada, and Luke Zettlemoyer. 2022. LLM.int8(): 8-bit Matrix Multiplication for Transformers at Scale. arXiv:2208.07339 [cs.LG] https://arxiv.org/abs/2208.07339 [10] Tim Dettmers, Artidoro Pagnoni, Ari Holtzman, and Luke Zettlemoyer. 2023. QLoRA: Efficient Finetuning of Quantized LLMs. arXiv:2305.14314 [cs.LG] https: //arxiv.org/abs/2305.14314 [11] Li Dong and Mirella Lapata. 2016. Language to Logical Form with Neural Attention. InProceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Katrin Erk and Noah A. Smith (Eds.). Association for Computational Linguistics, Berlin, Germany, 33-43. doi:10.18653/v1/P16-1004 [12] Qingxiu Dong, Lei Li, Damai Dai, Ce Zheng, Jingyuan Ma, Rui Li, Heming Xia,"},{"citing_arxiv_id":"2604.24447","ref_index":4,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Characterizing Vision-Language-Action Models across XPUs: Constraints and Acceleration for On-Robot Deployment","primary_cat":"cs.RO","submitted_at":"2026-04-27T13:12:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"VLA models exhibit a compute-bound VLM phase followed by a memory-bound action phase on edge hardware; DP-Cache and V-AEFusion reduce redundancy and enable pipeline parallelism for up to 6x speedup on NPUs with marginal task degradation.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":", Li, J., Zhu, M., Wu, K., Xu, Z., Cheng, R., Shen, C., Peng, Y ., Feng, F., et al. Tinyvla: Towards fast, data-efficient vision-language-action models for robotic manipulation.arXiv preprint arXiv:2409.12514, 2024. Williams, S., Waterman, A., and Patterson, D. Roofline: an insightful visual performance model for multicore architectures.Commun. ACM, 52(4):65-76, April 2009. ISSN 0001-0782. doi: 10.1145/1498765. 1498785. URL https://doi.org/10.1145/ 1498765.1498785. Xu, S., Wang, Y ., Xia, C., Zhu, D., Huang, T., and Xu, C. Vla-cache: Efficient vision-language-action manipu- lation via adaptive token caching, 2025. URL https: //arxiv.org/abs/2502.02175. Yang, Y ., Wang, Y ., Wen, Z., Zhongwei, L., Zou, C."},{"citing_arxiv_id":"2604.23817","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FUTURAL: A Metasearch Platform for Empowering Rural Areas with Smart Solutions","primary_cat":"cs.IR","submitted_at":"2026-04-26T17:36:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"FUTURAL's metasearch MVP uses LLMs to enable natural language queries over smart solutions data to support rural development.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21571","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Separable Expert Architecture: Toward Privacy-Preserving LLM Personalization via Composable Adapters and Deletable User Proxies","primary_cat":"cs.AI","submitted_at":"2026-04-23T11:51:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A separable expert architecture uses base models, LoRA adapters, and deletable per-user proxies to enable privacy-preserving personalization and deterministic unlearning in LLMs.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"proximate gradient manipulation [6, 14], LLM-specific methods such as model-generated knowledge replace- ment [15], NPO [7], or representation-level unlearning [8]. On the other hand, the infrastructure for compos- able adapter stacks has matured substantially: LoRA [16] and QLoRA [17] enable efficient adapter training, LoraHub [18] and task arithmetic [19, 20] demonstrate multi-adapter composition, and S-LoRA [21] enables serving thousands of concurrent adapters from a single base model while Punica [22] provides efficient multi- tenant batching via segmented gather-matrix-vector kernels. Activation steering methods, including Con- trastive Activation Addition [23] and Inference-Time Intervention [24], show that behavioral modification"},{"citing_arxiv_id":"2604.18302","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Toward Zero-Egress Psychiatric AI: On-Device LLM Deployment for Privacy-Preserving Mental Health Decision Support","primary_cat":"cs.AI","submitted_at":"2026-04-20T14:09:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A cross-platform mobile application deploys an ensemble of quantized open-source LLMs for fully local, DSM-5-aligned psychiatric decision support with claimed accuracy comparable to prior cloud versions.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"This architectural dependency creates an irreconcilable tension with the pri- vacy requirements of the most at-risk and underserved populations. The emergence of lightweight, highly efficient open-source LLMs-such as Gemma [10], Phi-3.5-mini [11], and Qwen2 [12]-combined with advances in model compression via Quantized Low-Rank Adaptation (QLoRA) [13] and mobile inference runtimes, has fundamentally changed what is compu- tationally achievable on consumer mobile hardware. Modern smartphones equipped with dedicated neural processing units (NPUs) are now capable of running billion-parameter language models at interactive speeds, opening the door to genuinely on-device, zero-egress AI inference [14, 15]."},{"citing_arxiv_id":"2604.13440","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"A KL Lens on Quantization: Fast, Forward-Only Sensitivity for Mixed-Precision SSM-Transformer Models","primary_cat":"cs.LG","submitted_at":"2026-04-15T03:40:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"KL divergence provides a superior forward-only metric for identifying quantization-sensitive parts in SSM-Transformer hybrids, outperforming MSE and SQNR and supporting practical mixed-precision deployment on edge devices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.12358","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Why and When Visual Token Pruning Fails? A Study on Relevant Visual Information Shift in MLLMs Decoding","primary_cat":"cs.CV","submitted_at":"2026-04-14T06:48:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Visual token pruning in MLLMs fails on complex reasoning due to Relevant Visual Information Shift during decoding, but the DSTP framework fixes it training-free across models.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Research for Efficient Multimodal Large Language Models (MLLMs) has emerged to address the significant challenges in inference speed and memory consump- tion caused by their massive scale. These studies can be broadly divided into two categories:1) Changing MLLM Internals: These methods modify the architecture or the parameters of the model itself. Common techniques include Quantization[12,24,27]-reduceing the precision of the numbers the model uses (e.g., moving from16-bit to8-bit or even4-bit) to save space;Distilla- tion[6,19 -23]-where a large teacher model helps a smaller student model learn to perform just as well; andLayer Pruning[18,28,46]-identifying and removing parts of the neural network that do not contribute significantly to the"},{"citing_arxiv_id":"2604.11687","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Please Make it Sound like Human: Encoder-Decoder vs. Decoder-Only Transformers for AI-to-Human Text Style Transfer","primary_cat":"cs.CL","submitted_at":"2026-04-13T16:30:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"BART-large outperforms Mistral-7B in AI-to-human style transfer with higher reference similarity scores and far fewer parameters, while showing that marker shift can reflect overshoot rather than accurate transfer.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09952","ref_index":11,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"SLM Finetuning for Natural Language to Domain Specific Code Generation in Production","primary_cat":"cs.LG","submitted_at":"2026-04-10T23:11:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Fine-tuned small language models outperform larger models in natural language to domain-specific code generation with improved performance, latency, and the ability to adapt to customer-specific scenarios without losing general capabilities.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09791","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Pioneer Agent: Continual Improvement of Small Language Models in Production","primary_cat":"cs.AI","submitted_at":"2026-04-10T18:13:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Pioneer Agent automates the full lifecycle of adapting and continually improving small language models via diagnosis-driven data synthesis and regression-constrained retraining, delivering gains of 1.6-83.8 points on benchmarks and large lifts in production-style tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09069","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"NyayaMind- A Framework for Transparent Legal Reasoning and Judgment Prediction in the Indian Legal System","primary_cat":"cs.CL","submitted_at":"2026-04-10T07:51:42+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"NyayaMind combines RAG retrieval with domain-specific LLMs to generate transparent, structured legal reasoning and judgment predictions for Indian court cases.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.09034","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The nextAI Solution to the NeurIPS 2023 LLM Efficiency Challenge","primary_cat":"cs.LG","submitted_at":"2026-04-10T06:52:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A competition entry achieved efficient fine-tuning of LLaMa2 70B on one GPU in 24 hours with competitive QA benchmark performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08457","ref_index":3,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"CrashSight: A Phase-Aware, Infrastructure-Centric Video Benchmark for Traffic Crash Scene Understanding and Reasoning","primary_cat":"cs.CV","submitted_at":"2026-04-09T16:52:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CrashSight is a new infrastructure-focused benchmark showing that state-of-the-art vision-language models can describe crash scenes but fail at temporal and causal reasoning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.08070","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"AtlasOCR: Building the First Open-Source Darija OCR Model with Vision Language Models","primary_cat":"cs.CV","submitted_at":"2026-04-09T10:38:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AtlasOCR delivers the first open-source Darija OCR by fine-tuning Qwen2.5-VL 3B, achieving state-of-the-art results on custom and existing benchmarks for both Darija and Arabic.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07766","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Sensitivity-Positional Co-Localization in GQA Transformers","primary_cat":"cs.CL","submitted_at":"2026-04-09T03:41:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"In Llama 3.1 8B, task-sensitive layers cluster late while RoPE adaptation is strongest early, yet applying both adaptations only to sensitivity-identified layers outperforms other layer choices by 4-16 points on MMLU, GPQA, HumanEval+, MATH, MGSM and ARC.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07754","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The Art of (Mis)alignment: How Fine-Tuning Methods Effectively Misalign and Realign LLMs in Post-Training","primary_cat":"cs.CR","submitted_at":"2026-04-09T03:20:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ORPO is most effective at misaligning LLMs while DPO excels at realigning them, though it reduces utility, revealing an asymmetry between attack and defense methods.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"alignment and subsequent realignment. We first construct a misalignment dataset namedMisQAand leverage exist- ing open-source datasets for realignment. We then conduct misalignment and subsequent realignment on four safety- aligned LLMs using six fine-tuning methods, including 1 arXiv:2604.07754v1 [cs.CR] 9 Apr 2026 four Supervised Fine-Tuning (SFT) techniques: LoRA [24], QLoRA [13], AdaLoRA [65], and IA3 [38], as well as two Preference Fine-Tuning (PFT) techniques: DPO [48] and ORPO [23]. Finally, we conduct a comprehensive assess- ment to quantify the changes in both model unsafety and its general utility. We summarize key findings below. • Different LLMs exhibit varying degrees of resistance to misalignment. Gemma2 shows the highest resilience"},{"citing_arxiv_id":"2604.07034","ref_index":49,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"KITE: Keyframe-Indexed Tokenized Evidence for VLM-Based Robot Failure Analysis","primary_cat":"cs.RO","submitted_at":"2026-04-08T12:49:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"KITE is a training-free method that uses keyframe-indexed tokenized evidence including BEV schematics to enhance VLM performance on robot failure detection, identification, localization, explanation, and correction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06370","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ForkKV: Scaling Multi-LoRA Agent Serving via Copy-on-Write Disaggregated KV Cache","primary_cat":"cs.DC","submitted_at":"2026-04-07T18:52:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ForkKV uses copy-on-write disaggregated KV cache with DualRadixTree and ResidualAttention kernels to deliver up to 3x throughput over prior multi-LoRA serving systems with negligible quality loss.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"To tailor foundational models for these diverse tasks in a work- flow, Parameter-Efficient Fine-Tuning (PEFT) [41] techniques, par- ticularly Low-Rank Adaptation (LoRA) [21], offer a promising solu- tion. By freezing the pretrained weights and updating only small low-rank matrices known asadapters, LoRA maintains high gener- ation quality while introducing minimal parameter overhead [15, 21, 34]. For example, LoRA adapters with a low-rank dimension of 16 trained on Llama3.1-70B [42] account for only 0.28% of the orig- inal model size (approximately 400MB v.s. 140GB). Such extreme memory efficiency fundamentally shifts how systems manage con- current workloads. Instead of deploying multiple monolithic mod- els, the serving engine can concurrently host multiple specialized"},{"citing_arxiv_id":"2604.05807","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Constraint-Driven Warm-Freeze for Efficient Transfer Learning in Photovoltaic Systems","primary_cat":"cs.NE","submitted_at":"2026-04-07T12:44:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CDWF achieves 90-99% of full fine-tuning performance with up to 120x fewer trainable parameters by dynamically allocating full trainability to gradient-important blocks and LoRA to others for PV cyberattack transfer learning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06253","ref_index":6,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"FLeX: Fourier-based Low-rank EXpansion for multilingual transfer","primary_cat":"cs.LG","submitted_at":"2026-04-06T19:26:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LoRA fine-tuning of Code Llama with Fourier regularization raises Java pass@1 from 34.2% to 42.1% while using a small high-quality dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.04565","ref_index":30,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"PassiveQA: A Three-Action Framework for Epistemically Calibrated Question Answering via Supervised Finetuning","primary_cat":"cs.CL","submitted_at":"2026-04-06T09:54:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PassiveQA trains models via supervised finetuning to decide Answer, Ask, or Abstain using structured information-state representations and knowledge-graph context, yielding better abstention and lower hallucination on QA datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}