{"total":40,"items":[{"citing_arxiv_id":"2605.22435","ref_index":232,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Assisted Counterspeech Writing at the Crossroads of Hate Speech and Misinformation","primary_cat":"cs.CL","submitted_at":"2026-05-21T13:02:08+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs generate adequate counterspeech for co-occurring hate and misinformation in 40% of cases, with a mixed knowledge strategy from fact-checkers and NGOs proving most effective after expert revision.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22356","ref_index":19,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Modeling Pathology-Like Behavioral Patterns in Language Models Through Behavioral Fine-Tuning","primary_cat":"cs.CL","submitted_at":"2026-05-21T11:42:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Fine-tuning LLMs on structured tasks inspired by maladaptive behaviors produces stable, context-general shifts in next-token distributions and response tendencies consistent with altered behavioral priors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20165","ref_index":29,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"CaMo: Camera Motion Grounded Evaluation and Training for Vision-Language Models","primary_cat":"cs.CV","submitted_at":"2026-05-19T17:50:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Proposes Spatial Narrative Score (SNS) evaluation for VLMs' camera motion understanding and introduces CaMo model achieving consistent performance on SNS and direct QA.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20284","ref_index":41,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"JUDO: A Juxtaposed Domain-Oriented Multimodal Reasoner for Industrial Anomaly QA","primary_cat":"cs.CV","submitted_at":"2026-05-19T05:57:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"JUDO enhances large multimodal models for industrial anomaly QA by juxtaposing query images with normal ones for visual comparison and using SFT plus GRPO with tailored rewards to inject domain knowledge, outperforming Qwen2.5-VL-7B and GPT-4o on the MMAD benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17900","ref_index":27,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"DuIVRS-2: An LLM-based Interactive Voice Response System for Large-scale POI Attribute Acquisition","primary_cat":"cs.AI","submitted_at":"2026-05-18T06:06:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"DuIVRS-2 deploys an LLM-driven IVR pipeline that processes 0.4 million calls per day at 83.9 percent task success rate using FSM-guided augmentation, selective CoT generation, and cooperative policy iteration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17152","ref_index":136,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Multilingual and Multimodal LLMs in the Wild: Building for Low-Resource Languages","primary_cat":"cs.CL","submitted_at":"2026-05-16T20:56:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A tutorial synthesizing foundations, recent models such as PALO and Maya, and low-cost methods for tri-modal multilingual AI in resource-constrained settings.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15665","ref_index":12,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"PRISM: Prompt Reliability via Iterative Simulation and Monitoring for Enterprise Conversational AI","primary_cat":"cs.AI","submitted_at":"2026-05-15T06:43:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PRISM automates continuous prompt creation, simulation-based testing, diagnosis, and repair for enterprise LLM agents, cutting authoring time to under 30 minutes while reaching 99% reliability and catching drift within 24 hours.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14350","ref_index":90,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Distributionally Robust Multi-Task Reinforcement Learning via Adaptive Task Sampling","primary_cat":"cs.LG","submitted_at":"2026-05-14T04:22:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DRATS derives a minimax objective from a feasibility formulation of MTRL to adaptively sample tasks with the largest return gaps, leading to better worst-task performance on MetaWorld benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14220","ref_index":16,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Diagnosing Training Inference Mismatch in LLM Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-14T00:27:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Training-inference mismatch in separated rollout and optimization stages of LLM RL can independently cause training collapse.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12379","ref_index":36,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Discrete Flow Matching for Offline-to-Online Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2026-05-12T16:44:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DRIFT enables stable offline-to-online fine-tuning of CTMC policies in discrete RL via advantage-weighted discrete flow matching, path-space regularization, and candidate-set approximation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12288","ref_index":16,"ref_count":4,"confidence":0.35,"is_internal_anchor":false,"paper_title":"TokenRatio: Principled Token-Level Preference Optimization via Ratio Matching","primary_cat":"cs.CL","submitted_at":"2026-05-12T15:44:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TBPO posits a token-level Bradley-Terry model and derives a Bregman-divergence density-ratio matching loss that generalizes DPO while preserving token-level optimality.","context_count":2,"top_context_role":"method","top_context_polarity":"use_method","context_text":"This establishes Proposition 4.2. A.3. Proof for Theorem 4.3 Theorem.Under sufficient model capacity, we haveπ θ∗ =π ˆθ whereπ ˆθ =argmin πθ Dh (Rdata, Rθ). Proof.Letarg min πθ Dh(Rdata∥Rθ) =π ˆθ. We have the following decomposition:: Dh Rdata, Rˆθ \u0001 = 1 T TX t=1 Dt h \u0010 Rt data, Rt ˆθ \u0011 = 1 T TX t=1 Epdata(yw ≤t≻yl ≤t|x) h Bh \u0010 Rt data, Rt ˆθ \u0011i .(16) whereTdenotes the minimum sequence length. By the non-negativity of the Bregman divergence, Bh \u0010 Rt data, Rt ˆθ \u0011 attains its minimum value 0 if and only if Rt data =R t ˆθ for a given(y w <t, yl <t, x). Assuming that the data distribution has full support, the minimizerπ ˆθ therefore satisfiesR t data =R t ˆθ for all(y w <t, yl <t, x). Substituting the definitions ofR t"},{"citing_arxiv_id":"2605.11908","ref_index":20,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Delightful Gradients Accelerate Corner Escape","primary_cat":"cs.LG","submitted_at":"2026-05-12T10:21:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Delightful Policy Gradient removes exponential corner trapping in softmax policy optimization for bandits and tabular MDPs, achieving logarithmic escape times and global O(1/t) convergence.","context_count":1,"top_context_role":"background","top_context_polarity":"unclear","context_text":"Using the EG logit update rules, we have, ∆θ(1)−∆θ(j) =π 1[U1(1−π 1)−π jUj]−π j[Uj(1−π j)−π 1U1](16) =π 1U1(1−π 1 +π j)−π jUj(1−π j +π 1)(17) =π 1U1A−π jUjB,(18) whereA= 1 +π j −π 1 andB= 1 +π 1 −π j. Substituting the expressions forU 1 andU j, ∆θ(1)−∆θ(j) = ∆ 1jC1j + X k̸=1,j ∆jk Cjk ,(19) whereC 1j andC jk are defined as, C1j =π 1[(1−π 1)A+π jB] =π 1[(1−π 1)2 +π j(2−π j)],(20) Cjk =π k(π1A−π jB) =π k(π1 −π j) \u0010 X m̸=1,j πm \u0011 .(21) Note that C1j >0 , and near the corner j where πj > π1, the coefficients Cjk <0 for all k. Consider a policy near corner j such that πj = 1−ϵ . In this regime, the probabilities π1 and πk are of order O(ϵ), and the coefficients simplify to (wherec 1, c2 >0andc 1, c2 ∈O(1)), C1j =c 1 ·π 1 ∈O(ϵ),(22)"},{"citing_arxiv_id":"2605.10582","ref_index":18,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Guaranteed Jailbreaking Defense via Disrupt-and-Rectify Smoothing","primary_cat":"cs.CR","submitted_at":"2026-05-11T13:54:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DR-Smoothing introduces a disrupt-then-rectify prompt processing scheme into smoothing defenses, delivering tight theoretical bounds on success probability against both token- and prompt-level jailbreaks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08409","ref_index":4,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Playing games with knowledge: AI-Induced delusions need game theoretic interventions","primary_cat":"cs.AI","submitted_at":"2026-05-08T19:13:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AI sycophancy creates belief spirals modeled as cheap talk games, mitigated by an Epistemic Mediator that introduces costly signals for type revelation and Belief Versioning for epistemic safety.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06987","ref_index":29,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Response Time Enhances Alignment with Heterogeneous Preferences","primary_cat":"cs.LG","submitted_at":"2026-05-07T22:05:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Response times modeled as drift-diffusion processes enable consistent estimation of population-average preferences from heterogeneous anonymous binary choices.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06869","ref_index":24,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Agentick: A Unified Benchmark for General Sequential Decision-Making Agents","primary_cat":"cs.AI","submitted_at":"2026-05-07T19:12:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Agentick is a new benchmark for sequential decision-making agents that evaluates RL, LLM, VLM, hybrid, and human approaches across 37 tasks and finds no single method dominates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06524","ref_index":53,"ref_count":2,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Process Matters more than Output for Distinguishing Humans from Machines","primary_cat":"cs.AI","submitted_at":"2026-05-07T16:30:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A new battery of 30 cognitive tasks demonstrates that process-level behavioral features distinguish humans from frontier AI agents better than performance metrics (mean AUC 0.88), with process-specific fine-tuning improving mimicry but limited cross-task transfer.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06375","ref_index":2,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"A Unified Pair-GRPO Family: From Implicit to Explicit Preference Constraints for Stable and General RL Alignment","primary_cat":"cs.LG","submitted_at":"2026-05-07T14:56:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"A unified Pair-GRPO framework extends GRPO with soft and hard pairwise preference variants, proving gradient equivalence under Taylor expansion and delivering improved stability and performance in RLHF.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2412.21059","ref_index":17,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"VisionReward: Fine-Grained Multi-Dimensional Human Preference Learning for Image and Video Generation","primary_cat":"cs.CV","submitted_at":"2024-12-30T16:24:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"VisionReward learns multi-dimensional human preferences for image and video generation via hierarchical assessment and linear weighting, outperforming VideoScore by 17.2% in prediction accuracy and yielding 31.6% higher win rates in text-to-video models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2409.12917","ref_index":78,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Training Language Models to Self-Correct via Reinforcement Learning","primary_cat":"cs.LG","submitted_at":"2024-09-19T17:16:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SCoRe uses multi-turn online RL with regularization on self-generated traces to improve LLM self-correction, achieving 15.6% and 9.1% gains on MATH and HumanEval for Gemini models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.00118","ref_index":117,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Gemma 2: Improving Open Language Models at a Practical Size","primary_cat":"cs.CL","submitted_at":"2024-07-31T19:13:07+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Gemma 2 models achieve leading performance at their sizes by combining established Transformer modifications with knowledge distillation for the 2B and 9B variants.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2403.08295","ref_index":105,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Gemma: Open Models Based on Gemini Research and Technology","primary_cat":"cs.CL","submitted_at":"2024-03-13T06:59:16+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Gemma introduces open 2B and 7B LLMs derived from Gemini technology that beat comparable open models on 11 of 18 text tasks and come with safety assessments.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2402.13116","ref_index":72,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"A Survey on Knowledge Distillation of Large Language Models","primary_cat":"cs.CL","submitted_at":"2024-02-20T16:17:37+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":3.0,"formal_verification":"none","one_line_summary":"A comprehensive survey of knowledge distillation for LLMs structured around algorithms, skill enhancement, and vertical applications, highlighting data augmentation as a key enabler.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2401.10020","ref_index":1,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Self-Rewarding Language Models","primary_cat":"cs.CL","submitted_at":"2024-01-18T14:43:47+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Iterative self-rewarding via LLM-as-Judge in DPO training on Llama 2 70B improves instruction following and self-evaluation, outperforming GPT-4 on AlpacaEval 2.0.","context_count":1,"top_context_role":"baseline","top_context_polarity":"baseline","context_text":"Fine-grained analysis As described earlier, the overall performance of the model in AlpacaEval improves with each iteration of training. It would be interesting to break down the overall performance improvement to see exactly what type of tasks these improvements come from. Therefore, we cluster the instructions in AlpacaEval test set into different groups based on three perspectives: (1) instruction category (2) instruction complexity (3) expected response length. We achieve this by using GPT-4. The detailed statistical information of the breakdown and the prompting techniques we used for getting this breakdown can be found in Appendix A.6. Results for the instruction category are given in Figure 4, and the other two in Appendix Figure 11."},{"citing_arxiv_id":"2401.03568","ref_index":25,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Agent AI: Surveying the Horizons of Multimodal Interaction","primary_cat":"cs.AI","submitted_at":"2024-01-07T19:11:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The paper defines Agent AI as interactive multimodal systems that perceive grounded data and generate embodied actions, arguing this approach can mitigate hallucinations in foundation models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2401.02954","ref_index":139,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"DeepSeek LLM: Scaling Open-Source Language Models with Longtermism","primary_cat":"cs.CL","submitted_at":"2024-01-05T18:59:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"DeepSeek LLM 67B exceeds LLaMA-2 70B on code, mathematics and reasoning benchmarks after pre-training on 2 trillion tokens and alignment via SFT and DPO.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2311.16867","ref_index":120,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"The Falcon Series of Open Language Models","primary_cat":"cs.CL","submitted_at":"2023-11-28T15:12:47+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Falcon-180B is a 180B-parameter open decoder-only model trained on 3.5 trillion tokens that approaches PaLM-2-Large performance at lower cost and is released with dataset extracts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2312.03732","ref_index":35,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"A Rank Stabilization Scaling Factor for Fine-Tuning with LoRA","primary_cat":"cs.CL","submitted_at":"2023-11-28T03:23:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LoRA adapters should be scaled by 1/sqrt(rank) rather than 1/rank to stabilize learning and enable effective use of higher ranks during fine-tuning of large language models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2311.10122","ref_index":71,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Video-LLaVA: Learning United Visual Representation by Alignment Before Projection","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Video-LLaVA creates a unified visual representation for images and videos via pre-projection alignment, enabling mutual enhancement from joint training and strong results on image and video benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2310.16828","ref_index":129,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"TD-MPC2: Scalable, Robust World Models for Continuous Control","primary_cat":"cs.LG","submitted_at":"2023-10-25T17:57:07+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"TD-MPC2 scales an implicit world-model RL method to a 317M-parameter agent that masters 80 tasks across four domains with a single hyperparameter configuration.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2310.06114","ref_index":242,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Learning Interactive Real-World Simulators","primary_cat":"cs.AI","submitted_at":"2023-10-09T19:42:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"UniSim learns a universal real-world simulator from orchestrated diverse datasets, enabling zero-shot deployment of policies trained purely in simulation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2310.01852","ref_index":109,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"LanguageBind: Extending Video-Language Pretraining to N-modality by Language-based Semantic Alignment","primary_cat":"cs.CV","submitted_at":"2023-10-03T07:33:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LanguageBind aligns video, infrared, depth, and audio to a frozen language encoder via contrastive learning on the new VIDAL-10M dataset, extending video-language pretraining to N modalities.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2309.11495","ref_index":153,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"Chain-of-Verification Reduces Hallucination in Large Language Models","primary_cat":"cs.CL","submitted_at":"2023-09-20T17:50:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Chain-of-Verification reduces hallucinations in large language models by drafting responses, planning independent verification questions, answering them separately, and generating a final verified output.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2309.08532","ref_index":88,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"EvoPrompt: Connecting LLMs with Evolutionary Algorithms Yields Powerful Prompt Optimizers","primary_cat":"cs.CL","submitted_at":"2023-09-15T16:50:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"EvoPrompt uses LLMs to run evolutionary operators on populations of prompts, outperforming human-engineered prompts by up to 25% on BIG-Bench Hard tasks across 31 datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2309.03883","ref_index":49,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"DoLa: Decoding by Contrasting Layers Improves Factuality in Large Language Models","primary_cat":"cs.CL","submitted_at":"2023-09-07T17:45:31+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DoLa reduces hallucinations in LLMs by contrasting logits from later versus earlier layers during decoding, improving truthfulness on TruthfulQA by 12-17 absolute points without fine-tuning or retrieval.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2309.00267","ref_index":75,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"RLAIF vs. RLHF: Scaling Reinforcement Learning from Human Feedback with AI Feedback","primary_cat":"cs.CL","submitted_at":"2023-09-01T05:53:33+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RLAIF matches RLHF on summarization and dialogue tasks, with a direct-RLAIF variant achieving superior results by using LLM rewards directly during training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2305.11206","ref_index":1,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"LIMA: Less Is More for Alignment","primary_cat":"cs.CL","submitted_at":"2023-05-18T17:45:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Fine-tuning a 65B model on 1,000 high-quality examples produces output that humans rate as good as or better than GPT-4 in 43% of cases, indicating most capabilities come from pretraining.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2304.13734","ref_index":17,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"The Internal State of an LLM Knows When It's Lying","primary_cat":"cs.CL","submitted_at":"2023-04-26T02:49:38+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Hidden activations in LLMs encode detectable information about statement truthfulness, enabling a classifier to identify true versus false content more reliably than the model's assigned probabilities.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2304.06767","ref_index":93,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"RAFT: Reward rAnked FineTuning for Generative Foundation Model Alignment","primary_cat":"cs.LG","submitted_at":"2023-04-13T18:22:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RAFT aligns generative models by ranking samples with a reward model and fine-tuning only on the top-ranked outputs, reporting gains on reward scores and automated metrics for LLMs and diffusion models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2304.06364","ref_index":36,"ref_count":1,"confidence":0.35,"is_internal_anchor":false,"paper_title":"AGIEval: A Human-Centric Benchmark for Evaluating Foundation Models","primary_cat":"cs.CL","submitted_at":"2023-04-13T09:39:30+00:00","verdict":"ACCEPT","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AGIEval shows GPT-4 exceeding average human scores on SAT Math at 95% and Chinese college entrance English at 92.5%, while revealing weaker results on complex reasoning tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}