{"total":17,"items":[{"citing_arxiv_id":"2606.27786","ref_index":77,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SHIFT: Gate-Modulated Activation Steering for Knowledge Conflict Mitigation in Retrieval-Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-06-26T07:17:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SHIFT reformulates neuron editing as learnable gate modulation on under 0.01% parameters to let LLMs adaptively balance contextual and parametric knowledge during RAG generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25402","ref_index":50,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"LibEvoBench: Probing Temporal Knowledge Stratification in Code Generation Models","primary_cat":"cs.SE","submitted_at":"2026-06-24T04:58:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"LibEvoBench benchmark shows LLMs are version-oblivious on evolving APIs, with documentation helping but version specification not.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11172","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Predicting Future Behaviors in Reasoning Models Enables Better Steering","primary_cat":"cs.LG","submitted_at":"2026-06-09T17:49:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Probes predicting future behaviors from intermediate steps enable Future Probe Controlled Generation for steering large reasoning models with minimal quality degradation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07688","ref_index":52,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"TRACER: Token ReAssignment for Concept ERasure in Generative Recommendation","primary_cat":"cs.IR","submitted_at":"2026-06-05T05:19:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TRACER uses token reassignment for concept-related items plus a coherence regularizer to unlearn specific concepts in generative recommendation while preserving utility better than baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05403","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Trust, but Don't Verify: Epistemic Blind Spots in LLM Source Evaluation","primary_cat":"cs.LG","submitted_at":"2026-06-03T20:15:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs identify fabricated statistics in isolation (rates 0.76-1.00) but ignore numeric validity during synthesis, relying on a methodology-register representation that transfers across domains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30207","ref_index":24,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Persona Conditioning of Brand Recommendations in Retrieval-Augmented Commercial Chat: A Prominence-Stratified Cross-Provider Audit","primary_cat":"cs.AI","submitted_at":"2026-05-28T16:43:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Persona prefixes reduce brand recommendation Jaccard similarity by 0.12-0.20, with mid-market brands swapping up to 75% of recommendations while category leaders remain ~80% consistent across OpenAI and Anthropic models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27157","ref_index":28,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Detecting Is Not Resolving: The Monitoring Control Gap in Retrieval Augmented LLMs","primary_cat":"cs.AI","submitted_at":"2026-05-26T15:18:43+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RAG models exhibit a monitoring-control gap: they acknowledge epistemic conflicts in accumulating documents yet fail to constrain unsafe recommendations, with single-turn tests overestimating safety.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26116","ref_index":13,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Divergent Recommendations, Convergent Diagnoses: Cross-Provider Failure-Mode Convergence in AI Commercial Recommendation","primary_cat":"cs.CY","submitted_at":"2026-05-22T17:19:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Two major AI providers diverge in which brands they recommend but converge on classifying the failure reasons, especially for low-prominence brands.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17301","ref_index":4,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ConflictRAG: Detecting and Resolving Knowledge Conflicts in Retrieval Augmented Generation","primary_cat":"cs.CL","submitted_at":"2026-05-17T07:25:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ConflictRAG introduces a conflict-aware RAG pipeline with two-stage detection (MLP + selective LLM), Entropy-TOPSIS credibility assessment, and a new CARS metric, reporting 88.7% F1 and 5.3-6.1% gains on benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15156","ref_index":4,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MeMo: Memory as a Model","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:51:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MeMo encodes new knowledge into a separate memory model that integrates with frozen LLMs, showing strong performance on QA benchmarks while avoiding catastrophic forgetting and working without access to model weights.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11574","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Three Regimes of Context-Parametric Conflict: A Predictive Framework and Empirical Validation","primary_cat":"cs.CL","submitted_at":"2026-05-12T06:00:48+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A three-regime framework resolves contradictions in LLM context vs. parametric knowledge conflicts by distinguishing single-source updating, competitive integration, and task-appropriate selection, with empirical confirmation of certainty gradients and task effects across five models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.03255","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Do LLMs have core beliefs?","primary_cat":"cs.LG","submitted_at":"2026-05-05T01:06:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LLMs generally fail to maintain stable worldviews under adversarial conversational pressure, indicating they lack core beliefs akin to those in human cognition.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Random House, New York, NY . Philip R Corlett, Guillermo Horga, Paul C Fletcher, Ben Alderson-Day, Katharina Schmack, and Albert R Powers. 2019. Hallucinations and strong priors. Trends in cognitive sciences, 23(2):114-127. Floris P De Lange, Micha Heilbron, and Peter Kok. 2018. How do expectations shape perception? Trends in cognitive sciences, 22(9):764-779. Aniket Didolkar, Anirudh Goyal, Nan Rosemary Ke, Siyuan Guo, Michal Valko, Timothy Lillicrap, Danilo Jimenez Rezende, Yoshua Bengio, Michael C Mozer, and Sanjeev Arora. 2024. Metacognitive capabilities of llms: An exploration in mathematical problem solving.Advances in Neural Information Processing Systems, 37:19783-19812. Yi Dong, Ronghui Mu, Yanghao Zhang, Siqi Sun, Tianle"},{"citing_arxiv_id":"2604.23750","ref_index":37,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"The Override Gap: A Magnitude Account of Knowledge Conflict Failure in Hypernetwork-Based Instant LLM Adaptation","primary_cat":"cs.LG","submitted_at":"2026-04-26T14:59:14+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Knowledge conflicts in hypernetwork LLM adaptation stem from constant adapter margins losing to frequency-dependent pretrained margins; selective layer boosting and conflict-aware triggering raise deep-conflict accuracy to 71-72.5% on Gemma-2B and Mistral-7B.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"model can retrieve facts that were stated in the internalized document, which is the easy case. What has not been studied is whether a document can update or override knowledge the model already has, which is the situation any real deployment faces when a regulation changes, a product gets a new price, or a commonly believed fact is corrected by a primary source. This is known in the literature as a knowledge conflict [37, 36], and the behavior of language models in the presence of such conflicts is only beginning to be understood. Our starting observation is that current instant internalization methods fail systematically on knowl- edge conflicts, and that the failure gets worse in proportion to how deeply the contradicted fact is engrained in the model. When a document states that the capital of the UK has moved from London"},{"citing_arxiv_id":"2604.15945","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RAGognizer: Hallucination-Aware Fine-Tuning via Detection Head Integration","primary_cat":"cs.CL","submitted_at":"2026-04-17T11:07:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"RAGognizer adds a detection head to LLMs for joint training on generation and token-level hallucination detection, yielding SOTA detection and fewer hallucinations in RAG while preserving output quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.07981","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"A Decomposition Perspective to Long-context Reasoning for LLMs","primary_cat":"cs.CL","submitted_at":"2026-04-09T08:51:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Decomposing long-context reasoning into atomic skills, synthesizing targeted pseudo-datasets, and applying RL improves LLM performance on long-context benchmarks by an average of 7.7%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14172","ref_index":31,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Tug-of-War within A Decade: Conflict Resolution in Vulnerability Analysis via Teacher-Guided Retrieval-Augmented Generations","primary_cat":"cs.CL","submitted_at":"2026-03-25T07:32:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CRVA-TGRAG combines parent-document segmentation, ensemble retrieval, and teacher-guided fine-tuning to mitigate knowledge conflicts and improve accuracy in LLM-based CVE vulnerability analysis.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.22500","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"OR-VSKC: Resolving Visual-Semantic Knowledge Conflicts in Operating Rooms with Synthetic Data-Guided Alignment","primary_cat":"cs.CV","submitted_at":"2025-06-25T07:06:29+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"OR-VSKC provides 28,190 synthetic operating room images plus an expert subset to expose and reduce visual-semantic knowledge conflicts in multimodal models for surgical risk detection.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}