{"total":260,"items":[{"citing_arxiv_id":"2606.26880","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Heterogeneous Neural Predictivity from Language Models During Naturalistic Comprehension","primary_cat":"cs.CL","submitted_at":"2026-06-25T11:08:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":3.0,"formal_verification":"none","one_line_summary":"Frozen language model features predict held-out neural activity in naturalistic comprehension across multiple brain recording modalities with gains over low-level baselines in many but not all sources, after extensive controls.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26775","ref_index":41,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Evaluation Pitfalls and Challenges in Multimedia Event Extraction","primary_cat":"cs.CL","submitted_at":"2026-06-25T09:05:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A systematic analysis of evaluation practices in multimedia event extraction reveals that minor methodological choices cause large performance swings and overestimation of cross-modal grounding ability.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.26749","ref_index":195,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Structure Before Collapse: Transient semantic geometry in next-token prediction","primary_cat":"cs.LG","submitted_at":"2026-06-25T08:33:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Semantic geometry emerges transiently early in next-token prediction training before collapsing to Neural Collapse symmetry in synthetic settings with latent semantic factors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25759","ref_index":8,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"NEURON-Fabric: Architecture-Runtime Co-Design for Controlled Low-Bit Gradient Communication","primary_cat":"cs.DC","submitted_at":"2026-06-24T12:32:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"NEURON-Fabric provides a profile-guided runtime for controlled low-bit gradient communication that preserves accuracy near full-precision levels while reducing modeled communication traffic across vision, transformer, and language model workloads.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25674","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"BitNet Text Embeddings","primary_cat":"cs.CL","submitted_at":"2026-06-24T10:37:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BITEMBED converts LLM backbones to ternary BitNet-style encoders, adapts them with contrastive pre-training and teacher distillation, and produces text embeddings at multiple precisions that perform comparably to full-precision baselines on MMTEB.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.25268","ref_index":3,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Interpretable Text Signals for Structured Responses","primary_cat":"stat.AP","submitted_at":"2026-06-24T01:08:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Joint NMF and binomial regression learns response-relevant text signals with competitive performance on simulations and review data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.24655","ref_index":31,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AI-PAVE-Br: Leveraging Large Language Models for Enhanced Product Attribute Value Extraction through a Golden Set Approach","primary_cat":"cs.CL","submitted_at":"2026-06-23T14:48:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"AI-PAVE-Br applies LLMs with prompt engineering to outperform NER baselines on Portuguese product attribute extraction and releases the Golden Set as a new benchmark dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.24387","ref_index":3,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AutoSpecNER: A Fine-Grained Named Entity Recognition Dataset for Vehicle Specification Extraction","primary_cat":"cs.CL","submitted_at":"2026-06-23T10:19:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AutoSpecNER is a new fine-grained NER dataset for vehicle advertisements with 659 examples and 15 categories, where DeBERTa reaches 90% micro-F1 versus 43% for rules and 77.8% for the best LLM.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22976","ref_index":11,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Understanding Parallel Samplers in Masked Diffusion via Random Walks on Graphs","primary_cat":"cs.LG","submitted_at":"2026-06-22T07:56:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Graph random walks provide a verifiable sandbox for diagnosing parallel samplers in masked diffusion models, showing performance depends on graph structure and introducing a new exact bisection sampler.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22771","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Learning Moral Diversity: Modelling Individual Perspectives in Moral Classification of Texts","primary_cat":"cs.CL","submitted_at":"2026-06-22T02:19:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Extending language models with annotator-specific layers improves individual moral annotation predictions and reveals perspective variations hidden by label aggregation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22342","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"How Does Research Evolve? Tracing Cross-Domain Trajectories in NLP, ML, and CV with Claim-Grounded Typed Citations","primary_cat":"cs.CL","submitted_at":"2026-06-21T05:20:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SciTraj is the first claim-grounded typed citation graph with 32,559 papers and 573,126 edges across six relation types, plus a temporally split link-prediction benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.22309","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The $\\alpha$-Index: A Penalized Authorship-Integrity Framework for Position-Weighted Scientific Contribution","primary_cat":"cs.DL","submitted_at":"2026-06-21T02:32:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"The α-index is a conserved position-weighted authorship framework with a senior-author penalty that decreases credit as the number of middle authors increases.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21890","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Scaling Performance and Low-Resource Annotation with Many-Shot In-Context Learning for Named Entity Recognition","primary_cat":"cs.CL","submitted_at":"2026-06-20T05:39:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Many-shot ICL with LLMs matches or exceeds supervised BERT on NER and generates high-quality labels for low-resource settings, producing ~10% absolute F1 gains when used to fine-tune BERT.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21645","ref_index":73,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Behavioral and Representational Evidence of Binomial Ordering Preferences in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-06-19T17:56:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs recover dominant binomial orders from corpora but align less closely with exact preference distributions, with preference strength partially encoded in middle-to-late layers and manipulable via steering.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21321","ref_index":83,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Objective-Behavior Alignment: Diagnostics for MORL Policy Selection","primary_cat":"cs.LG","submitted_at":"2026-06-19T11:10:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Proposes an exploratory diagnostic workflow to highlight behavioral variation along MORL Pareto fronts not captured by objective values, with validation on grid and continuous control tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21077","ref_index":12,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"OTTER: A Red-Teaming System for Toxicity-Evading Jailbreak Prompt Optimization","primary_cat":"cs.CR","submitted_at":"2026-06-19T03:55:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OTTER optimizes prompts to decouple surface toxicity from adversarial intent, raising attack success rates on GPT models from 7% to 84% across 457 AdvBench examples.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.21045","ref_index":32,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"OVIG: Optimistic Verification of AI Training Integrity via Gradient Signals","primary_cat":"cs.CR","submitted_at":"2026-06-19T02:22:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"OVIG introduces an optimistic gradient-based verification framework for outsourced AI post-training that uses stride-sampled interval checks against an honest-replay boundary to achieve 0% attack success rate with low overhead.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.20287","ref_index":111,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"PsyScore: A Psychometrically-Aware Framework for Trait-Adaptive Essay Scoring and ZPD-Scaffolded Feedback","primary_cat":"cs.CL","submitted_at":"2026-06-18T14:29:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PsyScore combines a Trait-Adaptive Neural IRT Scorer using GPCM with a ZPD-Scaffolded Feedback Generator to deliver both competitive scoring and pedagogically aligned feedback on the ASAP++ dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.20089","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"IHUBERT: Vector-Based Semantic Deduplication and Domain-Balanced Pretraining for Persian Resources","primary_cat":"cs.CL","submitted_at":"2026-06-18T11:10:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Trains a 125M-parameter Persian PLM on a curated 45GB corpus using vector semantic deduplication for domain balance, topping QA and NLI benchmarks while remaining competitive on NER and classification.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.19626","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Toten: A Knowledge-Based System For Structure-Preserving Representation Of Physical Quantities And Technical Notation In Brazilian Portuguese","primary_cat":"cs.AI","submitted_at":"2026-06-17T22:06:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"TOTEN is a knowledge-based system for structure-preserving representation of physical quantities and technical notation in Brazilian Portuguese using an ontology of engineering entities and external authorities, outperforming statistical baselines in atomicity and reconstruction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.19264","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Structured Inference with Large Language Gibbs","primary_cat":"cs.LG","submitted_at":"2026-06-17T16:40:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Large Language Gibbs uses LLM next-token conditionals as MCMC transition operators for iterative resampling of structured variables, aiming to produce a stationary distribution that compromises across all local conditionals.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.19412","ref_index":29,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Spectral Retrieval-Augmented Time-Series Forecasting","primary_cat":"cs.LG","submitted_at":"2026-06-17T15:42:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SpecReTF improves time series forecasting by retrieving similar historical patterns using windowed frequency representations with combined amplitude-phase similarity and exponential recency weighting, outperforming time-domain methods on benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18852","ref_index":28,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Aligning Implied Statements for Implicit Hate Speech Generalizability with Context-Bounded Semi-hard Negative Mining","primary_cat":"cs.CL","submitted_at":"2026-06-17T09:33:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"ImpSH improves cross-domain generalization in implicit hate speech classification by aligning posts with implied statements and applying context-bounded semi-hard negative mining within a triplet learning setup.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18782","ref_index":6,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"RedactionBench","primary_cat":"cs.CL","submitted_at":"2026-06-17T07:51:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Introduces a 200-document benchmark and character-level R-Score for contextual PII redaction, with model evaluations and human agreement data showing the task remains unsolved.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18636","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"PEC-Home: Interpretation of Progressively Elliptical Commands in Smart Homes","primary_cat":"cs.CL","submitted_at":"2026-06-17T03:17:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Presents PEC-Home dataset for elliptical smart-home commands and shows LLMs achieve lower execution accuracy on elliptical inputs than complete commands even with dialogue history access.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.18389","ref_index":28,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Want Better Synthetic Data? Steer It: Activation Steering for Low-Resource Language Generation","primary_cat":"cs.CL","submitted_at":"2026-06-16T18:34:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Activation steering on early layers improves diversity of synthetic data for low-resource languages and often boosts downstream classifier performance compared to non-steered prompting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17816","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Conservation Laws for Modern Neural Architectures","primary_cat":"cs.LG","submitted_at":"2026-06-16T11:44:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Unified framework characterizes conservation laws for gradient flow in feedforward networks with GELU/SiLU/SwiGLU, multihead attention with positional encodings, and MoE models under various gating.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.17464","ref_index":197,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"CheckMIABench: Firm Foundations For Membership Inference Attacks on Language Models","primary_cat":"cs.LG","submitted_at":"2026-06-16T03:26:15+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CheckMIABench converts LLMs with intermediate checkpoints into clean MIA testbeds by using pre- and post-checkpoint training data from the same distribution and evaluates published attacks on Pythia and OLMo models while releasing an open-source library.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.14031","ref_index":47,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Applicability Condition Extraction for Therapeutic Drug-Disease Relations","primary_cat":"cs.AI","submitted_at":"2026-06-12T02:10:57+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Introduces applicability condition extraction for therapeutic drug-disease relations, creates first annotated dataset of 1,119 pairs, and proposes enhanced LoRA method outperforming baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.13051","ref_index":3,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AAbAAC: An Annotated Corpus for Autoimmunity Information Extraction","primary_cat":"cs.AI","submitted_at":"2026-06-11T08:34:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"The authors created and released AAbAAC, an annotated corpus of 115 abstracts for autoimmunity information extraction, and showed NER performance gains after fine-tuning models on it.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12708","ref_index":26,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"AfriSUD: A Dependency Treebank Collection for Evaluating Models on African Languages","primary_cat":"cs.CL","submitted_at":"2026-06-10T21:55:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AfriSUD supplies new SUD-annotated dependency treebanks for nine Sub-Saharan African languages and demonstrates that existing models exhibit clear limitations on their syntax.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12088","ref_index":123,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Debiasing Without Protected Attributes: Latent Concept Erasure from Textual Profiles","primary_cat":"cs.CL","submitted_at":"2026-06-10T13:49:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"H-SAL erases latent concepts from text profiles using self-descriptions as implicit debiasing signals and shows competitive performance on a new multi-domain Stack Exchange helpfulness benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11827","ref_index":8,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Jaguar: Fast Private CNN Inference with Power-of-Two Homomorphic Arithmetic","primary_cat":"cs.CR","submitted_at":"2026-06-10T09:04:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Jaguar replaces prime-modulus HE with power-of-two arithmetic to enable coefficient-domain convolution and local-shift truncation, reporting 2-3.7x lower latency than Cheetah and Rhombus on ResNet-18/50 and MobileNetV2.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11816","ref_index":25,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"WorldReasoner: Evaluating Whether Language Model Agents Forecast Events with Valid Reasoning","primary_cat":"cs.CL","submitted_at":"2026-06-10T08:50:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"WorldReasoner supplies 345 resolved forecasting tasks built from 14,141 articles to score LM agents on outcome quality, evidence quality, and reasoning quality against time-bounded evidence and hindsight graphs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11316","ref_index":51,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Sch\\\"utzen: Evaluating LLM Safety in Bulgarian and German Contexts","primary_cat":"cs.CL","submitted_at":"2026-06-09T18:01:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Schützen is a German-Bulgarian LLM safety dataset showing pronounced cross-language differences in model safety behavior.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.09489","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"LLM-Orchestrated Conformance Checking in Stroke Care Without Computer-Interpretable Guidelines","primary_cat":"cs.AI","submitted_at":"2026-06-08T13:44:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An LLM-orchestrated framework enables conformance checking in stroke care from unstructured texts, achieving over 86% conformance in hospital data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08854","ref_index":203,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"sGPO: Trading Inference FLOPs for Training Efficiency in RLVR","primary_cat":"cs.LG","submitted_at":"2026-06-07T21:47:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"sGPO uses an initial-policy success-rate profiling pass to adaptively set rollout group sizes, filter data, and build a curriculum, cutting total RLVR training compute by 3x while matching baseline performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08810","ref_index":14,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Continuous Language Diffusion as a Decoder-Interface Problem","primary_cat":"cs.CL","submitted_at":"2026-06-07T20:00:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Continuous language diffusion works by entering high-margin decoder basins where frozen T5 embeddings recover 93-96% of native decisions and linear readouts reach 97.9% agreement, implying models should be evaluated as representation-decoder systems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08090","ref_index":14,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Fast LLM-Based Semantic Filtering: From a Unified Framework to an Adaptive Two-Phase Method","primary_cat":"cs.DB","submitted_at":"2026-06-06T10:32:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"An adaptive two-phase semantic filter using clustering then a hybrid proxy trained on LLM confidence achieves 1.6-2.0x speedup over prior methods at 90% accuracy on 10K document corpora.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.07219","ref_index":33,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Adversarial Creation and Detection of AI-Generated Social Bot Content","primary_cat":"cs.CL","submitted_at":"2026-06-05T12:32:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"An adversarial methodology generates a multilingual cross-platform dataset of paired human-AI social messages, and models trained on it outperform prior detectors on real-world out-of-distribution data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.06267","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Many Circuits, One Mechanism: Input Variation and Evaluation Granularity in Circuit Discovery","primary_cat":"cs.CL","submitted_at":"2026-06-04T15:10:14+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Structurally distinct circuits for literal sequence copying across token frequency bands implement the same computation, shown by broad transfer of band-specific edges, a shared core recovering 99% performance, and interchangeable representations via causal interventions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05864","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Analysis of the Neglect-Zero Effect in Large Language Models","primary_cat":"cs.CL","submitted_at":"2026-06-04T08:39:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LLMs do not exhibit the neglect-zero effect in structural priming tasks unlike humans.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05444","ref_index":103,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Multilingual Coreference Resolution via Cycle-Consistent Machine Translation","primary_cat":"cs.CL","submitted_at":"2026-06-03T21:06:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A cycle-consistent MT pipeline generates and similarity-weights training data for coreference resolution, producing gains on four low-resource languages and enabling the task where no corpora existed.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05054","ref_index":195,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Boosting Self-Consistency with Ranking","primary_cat":"cs.CL","submitted_at":"2026-06-03T16:12:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RISC reformulates self-consistency answer selection as a ranking task solved by a lightweight LambdaRank model with five hand-designed features, yielding better accuracy-efficiency trade-offs than majority voting on QA benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04733","ref_index":20,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Contrastive Learning and Correlation Clustering for Sequences of Network Telescope Data","primary_cat":"cs.LG","submitted_at":"2026-06-03T11:15:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A contrastive learning transformer embeds network flow sequences to enable correlation clustering that groups scanner sources consistently with labels.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04661","ref_index":52,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"CRAFT: Cost-aware Refinement And Front-aware Tuning of Prompts","primary_cat":"cs.CL","submitted_at":"2026-06-03T09:40:03+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CRAFT is a Pareto-front prompt optimizer that allocates scarce LLM validation calls to candidates near the current front using accuracy- and cost-oriented generators plus NSGA-II retention.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04306","ref_index":153,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Organizational Control Layer: Governance Infrastructure at the Execution Boundary of LLM Agent Systems","primary_cat":"cs.MA","submitted_at":"2026-06-03T00:25:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"OCL is a governance layer for LLM agents that cuts unsafe executions from 88% to near-zero and raises valid success from 12% to 96% in adversarial buyer-seller negotiations across frontier LLMs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04274","ref_index":10,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Long Live Fine-Tuning: Task-Specific Transformers Outperform Zero-Shot LLMs for Misinformation Response Classification on Reddit","primary_cat":"cs.CL","submitted_at":"2026-06-02T22:58:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Fine-tuned RoBERTa achieves 0.62 macro-F1 on 900 Reddit comments, outperforming best zero-shot LLM at 0.50, with largest gap on detecting belief propagation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03957","ref_index":28,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Efficient ASR Training with Conversations that Never Happened","primary_cat":"cs.CL","submitted_at":"2026-06-02T17:46:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Mixing 636 hours of LLM-generated synthetic conversations with 67 hours of real data outperforms a model trained on 2700 hours of real Hungarian speech on the BEA-Dialogue benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03817","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Rethinking the Idiomaticity Decomposability Hypothesis: Evidence from Distributional Learning","primary_cat":"cs.CL","submitted_at":"2026-06-02T15:59:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Language models show idiom decomposability correlates weakly with human judgments, negatively with syntactic flexibility, and contributes most strongly to representation stabilization during training alongside surprisal and frequency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}