{"total":23,"items":[{"citing_arxiv_id":"2605.20713","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SAVER: Selective As-Needed Vision Evidence for Multimodal Information Extraction","primary_cat":"cs.CV","submitted_at":"2026-05-20T05:10:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SAVER proposes a conformal groundability gate plus submodular image selector that activates vision only when needed for multimodal named entity recognition and relation extraction, improving F1 while lowering compute.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19568","ref_index":44,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"m3BERT: A Modern, Multi-lingual, Matryoshka Bidirectional Encoder","primary_cat":"cs.CL","submitted_at":"2026-05-19T09:13:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"m3BERT uses a three-stage Matryoshka pretraining approach on a bidirectional encoder to support variable embedding sizes while outperforming prior models on large-scale retrieval tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17201","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Filter-then-Verify: A Multiphase GNN and ModernBERT Framework for Social Engineering Detection in Email Networks","primary_cat":"cs.CR","submitted_at":"2026-05-17T00:04:08+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A two-stage GNN-plus-ModernBERT framework detects social engineering attacks in email networks by first filtering structural anomalies at 86% recall and then verifying content to reach over 92% precision on 
augmented Enron data.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.17106","ref_index":17,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"HyDRA: Hybrid Dynamic Routing Architecture for Heterogeneous LLM Pools","primary_cat":"cs.CL","submitted_at":"2026-05-16T18:19:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HyDRA routes queries to cost-effective LLMs by predicting multi-dimensional capability requirements with a multi-head encoder and applying shortfall matching against configuration-defined model profiles, delivering up to 72.5 percent cost savings on coding benchmarks while remaining decoupled from specific models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16991","ref_index":182,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Response-free item difficulty modelling for multiple-choice items with fine-tuned transformers: Component-wise representation and multi-task learning","primary_cat":"cs.CL","submitted_at":"2026-05-16T13:22:57+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Fine-tuned transformers with multi-task learning recover substantial wording-derived signal for item difficulty at small sample sizes typical in applied testing.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.16035","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Who Owns This Agent? 
Tracing AI Agents Back to Their Owners","primary_cat":"cs.CR","submitted_at":"2026-05-15T15:10:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"A canary injection protocol for linking observed AI agent behavior to the responsible account at the hosting vendor, with robust variants for adversarial filtering.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07982","ref_index":17,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"GLiGuard: Schema-Conditioned Classification for LLM Safeguard","primary_cat":"cs.CL","submitted_at":"2026-05-08T16:44:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"GLiGuard is a compact schema-conditioned bidirectional encoder that matches 7B-27B guard models on safety benchmarks while delivering up to 16x higher throughput and 17x lower latency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07622","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Is She Even Relevant? 
When BERT Ignores Explicit Gender Cues","primary_cat":"cs.CL","submitted_at":"2026-05-08T11:48:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A Dutch BERT model encodes gender linearly by epoch 20 but does not dynamically update its representations when explicit female cues contradict learned stereotypical associations in short sentence templates.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.07554","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"ProteinJEPA: Latent prediction complements protein language models","primary_cat":"cs.LG","submitted_at":"2026-05-08T10:30:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Masked-position MLM plus JEPA latent prediction outperforms MLM-only pretraining on 10-11 of 16 downstream tasks for 35M-150M protein models while JEPA alone fails.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.08254","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"HyperTransport: Amortized Conditioning of T2I Generative Models","primary_cat":"cs.LG","submitted_at":"2026-05-07T19:38:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"HyperTransport amortizes activation steering for T2I models via a hypernetwork that predicts intervention parameters from CLIP embeddings, delivering 3600-7000x speedup and matching per-concept baselines on 167 unseen concepts.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00086","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"NorBERTo: A ModernBERT Model 
Trained for Portuguese with 331 Billion Tokens Corpus","primary_cat":"cs.CL","submitted_at":"2026-04-30T17:16:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"NorBERTo, a ModernBERT encoder trained on the largest open Portuguese corpus of 331B tokens, reports top encoder results on several PLUE and ASSIN 2 tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.26483","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Efficient Listwise Reranking with Compressed Document Representations","primary_cat":"cs.IR","submitted_at":"2026-04-29T09:48:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RRK compresses documents to multi-token embeddings for efficient listwise reranking, enabling an 8B model to achieve 3x-18x speedups over smaller models with comparable or better effectiveness.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"By integrating compressed document representation with listwise reranking, our 8B parameter RRK reranker outpaces all other rerankers in speed while maintaining robust effectiveness. Captions indicate document max-length. RRK compresses a 512-token document into an 8-token compressed version. However, improving their efficiency is still an open challenge [34] as LLM-based rerankers remain much less efficient than traditional cross-encoder rerankers [9, 35]. Recent works have explored more efficient listwise reranking. A key step, introduced by Gangi Reddy et al. 
[10],Zhuang et al.[35], is to reduce latency by about 50% by producing the full ranking in a single forward simply from the first-token logits."},{"citing_arxiv_id":"2604.23488","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Do Synthetic Trajectories Reflect Real Reward Hacking? A Systematic Study on Monitoring In-the-Wild Hacking in Code Generation","primary_cat":"cs.LG","submitted_at":"2026-04-26T01:26:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Synthetic reward hacking data does not capture natural hacking behaviors in code generation RL, causing monitors trained on it to generalize poorly compared to those trained on in-the-wild trajectories.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19921","ref_index":7,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Commonsense Knowledge with Negation: A Resource to Enhance Negation Understanding","primary_cat":"cs.CL","submitted_at":"2026-04-21T19:00:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Augmenting commonsense knowledge corpora with negation produces over 2M new triples that benefit LLM negation understanding when used for pre-training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.18603","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Dual Triangle Attention: Effective Bidirectional Attention Without Positional Embeddings","primary_cat":"q-bio.QM","submitted_at":"2026-04-09T19:32:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Dual Triangle Attention achieves effective bidirectional attention with built-in positional 
inductive bias via dual triangular masks, outperforming standard bidirectional attention on position-sensitive tasks and showing strong masked language modeling results with or without positional embeddings.","context_count":1,"top_context_role":"method","top_context_polarity":"use_method","context_text":"For nucleotide modeling, where functional and structural dependencies can span large genomic distances, DTA's context extension capability may complement existing long-range approaches. Future work should focus on MLM-specific variants of position dropping, potentially enabling robust long-context extension in bidirectional settings without full long-context pretraining. Methods Data sources Argmax position probe.Synthetic sequences were generated by sampling integers uniformly from[0,v)wherev= 64is the vocabulary size. Labels were the 0-indexed position of the first occurrence of the maximum value. Sequence length was fixed atl= 64. Batches of 1,024 sequences were generated on-the-fly during training; evaluation used 16 batches of 1,024 sequences each. Natural language.We used FineWeb-Edu (45), a large-scale filtered web corpus designed for language model pretraining. Text was tokenized using a custom Byte-Pair Encoding (BPE) tokenizer (51) with a vocabulary of 4,096 tokens, chosen to reduce vocabulary size relative to standard tokenizers while preserving reasonable subword granularity. Training sequences were truncated or padded to 256 tokens. Validation and test sets were constructed by filtering documents with at least 1,024 tokens, then splitting the remaining documents into 1,000 documents each for validation and testing. Training data was streamed and filtered to exclude validation and test documents. Halleeet al.| arXiv | April 22, 2026 | 5-12 Fig. 5.DroPE recovery analysis. (a) NLP extended-context validation loss, accuracy, MCC, and F1 before and after dropping positional embeddings at 70% of training. 
(b) Protein extended-context validation loss, accuracy, MCC, and F1. The vertical dashed line marks the drop point. Shaded regions represent±1 standard deviation across three seeds. (c) NLP final test loss, accuracy, MCC, and F1 comparing RoPE (kept throughout) vs. RoPE-off (dropped at 70%). (d) Protein final test loss, accuracy, MCC, and F1. Signi"},{"citing_arxiv_id":"2604.07985","ref_index":41,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Rag Performance Prediction for Question Answering","primary_cat":"cs.CL","submitted_at":"2026-04-09T08:55:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A novel supervised predictor modeling semantic relationships among question, retrieved passages, and generated answer best forecasts when RAG improves QA performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.06193","ref_index":33,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Depression Detection at the Point of Care: Automated Analysis of Linguistic Signals from Routine Primary Care Encounters","primary_cat":"cs.CL","submitted_at":"2026-03-11T21:08:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Zero-shot GPT-OSS detects depression from 1,108 primary care encounter transcripts with AUPRC 0.51 and AUROC 0.77, with meaningful signals in the first 128 patient tokens and added value from dyadic mirroring.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.11108","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Explanation Bias is a Product: Revealing the Hidden Lexical and Position Preferences in Post-Hoc Feature 
Attribution","primary_cat":"cs.CL","submitted_at":"2025-12-11T20:48:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Explanation biases in feature attribution methods are systematic products of lexical and positional preferences, with observed trade-offs across models and higher bias in anomalous explanations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2509.00798","ref_index":45,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Progressive Multimodal Search and Reasoning for Knowledge-Intensive Visual Question Answering","primary_cat":"cs.CV","submitted_at":"2025-08-31T11:14:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PMSR progressively constructs structured reasoning trajectories with dual-scope queries and compositional reasoning to improve knowledge acquisition and answer accuracy in knowledge-intensive VQA.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.20993","ref_index":42,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Annotation-Assisted Learning of Treatment Policies From Multimodal Electronic Health Records","primary_cat":"cs.LG","submitted_at":"2025-07-28T16:52:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AACE is an annotation-assisted method for causal policy learning from multimodal EHRs that outperforms risk-based and representation-based baselines on synthetic, semi-synthetic, and real datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.00994","ref_index":43,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Should We Still 
Pretrain Encoders with Masked Language Modeling?","primary_cat":"cs.CL","submitted_at":"2025-07-01T17:45:48+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Controlled ablations of 38 models find MLM superior to CLM on representation benchmarks while CLM offers better data efficiency and stability; a biphasic CLM-then-MLM schedule is optimal under fixed compute and improves when initialized from pretrained CLM models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2505.20414","ref_index":51,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"RetroMotion: Retrocausal Motion Forecasting Models are Instructable","primary_cat":"cs.CV","submitted_at":"2025-05-26T18:05:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Retrocausal transformer decomposes multi-agent motion forecasts into marginals and pairwise joints, models uncertainty with compressed exponentials, achieves strong Waymo results, generalizes to Argoverse 2 and V2X-Seq, and enables implicit instruction following from standard training.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2404.13004","ref_index":51,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"A Unified Framework for Modeling Heterogeneous Financial Data via Dual-Granularity Prompting","primary_cat":"cs.CE","submitted_at":"2024-04-19T17:01:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"FinLangNet applies dual-granularity prompting in a sequential model to heterogeneous financial data, reporting 6.3 pp KS improvement and 9.9% bad debt reduction in real-world 
deployment.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}