{"total":13,"items":[{"citing_arxiv_id":"2606.06941","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Quantum-Inspired Trace-Augmented Evidence Selection for Reasoning over Structured Hypothesis Spaces","primary_cat":"cs.AI","submitted_at":"2026-06-05T06:12:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"EP-HUBO treats CoT evidence selection as higher-order unconstrained binary optimization over per-hypothesis pools with quality weights to improve aggregation on legal benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03544","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SAGE: A Quantitative Evaluation of Socialized Evolution in Agent Ecosystems","primary_cat":"cs.AI","submitted_at":"2026-06-02T12:08:38+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SAGE compares social co-evolution against matched self-evolution across three arenas and finds peer history enables breakthroughs only for agents that plateau under self-improvement, with abstraction of traces mattering more than raw volume.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03014","ref_index":169,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MOSAIC: Efficient Mixture-of-Agent Scheduling via Adaptive Aggregation and Inference Concurrency","primary_cat":"cs.LG","submitted_at":"2026-06-02T01:40:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MOSAIC uses an Integer Linear Program scheduler for expert placement and prompt assignment plus adaptive aggregation to achieve 1.7-2.3x end-to-end speedup on 4-GPU MoA workloads while keeping accuracy within 0.1pp.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02646","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"The Ringelmann Effect in Multi-Agent LLM Systems: A Scaling Law for Effective Team Size","primary_cat":"physics.soc-ph","submitted_at":"2026-05-31T16:19:54+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A derived scaling law R(N) = 1/(1 + c(N-1)N^{-β}) fits answer diversity and correctness across 44 LLM multi-agent conditions with R² > 0.99, classifying regimes by β and showing only heterogeneous teams escape hard-ceiling saturation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00405","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"From Talking Words to Sharing Thoughts: Scalable Multi-LLM Aggregation via Structured Message Passing","primary_cat":"cs.GT","submitted_at":"2026-05-29T22:47:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A bipartite factor graph with message-passing protocol and asymmetric damping aggregates multi-LLM predictions, cutting token use by 97% and API calls by 6X while outperforming baselines on MMLU, MMLU-Pro, GPQA, and MedMCQA.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19418","ref_index":23,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Conflict-Resilient Multi-Agent Reasoning via Signed Graph Modeling","primary_cat":"cs.AI","submitted_at":"2026-05-19T06:11:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SIGMA builds a signed relational graph among LLM agents and uses conflict-aware message passing plus weighted aggregation to produce more consistent predictions than prior cooperative-assumption baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09121","ref_index":39,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"A Communication-Theoretic Framework for LLM Agents: Cost-Aware Adaptive Reliability","primary_cat":"cs.LG","submitted_at":"2026-05-09T19:14:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"UNKNOWN","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLM reliability techniques are unified as communication channel operators, with a new cost-aware router achieving superior quality-cost tradeoffs on hard tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.21950","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Feedback Over Form: Why Execution Feedback Matters More Than Pipeline Topology in 1-3B Code Generation","primary_cat":"cs.SE","submitted_at":"2026-04-23T00:34:54+00:00","verdict":"ACCEPT","verdict_confidence":"MODERATE","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Execution feedback in refinement loops improves 1-3B code generation performance far more than complex pipeline topologies discovered via evolutionary search on HumanEval and sanitized MBPP.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19049","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Refute-or-Promote: An Adversarial Stage-Gated Multi-Agent Review Methodology for High-Precision LLM-Assisted Defect Discovery","primary_cat":"cs.CR","submitted_at":"2026-04-21T03:55:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Refute-or-Promote applies adversarial multi-agent review with kill gates and empirical verification to filter LLM defect candidates, killing 79-83% before disclosure and yielding 4 CVEs plus multiple accepted fixes across libraries, C++ standard, and compilers.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Song [11, 12] independently showed that fresh-context review outper- forms same-session review (p = 0 .008) and that addi- tional roundsdegradequality-directly supporting our stage-gated design. POPPER [13] is a Popperian falsifica- tion framework with formal statistical control. Mixture- of-Agents [36] aggregates heterogeneous outputs; Self- MoA [35] counters that same-model repeated sampling beats heterogeneous MoA by 6.6%. Our kill-mandate gating is structurally distinct from aggregation-style en- sembles. Kim et al. [30] empirically document that LLMs agree∼60% of the time when both err, with correlation increasingwith model capability-establishing the empir- ical basis for our cross-family (not merely cross-instance)"},{"citing_arxiv_id":"2602.19509","ref_index":5,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Pyramid MoA: A Probabilistic Framework for Cost-Optimized Anytime Inference","primary_cat":"cs.CL","submitted_at":"2026-02-23T04:47:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Pyramid MoA is a hierarchical Mixture-of-Agents system with a decision-theoretic router that achieves up to 42.9% compute savings while nearly matching oracle accuracy on MBPP, GSM8K, MMLU, HumanEval, and MATH.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.22579","ref_index":32,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SANet: A Semantic-aware Agentic AI Networking Framework for Cross-layer Optimization in 6G","primary_cat":"cs.AI","submitted_at":"2025-12-27T12:42:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SANet uses semantic-aware AI agents for cross-layer 6G optimization, achieving up to 14.61% performance gains with 44.37% of the FLOPs of prior methods via model partitioning and decentralized multi-objective algorithms.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.21159","ref_index":19,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"MAC: Masked Agent Collaboration Boosts Large Language Model Medical Decision-Making","primary_cat":"cs.AI","submitted_at":"2025-07-25T04:21:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"MAC framework selects Pareto-optimal LLM agents and masks low cross-consistency outputs for adaptive collaboration in medical decision-making.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.14200","ref_index":37,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"A Scalable Multi-LLM Collaboration System with Retrieval-based Selection and Exploration-Exploitation-Driven Enhancement","primary_cat":"cs.CL","submitted_at":"2025-07-14T16:17:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SMCS coordinates 15 open-source LLMs via retrieval-based prior selection and exploration-exploitation posterior enhancement, outperforming GPT-4.1 by 5.36% and GPT-o3-mini by 5.28% on eight benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}