{"total":20,"items":[{"citing_arxiv_id":"2606.24689","ref_index":19,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Automated Summarization of Software Documents: An LLM-based Multi-Agent Approach","primary_cat":"cs.SE","submitted_at":"2026-06-23T15:18:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Metagente is an LLM multi-agent system using Teacher-Student collaboration that outperforms baselines on real-world software documentation summarization for requirements analysis and technical docs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.04967","ref_index":13,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"From Prompt to Process: a Process Taxonomy and Comparative Assessment of Frameworks Supporting AI Software Development Agents","primary_cat":"cs.SE","submitted_at":"2026-06-03T14:49:15+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A new six-dimension process taxonomy for AI software development frameworks shows convergence on artifact persistence and human oversight but reveals that no framework covers all dimensions strongly, indicating a depth-portability trade-off.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02282","ref_index":1,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"POIROT: Interrogating Agents for Failure Detection in Multi-Agent Systems","primary_cat":"cs.AI","submitted_at":"2026-06-01T14:05:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"POIROT protocol repurposes agents in LLM multi-agent systems as an internal diagnostic layer for failure detection, outperforming single-LLM evaluators with gains that increase with complexity, agent count, and fault types.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00804","ref_index":4,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Dynamic Coordination Strategy Selection for Enterprise Multi-Agent Systems","primary_cat":"cs.MA","submitted_at":"2026-05-30T16:43:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Large-scale experiment with 1440 task executions finds dynamic routing of coordination strategies achieves near-best quality scores across models and classes but does not reliably identify exact winners.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00097","ref_index":23,"ref_count":2,"confidence":0.88,"is_internal_anchor":false,"paper_title":"RocketSmith: Agentic Additive Manufacturing of High-Powered Rockets","primary_cat":"cs.RO","submitted_at":"2026-05-25T14:37:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"RocketSmith is an LLM-based agentic system that designs four high-powered rockets via additive manufacturing, with two achieving stable launches and recovery after reaching 80% of simulated apogee.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.24453","ref_index":7,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Code2UML: Agentic LLMs with context engineering for scalable software visualization","primary_cat":"cs.SE","submitted_at":"2026-05-23T08:01:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Agentic architecture with context engineering enables scalable UML diagram generation from source code across multiple languages and diagram types.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20548","ref_index":25,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"What Do Agents Communicate? Characterizing Information Exchange in Multi-Agent Systems","primary_cat":"cs.MA","submitted_at":"2026-05-19T22:51:52+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Systematic study of inter-agent communication in LLM multi-agent systems shows reasoning and verification are critical for performance, with a new augmentation technique recovering 86.2% of failures.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.18153","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Three Heads Are Better Than One: A Multi-perspective Reasoning Framework for Enhanced Vulnerability Detection","primary_cat":"cs.SE","submitted_at":"2026-05-18T09:57:22+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ReasonVul deploys three LLM agents with independent analysis and structured debate to achieve 40% PairAcc and 72.52% F1 on PrimeVul, outperforming baselines by 81% in PairAcc.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.06279","ref_index":16,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Correct Code, Vulnerable Dependencies: A Large Scale Measurement Study of LLM-Specified Library Versions","primary_cat":"cs.SE","submitted_at":"2026-05-07T13:52:59+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"LLMs frequently specify library versions with known CVEs in generated code (36-56% of tasks), show low compatibility (20-63%), and converge on the same risky versions across models.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Publication rights licensed to ACM. ACM XXXX-XXXX/2026/5-ART https://doi.org/xxxx/xxxx , Vol. 1, No. 1, Article . Publication date: May 2026. arXiv:2605.06279v1 [cs.SE] 7 May 2026 2 Chengjie Wang, Jingzheng Wu, Xiang Ling, Tianyue Luo, and Chen Zhao assistance has moved from an optional productivity tool to an expected part of the developer workflow [16, 73], and with that shift comes new, largely unaudited risks in the code being produced. The development of modern software projects depends on third-party libraries (TPLs). Studies of large open-source ecosystems document pervasive TPL adoption, with the transitive dependency graph of a typical application spanning dozens to hundreds of packages [ 48, 54]."},{"citing_arxiv_id":"2605.04532","ref_index":18,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Accountable Agents in Software Engineering: An Analysis of Terms of Service and a Research Roadmap","primary_cat":"cs.SE","submitted_at":"2026-05-06T06:18:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Comparative review of AI coding tool ToS shows responsibility for code quality and compliance shifted to users, with policy misalignment for autonomous agents, plus a research roadmap.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"ity, but also with new needs for coordination and review [ 5, 27]. More recent work links these workflow changes to concrete devel- opment outcomes, including changes in the effects of efficiency, comprehension, and long-term maintenance [6, 26, 29, 32, 38]. In parallel, research has shifted towardagenticsystems that plan and execute multi-step changes with less direct supervision [18, 35]. These systems are typically evaluated on end-to-end software en- gineering tasks (e.g., issue resolution) rather than isolated com- pletions [36, 37]. In addition to academic prototypes, practitioner- facing systems and announcements illustrate how quickly this space is moving [12]. arXiv:2605.04532v1 [cs.SE] 6 May 2026 Treude 2.2 Quality, security, and legal risk"},{"citing_arxiv_id":"2604.26590","ref_index":16,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Recommendations for Efficient and Responsible LLM Adoption within Industrial Software Development","primary_cat":"cs.SE","submitted_at":"2026-04-29T12:15:31+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A multi-case study plus survey produces seven actionable recommendations for efficient and responsible LLM use in industrial software engineering.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.20261","ref_index":33,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Memory-Augmented LLM-based Multi-Agent System for Automated Feature Generation on Tabular Data","primary_cat":"cs.AI","submitted_at":"2026-04-22T07:09:30+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"MALMAS is a memory-augmented multi-agent LLM system that generates diverse, high-quality features for tabular data via agent decomposition, routing, and iterative memory-guided refinement.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.17699","ref_index":33,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"SelfHeal: Empirical Fix Pattern Analysis and Bug Repair in LLM Agents","primary_cat":"cs.SE","submitted_at":"2026-04-20T01:28:15+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SelfHeal uses two ReAct agents and empirical fix patterns to repair bugs in LLM agents, outperforming baselines on a new 37-instance benchmark.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[ 76] demonstrated that providing LLMs with a custom interface via SWE-agent leads to state-of-the-art results in autonomous code repair on the SWE-bench dataset. Zhanget al.[ 34] proposed MLE-Agent, an autonomous assistant that stream- lines ML development through a combination of research tools and automated debugging. Beyond single-agent systems, Heet al.[ 33] examined the role of multi-agent LLM systems in software engineer- ing and outlined a path toward scalable, autonomous development. Terragniet al.[ 63] explored the future of human-AI synergy in cod- ing while highlighting the technical challenges ahead. Furthermore, Lianget al.[ 49] presented the RepoCod benchmark to evaluate Python generation, demonstrating that current models often fail"},{"citing_arxiv_id":"2604.13103","ref_index":23,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Fairness in Multi-Agent Systems for Software Engineering: An SDLC-Oriented Rapid Review","primary_cat":"cs.SE","submitted_at":"2026-04-10T13:49:49+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":2.0,"formal_verification":"none","one_line_summary":"A rapid review of fairness in LLM-enabled multi-agent systems for the software development lifecycle concludes that the field lacks standardized evaluations, broad coverage, and effective governance, leaving it unprepared for deployable fair systems.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"Corey Yang-Smith, Ronnie de Souza Santos, and Ahmad Abdellatif much a single model can support development and the resulting changes in reliability, maintainability, and control [28]. Recent research increasingly adopts multi-agent architectures that emulate software teams by assigning roles or coordinating agent pipelines [33]. Systems such as ChatDev [46] and MetaGPT [23] implement waterfall-style processes while AgileCoder [ 40] emphasizes iterative collaboration. Other work targets specific de- velopment activities such as IaC generation [30] and unit test syn- thesis and evaluation [58]. These MAS-based approaches align with AI-native \"Software 3.0\" development [20, 21], but there remains a gap in understanding fairness, bias, and equitable treatment in"},{"citing_arxiv_id":"2604.07192","ref_index":2,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Compact Constraint Encoding for LLM Code Generation: An Empirical Study of Token Economics and Constraint Compliance","primary_cat":"cs.SE","submitted_at":"2026-04-08T15:18:33+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Compact constraint headers reduce prompt tokens by 25-30% with no significant change in constraint compliance rates across tested models and tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.05289","ref_index":15,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"FLARE: Agentic Coverage-Guided Fuzzing for LLM-Based Multi-Agent Systems","primary_cat":"cs.SE","submitted_at":"2026-04-07T00:47:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"FLARE extracts specifications from multi-agent LLM code and applies coverage-guided fuzzing to achieve 96.9% inter-agent and 91.1% intra-agent coverage while uncovering 56 new failures across 16 applications.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"2024. AutoGen Multi-Agent Workflow: AI-powered YouTube Shorts Creation. https://github.com/gswithjeff/ autogen-multi-agent-workflow. Accessed: 2025-12-22. [14] Jeff (gswithjeff). 2025. autogen-multi-agent-workflow: AI-powered YouTube Shorts creation with AutoGen 0.4. https://github.com/gswithjeff/autogen-multi-agent-workflow. Accessed: 2025-08-06. [15] Junda He, Christoph Treude, and David Lo. 2025. LLM-Based Multi-Agent Systems for Software Engineering: Literature Review, Vision, and the Road Ahead.ACM Trans. Softw. Eng. Methodol.34, 5 (2025), 124:1-124:30. doi:10.1145/3712003 [16] Sirui Hong, Mingchen Zhuge, Jonathan Chen, Xiawu Zheng, Yuheng Cheng, Jinlin Wang, Ceyao Zhang, Zili Wang, Steven Ka Shing Yau, Zijuan Lin, Liyang Zhou, Chenyu Ran, Lingfeng Xiao, Chenglin Wu, and Jürgen Schmidhuber."},{"citing_arxiv_id":"2603.18916","ref_index":3,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Agentic Business Process Management: A Research Manifesto","primary_cat":"cs.AI","submitted_at":"2026-03-19T13:52:17+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Agentic Business Process Management reframes BPM around autonomous agents that must exhibit framed autonomy, explainability, conversational actionability, and self-modification to keep their actions aligned with organizational objectives.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.11109","ref_index":20,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"Vision-as-Inverse-Graphics Agent via Interleaved Multimodal Reasoning","primary_cat":"cs.CV","submitted_at":"2026-01-16T09:11:55+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"VIGA introduces a training-free interleaved multimodal reasoning loop that improves vision-as-inverse-graphics accuracy over one-shot baselines on BlenderGym, SlideBench, and new BlenderBench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.24428","ref_index":17,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"CodeWiki: Evaluating AI's Ability to Generate Holistic Documentation for Large-Scale Codebases","primary_cat":"cs.SE","submitted_at":"2025-10-28T13:52:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CodeWiki presents a unified framework for repository-level documentation across seven languages using hierarchical decomposition, recursive multi-agent processing, and multi-modal synthesis, outperforming DeepWiki by 4.73% on CodeWikiBench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2507.15003","ref_index":20,"ref_count":1,"confidence":0.88,"is_internal_anchor":false,"paper_title":"The Rise of AI Teammates in Software Engineering (SE) 3.0: How Autonomous Coding Agents Are Reshaping Software Engineering","primary_cat":"cs.SE","submitted_at":"2025-07-20T15:15:58+00:00","verdict":"CONDITIONAL","verdict_confidence":"UNKNOWN","novelty_score":8.0,"formal_verification":"none","one_line_summary":"AIDev is a new open dataset of 456k AI-agent pull requests showing agents submit code faster than humans but with lower acceptance rates and simpler changes.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}