{"total":16,"items":[{"citing_arxiv_id":"2606.28279","ref_index":4,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Agentic Hardware Design as Repository-Level Code Evolution","primary_cat":"cs.AR","submitted_at":"2026-06-26T17:21:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"HORIZON applies repository-level self-evolution to hardware design artifacts and reports 100% completion on ChipBench, RTLLM, Verilog-Eval, and nine CVDP categories using a hands-free agent loop.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.27350","ref_index":83,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CHIA: An open-source framework for principled, agentic AI-driven hardware/software co-design research","primary_cat":"cs.AR","submitted_at":"2026-06-25T17:55:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CHIA introduces a framework for building and deploying agentic AI co-design flows as CHIA loops with tool nodes, reliability mechanisms, and five case-study demonstrations.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.19387","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Interpretable and Verifiable Hardware Generation with LLM-Driven Stepwise Refinement","primary_cat":"cs.SE","submitted_at":"2026-06-16T01:28:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Framework uses LLM-driven stepwise application of transformation rules to generate verifiable RTL hardware designs from specifications.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.12983","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Structured Testbench Generation for LLM-Driven HDL Design and Verification-Oriented Data Curation","primary_cat":"cs.AI","submitted_at":"2026-06-11T07:19:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"STG generates deterministic testbenches 720x faster than iterative LLM flows with higher coverage and fewer false passes, while serving as an 11x faster data curation engine with 127x less energy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.08976","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RTL-BenchLS: A Large-Scale Benchmark for RTL Reasoning and Generation with Large Language Models","primary_cat":"cs.AI","submitted_at":"2026-06-08T03:21:33+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"RTL-BenchLS supplies a large-scale formally verified benchmark and three novel tasks that expose low performance of frontier LLMs on realistic RTL reasoning and generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.05680","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CASS-RTL: Correctness-Aware Subspace Steering for RTL Generation with LLMs","primary_cat":"cs.PL","submitted_at":"2026-06-04T04:02:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CASS-RTL identifies correctness-linked attention heads, builds a steering subspace from them, and applies a geometry-aware intervention that raises pass@1/5/10 accuracy 10-20% on VerilogEval and 5% on CVDP across multiple LLMs without retraining or extra labels.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27472","ref_index":17,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"AssertLLM2: A Comprehensive LLM Benchmark for Assertion Generation from Design Specifications","primary_cat":"cs.AR","submitted_at":"2026-05-26T10:49:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"AssertLLM2 introduces a benchmark of 83 designs supporting bug-prevention and bug-hunting assertion generation tasks with evaluation across syntactic, formal, coverage, and mutation-based metrics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.15537","ref_index":15,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RTL-BenchMT: Dynamic Maintenance of RTL Generation Benchmark Through Agent-Assisted Analysis and Revision","primary_cat":"cs.AI","submitted_at":"2026-05-15T02:17:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RTL-BenchMT is an agent-assisted framework for dynamically maintaining RTL generation benchmarks by fixing flaws and reducing overfitting in LLM-based EDA applications.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.12857","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ChipMATE: Multi-Agent Training via Reinforcement Learning for Enhanced RTL Generation","primary_cat":"cs.MA","submitted_at":"2026-05-13T01:04:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A self-trained multi-agent RL framework pairs Verilog and Python agents for oracle-free mutual verification in RTL generation and reports 75.0% / 80.1% pass@1 on VerilogEval V2 using 4B / 9B models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27780","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"RuC: HDL-Agnostic Rule Completion Benchmark Generation","primary_cat":"cs.AR","submitted_at":"2026-04-30T12:19:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RuC generates language-agnostic, grammar-based benchmarks for evaluating LLMs on RTL code completion at controllable granularities, demonstrated on SystemVerilog designs from Tiny Tapeout and a RISC-V core where Fill-in-the-Middle prompting performed best.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27238","ref_index":16,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SafeTune: Mitigating Data Poisoning in LLM Fine-Tuning for RTL Code Generation","primary_cat":"cs.CR","submitted_at":"2026-04-29T22:26:00+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SafeTune uses GNN-based structural anomaly detection and semantic prompt classification to filter poisoned data in LLM fine-tuning for RTL generation, enhancing robustness against hardware Trojan insertion without altering the base model.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.19856","ref_index":2,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"ChipCraftBrain: Validation-First RTL Generation via Multi-Agent Orchestration","primary_cat":"cs.AR","submitted_at":"2026-04-21T17:20:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"ChipCraftBrain achieves 97.2% pass rate on VerilogEval and 94.7% on CVDP benchmarks for generating functional RTL code using adaptive multi-agent orchestration and hybrid reasoning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15657","ref_index":27,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Understanding Inference-Time Token Allocation and Coverage Limits in Agentic Hardware Verification","primary_cat":"cs.AR","submitted_at":"2026-04-17T03:15:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Domain-specialized LLM agents for hardware verification close 95-99% coverage using 4-13x fewer tokens and 2-4x faster convergence than general-purpose agents by reallocating tokens toward coverage-directed reasoning.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15606","ref_index":10,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Spec2Cov: An Agentic Framework for Code Coverage Closure of Digital Hardware Designs","primary_cat":"cs.AR","submitted_at":"2026-04-17T01:08:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Spec2Cov uses an LLM agent in a feedback loop with a hardware simulator to generate tests from specs, achieving 100% coverage on simple designs and up to 49% on complex ones across 26 benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14989","ref_index":29,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Dr. RTL: Autonomous Agentic RTL Optimization through Tool-Grounded Self-Improvement","primary_cat":"cs.AI","submitted_at":"2026-04-16T13:18:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Dr. RTL's multi-agent framework with group-relative skill learning achieves 21% WNS and 17% TNS timing improvements plus 6% area reduction on 20 real-world RTL designs over commercial synthesis tools.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.14709","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"HWE-Bench: Benchmarking LLM Agents on Real-World Hardware Bug Repair Tasks","primary_cat":"cs.AI","submitted_at":"2026-04-16T07:19:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":8.0,"formal_verification":"none","one_line_summary":"HWE-Bench is the first repository-level benchmark for LLM agents on real hardware bug repair, where the best agent fixes 70.7% of 417 tasks but drops below 65% on complex SoC projects.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}