{"total":15,"items":[{"citing_arxiv_id":"2607.01468","ref_index":9,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CADENZA in Action: Breaking the Monolith with Intent-Dependent Plan Spaces for Semantic Queries","primary_cat":"cs.DB","submitted_at":"2026-07-01T21:00:40+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CADENZA demonstrates an optimizer that breaks semantic query intents into alternative plans, selects implementations per step, and optimizes under user preferences via a web interface.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2607.00254","ref_index":21,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Query-Centric Optimization of AI Workflows via Approximate Query Processing and Proxy Models","primary_cat":"cs.DB","submitted_at":"2026-06-30T23:05:59+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Query-centric AQP and proxy-model strategies reduce expensive model calls by 60-90% with under 10% error on TPC-DS and LLM tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.31808","ref_index":49,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Large Databases Need Small, Open-Weight Language Models","primary_cat":"cs.AI","submitted_at":"2026-06-30T15:25:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"Quantized open-weight LMs on consumer hardware match closed-source API accuracy for LM-enhanced relational operators while delivering 390x lower cost and 3.8x lower latency in the BlendSQL framework.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.29151","ref_index":26,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CADENZA: Compiling Natural-Language Intent into Task-Specific Operator DAGs for Semantic Query Processing","primary_cat":"cs.DB","submitted_at":"2026-06-28T02:13:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CADENZA introduces TxRA and dual planners to compile semantic operator intents into optimized task DAGs, claiming large gains in quality, latency, and cost on SemBench.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.28365","ref_index":37,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"CAMI: Cost-Aware Agent-Guided Multi-Indexing for Semantic Retrieval","primary_cat":"cs.IR","submitted_at":"2026-06-14T16:59:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"CAMI frames multi-index construction for semantic retrieval as a budgeted multi-objective portfolio problem and uses agent-guided search plus confidence-aware pruning to find high-recall configurations with reduced evaluation cost.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00774","ref_index":26,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SCOPE: Cost-Efficient Model Selection for Compound AI Systems under Quality Constraints","primary_cat":"cs.DB","submitted_at":"2026-05-30T15:33:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SCOPE is a new optimization method that uses per-query estimates and confidence bounds to select cost-efficient LLM combinations for compound AI systems under quality constraints, with claimed theoretical guarantees and up to 20x lower search cost than baselines on data tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.23477","ref_index":40,"ref_count":2,"confidence":0.9,"is_internal_anchor":false,"paper_title":"SEMA-SQL: Beyond Traditional Relational Querying with Large Language Models","primary_cat":"cs.DB","submitted_at":"2026-04-26T00:05:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SEMA-SQL automates natural language to efficient hybrid queries combining relational algebra with LLM semantic operations via a new Hybrid Relational Algebra abstraction.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15233","ref_index":14,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Blue Data Intelligence Layer: Streaming Data and Agents for Multi-source Multi-modal Data-Centric Applications","primary_cat":"cs.AI","submitted_at":"2026-04-16T17:10:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Blue DIL is a new architecture that unifies structured enterprise data, LLM world knowledge, and personal context through declarative query plans and agents for multi-source multi-modal applications.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.15184","ref_index":28,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Agent-Aided Design for Dynamic CAD Models","primary_cat":"cs.AI","submitted_at":"2026-04-16T16:15:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AADvark extends agent-aided CAD design to dynamic 3D assemblies with movable parts by integrating constraint solvers and visual feedback to create a verification signal for the agent.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"[26] Danila Rukhovich, Elona Dupont, Dimitrios Mallis, Kseniya Cherenkova, Anis Kacem, and Djamila Aouada. 2025. CAD-Recode: Reverse Engineering CAD Code from Point Clouds. arXiv:2412.14042 [cs.CV] https://arxiv.org/abs/2412.14042 [27] Matthew Russo and Tim Kraska. 2025. Deep Research is the New An- alytics System: Towards Building the Runtime for AI-Driven Analytics. arXiv:2509.02751 [cs.AI] https://arxiv.org/abs/2509.02751 [28] Matthew Russo, Sivaprasad Sudhir, Gerardo Vitagliano, Chunwei Liu, Tim Kraska, Samuel Madden, and Michael Cafarella. 2025. Abacus: A Cost-Based Optimizer for Semantic Operator Systems. arXiv:2505.14661 [cs.DB] https://arxiv.org/abs/ 2505.14661 [29] Shreya Shankar, Tristan Chambers, Tarak Shah, Aditya G. Parameswaran, and Eugene Wu. 2025. DocETL: Agentic Query Rewriting and Evaluation for Complex"},{"citing_arxiv_id":"2604.09944","ref_index":27,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"PLOP: Cost-Based Placement of Semantic Operators in Hybrid Query Plans","primary_cat":"cs.DB","submitted_at":"2026-04-10T22:51:41+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PLOP is a cost-based optimizer that finds optimal placements for semantic LLM operators in hybrid query plans via dynamic programming, delivering up to 1.5x speedup and 4.29x cost reduction on 44 benchmark queries while preserving accuracy.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.02655","ref_index":50,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Semantic Data Processing with Holistic Data Understanding","primary_cat":"cs.DB","submitted_at":"2026-04-03T02:37:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HoldUp uses LLM-guided clustering to provide holistic dataset context for semantic operators, yielding up to 33% higher classification accuracy and 30% higher scoring accuracy than row-by-row LLM processing across 15 datasets.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"B (f) MedrxivS2S |Si| = 40 |Si| = 80 |Si| = 160 Figure 11: Effect of𝑚and|𝑆 𝑖 |. calibration and assessment. InProceedings of the 36th international ACM SIGIR conference on Research and development in information retrieval. 623-632. [49] Norbert Schwarz. 1999. Self-reports: How the questions shape the answers. American psychologist54, 2 (1999), 93. [50] Norbert Schwarz and Seymour Sudman. 2012.Context effects in social and psychological research. Springer Science & Business Media. [51] Shreya Shankar, Tristan Chambers, Tarak Shah, Aditya G Parameswaran, and Eugene Wu. 2024. DocETL: Agentic Query Rewriting and Evaluation for Complex Document Processing.arXiv preprint arXiv:2410.12189(2024). [52] Shreya Shankar, Sepanta Zeighami, and Aditya Parameswaran."},{"citing_arxiv_id":"2603.15970","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"100x Cost & Latency Reduction: Performance Analysis of AI Query Approximation using Lightweight Proxy Models","primary_cat":"cs.DB","submitted_at":"2026-03-16T22:42:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Lightweight proxy models deliver over 100x cost and latency savings for semantic AI queries in databases with accuracy preserved or improved on benchmarks up to 10M rows.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2603.02537","ref_index":46,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Large Language Model-Enhanced Relational Operators: Taxonomy, Benchmark, and Analysis","primary_cat":"cs.DB","submitted_at":"2026-03-03T02:51:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"The authors define a taxonomy for LLM-enhanced relational operators categorized into Select, Match, Impute, Cluster and Order, and release LROBench to evaluate single and multi-operator queries on semantic database processing.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2602.21480","ref_index":44,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Both Ends Count! Just How Good are LLM Agents at \"Text-to-Big SQL\"?","primary_cat":"cs.DB","submitted_at":"2026-02-25T01:12:35+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"New Text-to-Big SQL metrics show that LLM agents must balance accuracy with cost and speed at scale, where GPT-4o trades some accuracy for up to 12x speedup and GPT-5.2 proves more cost-effective than Gemini 3 Pro on large inputs.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2508.05012","ref_index":10,"ref_count":1,"confidence":0.9,"is_internal_anchor":false,"paper_title":"Making Prompts First-Class Citizens for Adaptive LLM Pipelines","primary_cat":"cs.DB","submitted_at":"2025-08-07T03:49:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SPEAR proposes structured prompt views, runtime adaptive refinement, and policy rules to make prompts first-class, versioned, and evolvable components in complex LLM applications.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}