{"work":{"id":"22aa3d2a-9edd-44c4-b8b8-1442ea805e01","openalex_id":null,"doi":null,"arxiv_id":"2502.13138","raw_key":null,"title":"AIDE: AI-Driven Exploration in the Space of Code","authors":null,"authors_text":"Zhengyao Jiang, Dominik Schmidt, Dhruv Srikanth, Dixing Xu, Ian Kaplan, Deniss Jacenko","year":2025,"venue":"cs.AI","abstract":"Machine learning, the foundation of modern artificial intelligence, has driven innovations that have fundamentally transformed the world. Yet, behind advancements lies a complex and often tedious process requiring labor and compute intensive iteration and experimentation. Engineers and scientists developing machine learning models spend much of their time on trial-and-error tasks instead of conceptualizing innovative solutions or research hypotheses. To address this challenge, we introduce AI-Driven Exploration (AIDE), a machine learning engineering agent powered by large language models (LLMs). AIDE frames machine learning engineering as a code optimization problem, and formulates trial-and-error as a tree search in the space of potential solutions. By strategically reusing and refining promising solutions, AIDE effectively trades computational resources for enhanced performance, achieving state-of-the-art results on multiple machine learning engineering benchmarks, including our Kaggle evaluations, OpenAI MLE-Bench and METRs RE-Bench.","external_url":"https://arxiv.org/abs/2502.13138","cited_by_count":null,"metadata_source":"pith","metadata_fetched_at":"2026-05-25T07:20:28.700154+00:00","pith_arxiv_id":"2502.13138","created_at":"2026-05-10T05:51:10.154030+00:00","updated_at":"2026-06-05T21:23:00.469572+00:00","title_quality_ok":true,"display_title":"AIDE: AI-Driven Exploration in the Space of Code","render_title":"AIDE: AI-Driven Exploration in the Space of Code"},"hub":{"state":{"work_id":"22aa3d2a-9edd-44c4-b8b8-1442ea805e01","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":29,"external_cited_by_count":null,"distinct_field_count":5,"first_pith_cited_at":"2025-09-08T10:08:36+00:00","last_pith_cited_at":"2026-05-20T16:41:51+00:00","author_build_status":"not_needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"not_needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-09T13:25:06.069498+00:00","tier_text":"hub"},"tier":"hub","role_counts":[{"context_role":"background","n":3},{"context_role":"baseline","n":1}],"polarity_counts":[{"context_polarity":"background","n":3},{"context_polarity":"baseline","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}