{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:NPY6YP34APZ5MI354ZUMCQLRI7","short_pith_number":"pith:NPY6YP34","canonical_record":{"source":{"id":"2605.11518","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T04:42:35Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"b826a7e59a678a96e962a656cf72aa557db0f8b51b7936393f02a44729e3955f","abstract_canon_sha256":"7d9902c2e5ce6bdcad9011a826d0381e46c519a8a068eda9c02f43d935297960"},"schema_version":"1.0"},"canonical_sha256":"6bf1ec3f7c03f3d6237de668c1417147f4984f5eb2c54e186dcd8fb39d7e817a","source":{"kind":"arxiv","id":"2605.11518","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.11518","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"arxiv_version","alias_value":"2605.11518v2","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.11518","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_12","alias_value":"NPY6YP34APZ5","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_16","alias_value":"NPY6YP34APZ5MI35","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_8","alias_value":"NPY6YP34","created_at":"2026-05-20T00:03:17Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:NPY6YP34APZ5MI354ZUMCQLRI7","target":"record","payload":{"canonical_record":{"source":{"id":"2605.11518","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T04:42:35Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"b826a7e59a678a96e962a656cf72aa557db0f8b51b7936393f02a44729e3955f","abstract_canon_sha256":"7d9902c2e5ce6bdcad9011a826d0381e46c519a8a068eda9c02f43d935297960"},"schema_version":"1.0"},"canonical_sha256":"6bf1ec3f7c03f3d6237de668c1417147f4984f5eb2c54e186dcd8fb39d7e817a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:17.857090Z","signature_b64":"wjhtVj2FRm7ac8/WbUw4C5OaSR37CgO+KzuUD5yDGXFt85yFxDa1oDuorE7bYkHpTzf9i7ox98jbg4BubLJJAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6bf1ec3f7c03f3d6237de668c1417147f4984f5eb2c54e186dcd8fb39d7e817a","last_reissued_at":"2026-05-20T00:03:17.856081Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:17.856081Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.11518","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"efuJrMnLOsaIdlZxYcGLADBXqlEl3LPPdG10Y8948ks840BkVLsY3jRxRfDRD2nEZsh4Wqn3J0j0An3ffPf0CA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T06:16:16.650180Z"},"content_sha256":"0879397bfc3e222329f1b0ae3465ed1c29c33d1636315a4f1de4c4552c76203b","schema_version":"1.0","event_id":"sha256:0879397bfc3e222329f1b0ae3465ed1c29c33d1636315a4f1de4c4552c76203b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:NPY6YP34APZ5MI354ZUMCQLRI7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"AutoLLMResearch: Training Research Agents for Automating LLM Experiment Configuration - Learning from Cheap, Optimizing Expensive","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings.","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Nitesh V. Chawla, Olaf Wiest, Taicheng Guo, Xiangliang Zhang","submitted_at":"2026-05-12T04:42:35Z","abstract_excerpt":"Effectively configuring scalable large language model (LLM) experiments, spanning architecture design, hyperparameter tuning, and beyond, is crucial for advancing LLM research, as poor configuration choices can waste substantial computational resources and prevent models from realizing their full potential. Prior automated methods are designed for low-cost settings where repeated trial and error is feasible, but scalable LLM experiments are too expensive for such extensive iteration. To our knowledge, no work has addressed the automation of high-cost LLM experiment configurations, leaving this"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Extensive evaluation against diverse strong baselines on held-out experiments demonstrates the effectiveness, generalization, and interpretability of our framework, supporting its potential as a practical and general solution for scalable real-world LLM experiment automation.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The multi-fidelity experimental environment captures the structure of the LLM configuration landscape in a way that permits reliable cross-fidelity extrapolation from cheap to expensive settings.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"AutoLLMResearch trains agents via a multi-fidelity environment and MDP pipeline to extrapolate configuration principles from inexpensive to costly LLM experiments.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8b2de54329a0b26111eaa4646277f87e41a8658a221996bf090522a75541c6cd"},"source":{"id":"2605.11518","kind":"arxiv","version":2},"verdict":{"id":"05ef3df2-cac9-445d-972f-4ccb3b7dd3b2","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-13T01:12:48.706472Z","strongest_claim":"Extensive evaluation against diverse strong baselines on held-out experiments demonstrates the effectiveness, generalization, and interpretability of our framework, supporting its potential as a practical and general solution for scalable real-world LLM experiment automation.","one_line_summary":"AutoLLMResearch trains agents via a multi-fidelity environment and MDP pipeline to extrapolate configuration principles from inexpensive to costly LLM experiments.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The multi-fidelity experimental environment captures the structure of the LLM configuration landscape in a way that permits reliable cross-fidelity extrapolation from cheap to expensive settings.","pith_extraction_headline":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.11518/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T12:34:40.519045Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T09:31:18.944757Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T08:20:51.825102Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"f51a54399b0d1343d257f70ad1ced85f154c090362badca21c66e2270142c1b8"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"05ef3df2-cac9-445d-972f-4ccb3b7dd3b2"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:17Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A3ZB+JRStB4ekq4N1OjUi7TidhPeCMQa6QACkPfjtJkktpci/P9+V0IcRlMLjIuY1ApOOV2l7SkCTu8tztGaBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T06:16:16.651211Z"},"content_sha256":"72479d568b96f3922467f5d2c32d9d11ce22c48e414bae701eafb85a93fa6355","schema_version":"1.0","event_id":"sha256:72479d568b96f3922467f5d2c32d9d11ce22c48e414bae701eafb85a93fa6355"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NPY6YP34APZ5MI354ZUMCQLRI7/bundle.json","state_url":"https://pith.science/pith/NPY6YP34APZ5MI354ZUMCQLRI7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NPY6YP34APZ5MI354ZUMCQLRI7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T06:16:16Z","links":{"resolver":"https://pith.science/pith/NPY6YP34APZ5MI354ZUMCQLRI7","bundle":"https://pith.science/pith/NPY6YP34APZ5MI354ZUMCQLRI7/bundle.json","state":"https://pith.science/pith/NPY6YP34APZ5MI354ZUMCQLRI7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NPY6YP34APZ5MI354ZUMCQLRI7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:NPY6YP34APZ5MI354ZUMCQLRI7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7d9902c2e5ce6bdcad9011a826d0381e46c519a8a068eda9c02f43d935297960","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T04:42:35Z","title_canon_sha256":"b826a7e59a678a96e962a656cf72aa557db0f8b51b7936393f02a44729e3955f"},"schema_version":"1.0","source":{"id":"2605.11518","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.11518","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"arxiv_version","alias_value":"2605.11518v2","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.11518","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_12","alias_value":"NPY6YP34APZ5","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_16","alias_value":"NPY6YP34APZ5MI35","created_at":"2026-05-20T00:03:17Z"},{"alias_kind":"pith_short_8","alias_value":"NPY6YP34","created_at":"2026-05-20T00:03:17Z"}],"graph_snapshots":[{"event_id":"sha256:72479d568b96f3922467f5d2c32d9d11ce22c48e414bae701eafb85a93fa6355","target":"graph","created_at":"2026-05-20T00:03:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Extensive evaluation against diverse strong baselines on held-out experiments demonstrates the effectiveness, generalization, and interpretability of our framework, supporting its potential as a practical and general solution for scalable real-world LLM experiment automation."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The multi-fidelity experimental environment captures the structure of the LLM configuration landscape in a way that permits reliable cross-fidelity extrapolation from cheap to expensive settings."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"AutoLLMResearch trains agents via a multi-fidelity environment and MDP pipeline to extrapolate configuration principles from inexpensive to costly LLM experiments."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings."}],"snapshot_sha256":"8b2de54329a0b26111eaa4646277f87e41a8658a221996bf090522a75541c6cd"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T12:34:40.519045Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T09:31:18.944757Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T08:20:51.825102Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.11518/integrity.json","findings":[],"snapshot_sha256":"f51a54399b0d1343d257f70ad1ced85f154c090362badca21c66e2270142c1b8","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Effectively configuring scalable large language model (LLM) experiments, spanning architecture design, hyperparameter tuning, and beyond, is crucial for advancing LLM research, as poor configuration choices can waste substantial computational resources and prevent models from realizing their full potential. Prior automated methods are designed for low-cost settings where repeated trial and error is feasible, but scalable LLM experiments are too expensive for such extensive iteration. To our knowledge, no work has addressed the automation of high-cost LLM experiment configurations, leaving this","authors_text":"Nitesh V. Chawla, Olaf Wiest, Taicheng Guo, Xiangliang Zhang","cross_cats":["cs.CL","cs.LG"],"headline":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T04:42:35Z","title":"AutoLLMResearch: Training Research Agents for Automating LLM Experiment Configuration - Learning from Cheap, Optimizing Expensive"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.11518","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-13T01:12:48.706472Z","id":"05ef3df2-cac9-445d-972f-4ccb3b7dd3b2","model_set":{"reader":"grok-4.3"},"one_line_summary":"AutoLLMResearch trains agents via a multi-fidelity environment and MDP pipeline to extrapolate configuration principles from inexpensive to costly LLM experiments.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"AutoLLMResearch trains agents to learn LLM configuration principles from cheap low-fidelity experiments and extrapolate them to expensive high-fidelity settings.","strongest_claim":"Extensive evaluation against diverse strong baselines on held-out experiments demonstrates the effectiveness, generalization, and interpretability of our framework, supporting its potential as a practical and general solution for scalable real-world LLM experiment automation.","weakest_assumption":"The multi-fidelity experimental environment captures the structure of the LLM configuration landscape in a way that permits reliable cross-fidelity extrapolation from cheap to expensive settings."}},"verdict_id":"05ef3df2-cac9-445d-972f-4ccb3b7dd3b2"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0879397bfc3e222329f1b0ae3465ed1c29c33d1636315a4f1de4c4552c76203b","target":"record","created_at":"2026-05-20T00:03:17Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7d9902c2e5ce6bdcad9011a826d0381e46c519a8a068eda9c02f43d935297960","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-12T04:42:35Z","title_canon_sha256":"b826a7e59a678a96e962a656cf72aa557db0f8b51b7936393f02a44729e3955f"},"schema_version":"1.0","source":{"id":"2605.11518","kind":"arxiv","version":2}},"canonical_sha256":"6bf1ec3f7c03f3d6237de668c1417147f4984f5eb2c54e186dcd8fb39d7e817a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6bf1ec3f7c03f3d6237de668c1417147f4984f5eb2c54e186dcd8fb39d7e817a","first_computed_at":"2026-05-20T00:03:17.856081Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:17.856081Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wjhtVj2FRm7ac8/WbUw4C5OaSR37CgO+KzuUD5yDGXFt85yFxDa1oDuorE7bYkHpTzf9i7ox98jbg4BubLJJAg==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:17.857090Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.11518","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0879397bfc3e222329f1b0ae3465ed1c29c33d1636315a4f1de4c4552c76203b","sha256:72479d568b96f3922467f5d2c32d9d11ce22c48e414bae701eafb85a93fa6355"],"state_sha256":"20ff778ed859c412ad1b07f8bae4931dbcbd87827e4fe6370af881451062c4f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"sBcdJY/vqPeQgCH/m6co7b7aVh1VANEDRr5UN7NA16ie0RLT5kee5iXI7z3jfkUxZbG5N0USumP9yRVzN28ADw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T06:16:16.655658Z","bundle_sha256":"87367fd5ade5cef9420a473a03676728671e7e634d2b3a38c68df9c944772acb"}}