{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5HRNFCI66BKHCCG5FPVYGXCAD3","short_pith_number":"pith:5HRNFCI6","schema_version":"1.0","canonical_sha256":"e9e2d2891ef0547108dd2beb835c401ef35b3a29141c2c4274fdbb837b0a792f","source":{"kind":"arxiv","id":"2605.25773","version":1},"attestation_state":"computed","paper":{"title":"Efficient Benchmarking Is Just Feature Selection and Multiple Regression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.LG"],"primary_cat":"stat.ML","authors_text":"Acyr Locatelli, Kris Cao, Sam Bowyer","submitted_at":"2026-05-25T12:23:31Z","abstract_excerpt":"Efficient benchmarking techniques aim to lower the computational cost of evaluating LLMs by predicting full benchmark scores using only a subset of a benchmark's questions. By reframing this problem as an instance of multiple regression with feature selection, we find that existing efficient benchmarking methods can be greatly improved by simply using kernel ridge regression at the prediction stage. Additionally, using an information-theoretic feature-selection algorithm called minimum redundancy maximum relevance (mRMR), we can further improve upon these methods by selecting question subsets "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.25773","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ML","submitted_at":"2026-05-25T12:23:31Z","cross_cats_sorted":["cs.AI","cs.CL","cs.LG"],"title_canon_sha256":"3e499a652f7d52b2c8d79d54cbf3175286431727139223651e80f790ecd67182","abstract_canon_sha256":"5fffd110a9b3fb52ce61e0c8c3d8c6d8b929ff61308a263512b5b61d56cefdff"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:04:54.127601Z","signature_b64":"WbeWvDXC0aik2w0b4hlHWA74/YmURxXQopmn9GFlulzX2vQwrYfY7yAwRX+SoLK/La4KKTXPH+PpMKMXQ4W1CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e9e2d2891ef0547108dd2beb835c401ef35b3a29141c2c4274fdbb837b0a792f","last_reissued_at":"2026-05-26T02:04:54.127156Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:04:54.127156Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Efficient Benchmarking Is Just Feature Selection and Multiple Regression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.LG"],"primary_cat":"stat.ML","authors_text":"Acyr Locatelli, Kris Cao, Sam Bowyer","submitted_at":"2026-05-25T12:23:31Z","abstract_excerpt":"Efficient benchmarking techniques aim to lower the computational cost of evaluating LLMs by predicting full benchmark scores using only a subset of a benchmark's questions. By reframing this problem as an instance of multiple regression with feature selection, we find that existing efficient benchmarking methods can be greatly improved by simply using kernel ridge regression at the prediction stage. Additionally, using an information-theoretic feature-selection algorithm called minimum redundancy maximum relevance (mRMR), we can further improve upon these methods by selecting question subsets "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.25773","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.25773/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.25773","created_at":"2026-05-26T02:04:54.127218+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.25773v1","created_at":"2026-05-26T02:04:54.127218+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.25773","created_at":"2026-05-26T02:04:54.127218+00:00"},{"alias_kind":"pith_short_12","alias_value":"5HRNFCI66BKH","created_at":"2026-05-26T02:04:54.127218+00:00"},{"alias_kind":"pith_short_16","alias_value":"5HRNFCI66BKHCCG5","created_at":"2026-05-26T02:04:54.127218+00:00"},{"alias_kind":"pith_short_8","alias_value":"5HRNFCI6","created_at":"2026-05-26T02:04:54.127218+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3","json":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3.json","graph_json":"https://pith.science/api/pith-number/5HRNFCI66BKHCCG5FPVYGXCAD3/graph.json","events_json":"https://pith.science/api/pith-number/5HRNFCI66BKHCCG5FPVYGXCAD3/events.json","paper":"https://pith.science/paper/5HRNFCI6"},"agent_actions":{"view_html":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3","download_json":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3.json","view_paper":"https://pith.science/paper/5HRNFCI6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.25773&json=true","fetch_graph":"https://pith.science/api/pith-number/5HRNFCI66BKHCCG5FPVYGXCAD3/graph.json","fetch_events":"https://pith.science/api/pith-number/5HRNFCI66BKHCCG5FPVYGXCAD3/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3/action/storage_attestation","attest_author":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3/action/author_attestation","sign_citation":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3/action/citation_signature","submit_replication":"https://pith.science/pith/5HRNFCI66BKHCCG5FPVYGXCAD3/action/replication_record"}},"created_at":"2026-05-26T02:04:54.127218+00:00","updated_at":"2026-05-26T02:04:54.127218+00:00"}