{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:UDUU2MOVI6KVDDTYFJTQDAJHDU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"29d75e57a332bdea892d43d4a7fdf4a0d53361a051416b349a6f9b2c61b530aa","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-30T11:28:58Z","title_canon_sha256":"2ab7fc16618b56181159d2cdd1b801059fa6fe67c7bc0d9f4dd5e658f389a9f6"},"schema_version":"1.0","source":{"id":"2510.26384","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.26384","created_at":"2026-05-20T00:04:16Z"},{"alias_kind":"arxiv_version","alias_value":"2510.26384v2","created_at":"2026-05-20T00:04:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.26384","created_at":"2026-05-20T00:04:16Z"},{"alias_kind":"pith_short_12","alias_value":"UDUU2MOVI6KV","created_at":"2026-05-20T00:04:16Z"},{"alias_kind":"pith_short_16","alias_value":"UDUU2MOVI6KVDDTY","created_at":"2026-05-20T00:04:16Z"},{"alias_kind":"pith_short_8","alias_value":"UDUU2MOV","created_at":"2026-05-20T00:04:16Z"}],"graph_snapshots":[{"event_id":"sha256:af0f888439407730771a7bb5dfaa3906d86b2255eca7610f5854ffdfcdc48320","target":"graph","created_at":"2026-05-20T00:04:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.26384/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The prohibitive cost of evaluating large language models (LLMs) on comprehensive benchmarks necessitates the creation of small yet representative data subsets (i.e., tiny benchmarks) that enable efficient assessment while retaining predictive fidelity. Current methods for this task operate under a model-centric paradigm, selecting benchmarking items based on the collective performance of existing models. Such approaches are limited by large upfront costs, an inability to immediately handle new benchmarks (\"cold-start\"), and the fragile assumption that future models will share the failure patte","authors_text":"Andrew M. Bean, Jonathan Richard Schwarz, Nabeel Seedat, Shengzhuang Chen","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-30T11:28:58Z","title":"Scales++: Compute Efficient Evaluation Subset Selection with Cognitive Scales Embeddings"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.26384","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d3b86c304b5489cd1643de2d9bb4d10165f92e6612d49f17d7907e3e31870415","target":"record","created_at":"2026-05-20T00:04:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"29d75e57a332bdea892d43d4a7fdf4a0d53361a051416b349a6f9b2c61b530aa","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-30T11:28:58Z","title_canon_sha256":"2ab7fc16618b56181159d2cdd1b801059fa6fe67c7bc0d9f4dd5e658f389a9f6"},"schema_version":"1.0","source":{"id":"2510.26384","kind":"arxiv","version":2}},"canonical_sha256":"a0e94d31d54795518e782a670181271d2d9f6da5b6d1a9bff976e84e5b7c29df","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a0e94d31d54795518e782a670181271d2d9f6da5b6d1a9bff976e84e5b7c29df","first_computed_at":"2026-05-20T00:04:16.490647Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:16.490647Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"MI+vC+Lf043EmauC6rwE1fGQihP1bQTTZd3I8BI44Xbu4OzR+Ys8FA9FPkZsMtr829WYeiBL+TnaDIvjWqE4CA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:16.491373Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.26384","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d3b86c304b5489cd1643de2d9bb4d10165f92e6612d49f17d7907e3e31870415","sha256:af0f888439407730771a7bb5dfaa3906d86b2255eca7610f5854ffdfcdc48320"],"state_sha256":"d9bf4152e38b97e290e5d5a5041c11774f8fcabf6e92058a2cab3ce9e2a3e067"}