{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TG2D35KIQXXMDYNE57VUU5XOVM","short_pith_number":"pith:TG2D35KI","schema_version":"1.0","canonical_sha256":"99b43df54885eec1e1a4efeb4a76eeab00f47fdb798c47cb9ce61a8aa3e8dd5f","source":{"kind":"arxiv","id":"2605.29448","version":1},"attestation_state":"computed","paper":{"title":"How Much Is a Dataset Worth? Scaling Laws, the Vendi Score, and Matrix Spectral Functions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.IT","math.IT"],"primary_cat":"cs.LG","authors_text":"Arnav M. Das, Gantavya Bhatt, Jeff A. Bilmes","submitted_at":"2026-05-28T06:40:29Z","abstract_excerpt":"Neural scaling laws appraise data through dataset size, while the Vendi Score uses quantum entropy to measure dataset value. We show both that common neural-scaling-law objectives and the Vendi Score are submodular. We further show that the Vendi Score is a special case of a broader class of submodular objectives that we call matrix spectral functions. This also includes determinantal (DPP) objectives, as well as many others. We also introduce weakly matrix monotone functions and show how they lead to weakly submodular matrix spectral functions, yielding a broad family of practical objectives "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.29448","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-28T06:40:29Z","cross_cats_sorted":["cs.AI","cs.CV","cs.IT","math.IT"],"title_canon_sha256":"4c9f226b7899da37378209855a1bfe87983493f81afe201fbea152e8f3138cbd","abstract_canon_sha256":"2bb03a160ff7b78b92923c30deb899de06544b6adcb27b38d02d45d990866276"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:05:39.818931Z","signature_b64":"W3sHFN8bdFvsJoNv2Qr0AQvtwxUJnMuNiMeLSHxdBiL6LjsPT5OcxdyBQ/nJhhyiCrZ6c/j9h4VCITaT3OvdBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"99b43df54885eec1e1a4efeb4a76eeab00f47fdb798c47cb9ce61a8aa3e8dd5f","last_reissued_at":"2026-05-29T01:05:39.818065Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:05:39.818065Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"How Much Is a Dataset Worth? Scaling Laws, the Vendi Score, and Matrix Spectral Functions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.IT","math.IT"],"primary_cat":"cs.LG","authors_text":"Arnav M. Das, Gantavya Bhatt, Jeff A. Bilmes","submitted_at":"2026-05-28T06:40:29Z","abstract_excerpt":"Neural scaling laws appraise data through dataset size, while the Vendi Score uses quantum entropy to measure dataset value. We show both that common neural-scaling-law objectives and the Vendi Score are submodular. We further show that the Vendi Score is a special case of a broader class of submodular objectives that we call matrix spectral functions. This also includes determinantal (DPP) objectives, as well as many others. We also introduce weakly matrix monotone functions and show how they lead to weakly submodular matrix spectral functions, yielding a broad family of practical objectives "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.29448","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.29448/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.29448","created_at":"2026-05-29T01:05:39.818215+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.29448v1","created_at":"2026-05-29T01:05:39.818215+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.29448","created_at":"2026-05-29T01:05:39.818215+00:00"},{"alias_kind":"pith_short_12","alias_value":"TG2D35KIQXXM","created_at":"2026-05-29T01:05:39.818215+00:00"},{"alias_kind":"pith_short_16","alias_value":"TG2D35KIQXXMDYNE","created_at":"2026-05-29T01:05:39.818215+00:00"},{"alias_kind":"pith_short_8","alias_value":"TG2D35KI","created_at":"2026-05-29T01:05:39.818215+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM","json":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM.json","graph_json":"https://pith.science/api/pith-number/TG2D35KIQXXMDYNE57VUU5XOVM/graph.json","events_json":"https://pith.science/api/pith-number/TG2D35KIQXXMDYNE57VUU5XOVM/events.json","paper":"https://pith.science/paper/TG2D35KI"},"agent_actions":{"view_html":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM","download_json":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM.json","view_paper":"https://pith.science/paper/TG2D35KI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.29448&json=true","fetch_graph":"https://pith.science/api/pith-number/TG2D35KIQXXMDYNE57VUU5XOVM/graph.json","fetch_events":"https://pith.science/api/pith-number/TG2D35KIQXXMDYNE57VUU5XOVM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM/action/storage_attestation","attest_author":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM/action/author_attestation","sign_citation":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM/action/citation_signature","submit_replication":"https://pith.science/pith/TG2D35KIQXXMDYNE57VUU5XOVM/action/replication_record"}},"created_at":"2026-05-29T01:05:39.818215+00:00","updated_at":"2026-05-29T01:05:39.818215+00:00"}