{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:C3KCFWXIO2KT4ISXFL2VXA4BV2","short_pith_number":"pith:C3KCFWXI","schema_version":"1.0","canonical_sha256":"16d422dae876953e22572af55b8381ae9d57200ca75ce5eb7eeebceaba5dde69","source":{"kind":"arxiv","id":"1904.02868","version":2},"attestation_state":"computed","paper":{"title":"Data Shapley: Equitable Valuation of Data for Machine Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Amirata Ghorbani, James Zou","submitted_at":"2019-04-05T04:54:10Z","abstract_excerpt":"As data becomes the fuel driving technological and economic growth, a fundamental challenge is how to quantify the value of data in algorithmic predictions and decisions. For example, in healthcare and consumer markets, it has been suggested that individuals should be compensated for the data that they generate, but it is not clear what is an equitable valuation for individual data. In this work, we develop a principled framework to address data valuation in the context of supervised machine learning. Given a learning algorithm trained on $n$ data points to produce a predictor, we propose data"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.02868","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-04-05T04:54:10Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"362a6f546e0e5e62321e740c88b3058177c54f3c2be5cce0ea1a6849428249ad","abstract_canon_sha256":"3c289d7df6b1e289c4686cc5eb315e8997119fb86f814448f99118f9c34d78af"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:49.587697Z","signature_b64":"ILqa8Gh8hW3iMo0QCubYl6QnMH1robgyyAr5dfYp7mTv65MKS9IO/xosSWBBaYPJas7dSiLkFsJcIrwPX7duDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"16d422dae876953e22572af55b8381ae9d57200ca75ce5eb7eeebceaba5dde69","last_reissued_at":"2026-05-17T23:43:49.587107Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:49.587107Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Data Shapley: Equitable Valuation of Data for Machine Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"stat.ML","authors_text":"Amirata Ghorbani, James Zou","submitted_at":"2019-04-05T04:54:10Z","abstract_excerpt":"As data becomes the fuel driving technological and economic growth, a fundamental challenge is how to quantify the value of data in algorithmic predictions and decisions. For example, in healthcare and consumer markets, it has been suggested that individuals should be compensated for the data that they generate, but it is not clear what is an equitable valuation for individual data. In this work, we develop a principled framework to address data valuation in the context of supervised machine learning. Given a learning algorithm trained on $n$ data points to produce a predictor, we propose data"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.02868","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.02868","created_at":"2026-05-17T23:43:49.587196+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.02868v2","created_at":"2026-05-17T23:43:49.587196+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.02868","created_at":"2026-05-17T23:43:49.587196+00:00"},{"alias_kind":"pith_short_12","alias_value":"C3KCFWXIO2KT","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_16","alias_value":"C3KCFWXIO2KT4ISX","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_8","alias_value":"C3KCFWXI","created_at":"2026-05-18T12:33:12.712433+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":3,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2605.23887","citing_title":"CHRONOS: Temporally-Aware Multi-Agent Coordination for Evolving Data Marketplaces","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2505.11771","citing_title":"Residual Feature Integration is Sufficient to Prevent Negative Transfer","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16197","citing_title":"Sketching the Readout of Large Language Models for Scalable Data Attribution and Valuation","ref_index":14,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2","json":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2.json","graph_json":"https://pith.science/api/pith-number/C3KCFWXIO2KT4ISXFL2VXA4BV2/graph.json","events_json":"https://pith.science/api/pith-number/C3KCFWXIO2KT4ISXFL2VXA4BV2/events.json","paper":"https://pith.science/paper/C3KCFWXI"},"agent_actions":{"view_html":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2","download_json":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2.json","view_paper":"https://pith.science/paper/C3KCFWXI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.02868&json=true","fetch_graph":"https://pith.science/api/pith-number/C3KCFWXIO2KT4ISXFL2VXA4BV2/graph.json","fetch_events":"https://pith.science/api/pith-number/C3KCFWXIO2KT4ISXFL2VXA4BV2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2/action/storage_attestation","attest_author":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2/action/author_attestation","sign_citation":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2/action/citation_signature","submit_replication":"https://pith.science/pith/C3KCFWXIO2KT4ISXFL2VXA4BV2/action/replication_record"}},"created_at":"2026-05-17T23:43:49.587196+00:00","updated_at":"2026-05-17T23:43:49.587196+00:00"}