{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:HJFXGWJTFENV5K4MFZ2F33QKUQ","short_pith_number":"pith:HJFXGWJT","canonical_record":{"source":{"id":"2602.00056","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-01-20T00:54:37Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c3d8d7fb91b710b8fcbeff3670f4e0a684102c5de433390c3d2b9a2e3fc318b3","abstract_canon_sha256":"16393a5049da71e3fa936d22d6ac96346bcddddbf77ca377557fb2fe2c328910"},"schema_version":"1.0"},"canonical_sha256":"3a4b735933291b5eab8c2e745dee0aa424cb2c65a295f95e2ba43422800b0b6a","source":{"kind":"arxiv","id":"2602.00056","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.00056","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.00056v4","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.00056","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"HJFXGWJTFENV","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_16","alias_value":"HJFXGWJTFENV5K4M","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_8","alias_value":"HJFXGWJT","created_at":"2026-06-09T02:07:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:HJFXGWJTFENV5K4MFZ2F33QKUQ","target":"record","payload":{"canonical_record":{"source":{"id":"2602.00056","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-01-20T00:54:37Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c3d8d7fb91b710b8fcbeff3670f4e0a684102c5de433390c3d2b9a2e3fc318b3","abstract_canon_sha256":"16393a5049da71e3fa936d22d6ac96346bcddddbf77ca377557fb2fe2c328910"},"schema_version":"1.0"},"canonical_sha256":"3a4b735933291b5eab8c2e745dee0aa424cb2c65a295f95e2ba43422800b0b6a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:18.798358Z","signature_b64":"gFyCKzSMIudKeElsO8bHDnIDXM1R0LwG/e18NGZ4Obx2BXOxLict9EtMVvhQ27KXFE/Phjaoz0AryHp9tmUBDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3a4b735933291b5eab8c2e745dee0aa424cb2c65a295f95e2ba43422800b0b6a","last_reissued_at":"2026-06-09T02:07:18.797464Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:18.797464Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.00056","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TGz+unrm/f+7Egdfoy+AAASiaBQZa2jGM4hjRpJoCdHwMAfHvR4eo6zk0FG+Qt9PfrRZ4IHBiZavnZjVPtJJAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T09:51:13.179806Z"},"content_sha256":"563c53b83c2a3022fcb09ad1422522b50e6bc9632bd0daa0ade33d2f70e821ed","schema_version":"1.0","event_id":"sha256:563c53b83c2a3022fcb09ad1422522b50e6bc9632bd0daa0ade33d2f70e821ed"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:HJFXGWJTFENV5K4MFZ2F33QKUQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"How Hyper-Datafication Impacts the Sustainability Costs in Frontier AI","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers.","cross_cats":["cs.AI"],"primary_cat":"cs.CY","authors_text":"Erik B. Dam, Janin Koch, Mophat Okinyi, Raghavendra Selvan, Sebastian Mair, Sophia N. Wilson","submitted_at":"2026-01-20T00:54:37Z","abstract_excerpt":"Large-scale data has fuelled the success of frontier artificial intelligence (AI) models over the past decade. This expansion has relied on sustained efforts by large technology corporations to aggregate and curate internet-scale datasets. In this work, we examine the environmental, social, and economic costs of large-scale data in AI through a sustainability lens. We argue that the field is shifting from building models from data to actively creating data for building models. We characterise this transition as hyper-datafication, which marks a critical juncture for the future of frontier AI a"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our analyses reveal that hyper-datafication does not merely increase resource consumption but systematically redistributes environmental burdens, labour risks, and representational harms toward the Global South, precarious data workers, and under-represented cultures.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the sample of approximately 550,000 Hugging Face Hub datasets combined with qualitative responses from data workers in Kenya sufficiently represents the global data practices and impacts of frontier AI models.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Hyper-datafication in frontier AI increases resource consumption and redistributes environmental burdens, labor risks, and representational harms toward the Global South, data workers, and under-represented cultures, based on analysis of 550,000 Hugging Face datasets and Kenyan worker responses.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"4f1fae4ec4291bf90f2751c1967f39c783dcc7f383596741f6ff5e3c57f0cc37"},"source":{"id":"2602.00056","kind":"arxiv","version":4},"verdict":{"id":"c44390a9-459c-421d-9382-4906c25de50d","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T13:17:14.677358Z","strongest_claim":"Our analyses reveal that hyper-datafication does not merely increase resource consumption but systematically redistributes environmental burdens, labour risks, and representational harms toward the Global South, precarious data workers, and under-represented cultures.","one_line_summary":"Hyper-datafication in frontier AI increases resource consumption and redistributes environmental burdens, labor risks, and representational harms toward the Global South, data workers, and under-represented cultures, based on analysis of 550,000 Hugging Face datasets and Kenyan worker responses.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the sample of approximately 550,000 Hugging Face Hub datasets combined with qualitative responses from data workers in Kenya sufficiently represents the global data practices and impacts of frontier AI models.","pith_extraction_headline":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.00056/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1a554095d2cab27f6fe6670f90f4c01122ba6917f130ab5801e0a3ca25845224"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"c44390a9-459c-421d-9382-4906c25de50d"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-09T02:07:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xnw8LSYnxS8M0GNybfrbbSuPE3aKSHGYL03AAEqexOi2sDNACRDGb55sW2cAmLBonev1g+f957dsaqWq2hZDDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-12T09:51:13.180287Z"},"content_sha256":"d4bd6557eec85bdeac702f3c2c8716f09a045b5313d9dd440312155b2e6f8ef5","schema_version":"1.0","event_id":"sha256:d4bd6557eec85bdeac702f3c2c8716f09a045b5313d9dd440312155b2e6f8ef5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/bundle.json","state_url":"https://pith.science/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-12T09:51:13Z","links":{"resolver":"https://pith.science/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ","bundle":"https://pith.science/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/bundle.json","state":"https://pith.science/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HJFXGWJTFENV5K4MFZ2F33QKUQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HJFXGWJTFENV5K4MFZ2F33QKUQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"16393a5049da71e3fa936d22d6ac96346bcddddbf77ca377557fb2fe2c328910","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-01-20T00:54:37Z","title_canon_sha256":"c3d8d7fb91b710b8fcbeff3670f4e0a684102c5de433390c3d2b9a2e3fc318b3"},"schema_version":"1.0","source":{"id":"2602.00056","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.00056","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"arxiv_version","alias_value":"2602.00056v4","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.00056","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_12","alias_value":"HJFXGWJTFENV","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_16","alias_value":"HJFXGWJTFENV5K4M","created_at":"2026-06-09T02:07:18Z"},{"alias_kind":"pith_short_8","alias_value":"HJFXGWJT","created_at":"2026-06-09T02:07:18Z"}],"graph_snapshots":[{"event_id":"sha256:d4bd6557eec85bdeac702f3c2c8716f09a045b5313d9dd440312155b2e6f8ef5","target":"graph","created_at":"2026-06-09T02:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our analyses reveal that hyper-datafication does not merely increase resource consumption but systematically redistributes environmental burdens, labour risks, and representational harms toward the Global South, precarious data workers, and under-represented cultures."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the sample of approximately 550,000 Hugging Face Hub datasets combined with qualitative responses from data workers in Kenya sufficiently represents the global data practices and impacts of frontier AI models."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Hyper-datafication in frontier AI increases resource consumption and redistributes environmental burdens, labor risks, and representational harms toward the Global South, data workers, and under-represented cultures, based on analysis of 550,000 Hugging Face datasets and Kenyan worker responses."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers."}],"snapshot_sha256":"4f1fae4ec4291bf90f2751c1967f39c783dcc7f383596741f6ff5e3c57f0cc37"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"1a554095d2cab27f6fe6670f90f4c01122ba6917f130ab5801e0a3ca25845224"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.00056/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large-scale data has fuelled the success of frontier artificial intelligence (AI) models over the past decade. This expansion has relied on sustained efforts by large technology corporations to aggregate and curate internet-scale datasets. In this work, we examine the environmental, social, and economic costs of large-scale data in AI through a sustainability lens. We argue that the field is shifting from building models from data to actively creating data for building models. We characterise this transition as hyper-datafication, which marks a critical juncture for the future of frontier AI a","authors_text":"Erik B. Dam, Janin Koch, Mophat Okinyi, Raghavendra Selvan, Sebastian Mair, Sophia N. Wilson","cross_cats":["cs.AI"],"headline":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-01-20T00:54:37Z","title":"How Hyper-Datafication Impacts the Sustainability Costs in Frontier AI"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.00056","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-16T13:17:14.677358Z","id":"c44390a9-459c-421d-9382-4906c25de50d","model_set":{"reader":"grok-4.3"},"one_line_summary":"Hyper-datafication in frontier AI increases resource consumption and redistributes environmental burdens, labor risks, and representational harms toward the Global South, data workers, and under-represented cultures, based on analysis of 550,000 Hugging Face datasets and Kenyan worker responses.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Hyper-datafication in frontier AI redistributes environmental burdens, labor risks, and representational harms toward the Global South and precarious workers.","strongest_claim":"Our analyses reveal that hyper-datafication does not merely increase resource consumption but systematically redistributes environmental burdens, labour risks, and representational harms toward the Global South, precarious data workers, and under-represented cultures.","weakest_assumption":"That the sample of approximately 550,000 Hugging Face Hub datasets combined with qualitative responses from data workers in Kenya sufficiently represents the global data practices and impacts of frontier AI models."}},"verdict_id":"c44390a9-459c-421d-9382-4906c25de50d"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:563c53b83c2a3022fcb09ad1422522b50e6bc9632bd0daa0ade33d2f70e821ed","target":"record","created_at":"2026-06-09T02:07:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"16393a5049da71e3fa936d22d6ac96346bcddddbf77ca377557fb2fe2c328910","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CY","submitted_at":"2026-01-20T00:54:37Z","title_canon_sha256":"c3d8d7fb91b710b8fcbeff3670f4e0a684102c5de433390c3d2b9a2e3fc318b3"},"schema_version":"1.0","source":{"id":"2602.00056","kind":"arxiv","version":4}},"canonical_sha256":"3a4b735933291b5eab8c2e745dee0aa424cb2c65a295f95e2ba43422800b0b6a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3a4b735933291b5eab8c2e745dee0aa424cb2c65a295f95e2ba43422800b0b6a","first_computed_at":"2026-06-09T02:07:18.797464Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-09T02:07:18.797464Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gFyCKzSMIudKeElsO8bHDnIDXM1R0LwG/e18NGZ4Obx2BXOxLict9EtMVvhQ27KXFE/Phjaoz0AryHp9tmUBDA==","signature_status":"signed_v1","signed_at":"2026-06-09T02:07:18.798358Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.00056","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:563c53b83c2a3022fcb09ad1422522b50e6bc9632bd0daa0ade33d2f70e821ed","sha256:d4bd6557eec85bdeac702f3c2c8716f09a045b5313d9dd440312155b2e6f8ef5"],"state_sha256":"650e460959be489a1f84d53910e037dd38e4bc191b64d07effb41cf641e2fa69"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A/olL0QeFjpGvrUNlY2Qagovf6prZWAh3zkVNAGgfE6+uWtSkz/+4nYvyS42Mgy8/QI5JjumTFLJPqS2d8WrCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-12T09:51:13.182842Z","bundle_sha256":"64c2178e88a1dfd48cadad1f4e9970de8c4df63eef996631dd05f25a37549a07"}}