{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:HNRPMO4GJHYSDBKIUT5P3R6YUY","short_pith_number":"pith:HNRPMO4G","canonical_record":{"source":{"id":"1904.10761","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-04-22T23:57:07Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"f0ca3d313066e7ddb1f822f60e6ec71bee4326e98ce4085fbcb5393e56545a0b","abstract_canon_sha256":"bc1bf36accc8c3def92b6a7a7a495eee37836cac590ac4d123880f6e66962a67"},"schema_version":"1.0"},"canonical_sha256":"3b62f63b8649f1218548a4fafdc7d8a60ec71796b99e1c494c6c0167fb789de3","source":{"kind":"arxiv","id":"1904.10761","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.10761","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"arxiv_version","alias_value":"1904.10761v1","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.10761","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"pith_short_12","alias_value":"HNRPMO4GJHYS","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"HNRPMO4GJHYSDBKI","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"HNRPMO4G","created_at":"2026-05-18T12:33:18Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:HNRPMO4GJHYSDBKIUT5P3R6YUY","target":"record","payload":{"canonical_record":{"source":{"id":"1904.10761","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-04-22T23:57:07Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"f0ca3d313066e7ddb1f822f60e6ec71bee4326e98ce4085fbcb5393e56545a0b","abstract_canon_sha256":"bc1bf36accc8c3def92b6a7a7a495eee37836cac590ac4d123880f6e66962a67"},"schema_version":"1.0"},"canonical_sha256":"3b62f63b8649f1218548a4fafdc7d8a60ec71796b99e1c494c6c0167fb789de3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:47:50.033423Z","signature_b64":"S5KdPBEOL88l5sGkaOnEX3D/D7z7+mq+mpokpNWMw3o0+X2v8bQPWfDDjrHU900lLuP/Skl0rUStZX16w8EmAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3b62f63b8649f1218548a4fafdc7d8a60ec71796b99e1c494c6c0167fb789de3","last_reissued_at":"2026-05-17T23:47:50.032870Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:47:50.032870Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1904.10761","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:47:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dhrtE2fOptLs+f/QVKM0IFN8Ng5dlr+yp1VIj50XVmWKYRsf/DC32CF5ubeagLp6C3eMCxKk3wc3aGS+IewqBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T16:38:47.222488Z"},"content_sha256":"f05d25b095e8159a91effe77d4ead070b84d6434b86a6cd0d02cc4013afda472","schema_version":"1.0","event_id":"sha256:f05d25b095e8159a91effe77d4ead070b84d6434b86a6cd0d02cc4013afda472"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:HNRPMO4GJHYSDBKIUT5P3R6YUY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Data Cleaning for Accurate, Fair, and Robust Models: A Big Data - AI Integration Approach","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.DB","authors_text":"Hyunsu Kim, Ki Hyun Tae, Steven Euijong Whang, Young Hun Oh, Yuji Roh","submitted_at":"2019-04-22T23:57:07Z","abstract_excerpt":"The wide use of machine learning is fundamentally changing the software development paradigm (a.k.a. Software 2.0) where data becomes a first-class citizen, on par with code. As machine learning is used in sensitive applications, it becomes imperative that the trained model is accurate, fair, and robust to attacks. While many techniques have been proposed to improve the model training process (in-processing approach) or the trained model itself (post-processing), we argue that the most effective method is to clean the root cause of error: the data the model is trained on (pre-processing). Hist"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.10761","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:47:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YBGGI64NvmzG9F7X+1vlP6RBD879hCJagDrfXlmjmo15ngeH74qmJC/TSBXtMrglxm6c+uh3ILZSng30XlcqAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T16:38:47.223189Z"},"content_sha256":"e121e8e2753d985924067a04f076cb4547890b014eae2140f8745564fdc93c21","schema_version":"1.0","event_id":"sha256:e121e8e2753d985924067a04f076cb4547890b014eae2140f8745564fdc93c21"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/bundle.json","state_url":"https://pith.science/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T16:38:47Z","links":{"resolver":"https://pith.science/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY","bundle":"https://pith.science/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/bundle.json","state":"https://pith.science/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HNRPMO4GJHYSDBKIUT5P3R6YUY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:HNRPMO4GJHYSDBKIUT5P3R6YUY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bc1bf36accc8c3def92b6a7a7a495eee37836cac590ac4d123880f6e66962a67","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-04-22T23:57:07Z","title_canon_sha256":"f0ca3d313066e7ddb1f822f60e6ec71bee4326e98ce4085fbcb5393e56545a0b"},"schema_version":"1.0","source":{"id":"1904.10761","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1904.10761","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"arxiv_version","alias_value":"1904.10761v1","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.10761","created_at":"2026-05-17T23:47:50Z"},{"alias_kind":"pith_short_12","alias_value":"HNRPMO4GJHYS","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_16","alias_value":"HNRPMO4GJHYSDBKI","created_at":"2026-05-18T12:33:18Z"},{"alias_kind":"pith_short_8","alias_value":"HNRPMO4G","created_at":"2026-05-18T12:33:18Z"}],"graph_snapshots":[{"event_id":"sha256:e121e8e2753d985924067a04f076cb4547890b014eae2140f8745564fdc93c21","target":"graph","created_at":"2026-05-17T23:47:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The wide use of machine learning is fundamentally changing the software development paradigm (a.k.a. Software 2.0) where data becomes a first-class citizen, on par with code. As machine learning is used in sensitive applications, it becomes imperative that the trained model is accurate, fair, and robust to attacks. While many techniques have been proposed to improve the model training process (in-processing approach) or the trained model itself (post-processing), we argue that the most effective method is to clean the root cause of error: the data the model is trained on (pre-processing). Hist","authors_text":"Hyunsu Kim, Ki Hyun Tae, Steven Euijong Whang, Young Hun Oh, Yuji Roh","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-04-22T23:57:07Z","title":"Data Cleaning for Accurate, Fair, and Robust Models: A Big Data - AI Integration Approach"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.10761","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f05d25b095e8159a91effe77d4ead070b84d6434b86a6cd0d02cc4013afda472","target":"record","created_at":"2026-05-17T23:47:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bc1bf36accc8c3def92b6a7a7a495eee37836cac590ac4d123880f6e66962a67","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2019-04-22T23:57:07Z","title_canon_sha256":"f0ca3d313066e7ddb1f822f60e6ec71bee4326e98ce4085fbcb5393e56545a0b"},"schema_version":"1.0","source":{"id":"1904.10761","kind":"arxiv","version":1}},"canonical_sha256":"3b62f63b8649f1218548a4fafdc7d8a60ec71796b99e1c494c6c0167fb789de3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3b62f63b8649f1218548a4fafdc7d8a60ec71796b99e1c494c6c0167fb789de3","first_computed_at":"2026-05-17T23:47:50.032870Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:47:50.032870Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"S5KdPBEOL88l5sGkaOnEX3D/D7z7+mq+mpokpNWMw3o0+X2v8bQPWfDDjrHU900lLuP/Skl0rUStZX16w8EmAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:47:50.033423Z","signed_message":"canonical_sha256_bytes"},"source_id":"1904.10761","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f05d25b095e8159a91effe77d4ead070b84d6434b86a6cd0d02cc4013afda472","sha256:e121e8e2753d985924067a04f076cb4547890b014eae2140f8745564fdc93c21"],"state_sha256":"eab72174646742e7f71eef792d9177d3e266a38e7ef181366c4b771c7dc5b210"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mqoZnyb9n9IUjgSqMyAvmtJH5uPc8qamOUmbAeG24/kRCHSNJWTaSHO1BwSikBlfO4+JstsqvlBS9mBZCc7OAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T16:38:47.227428Z","bundle_sha256":"47fa1b0adde9d291395efeea9fc279d0a58b56fe85f00d9a455320c859cda375"}}