{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:B4QJRCTNC5ERBKPTW5SDNOJ4OS","short_pith_number":"pith:B4QJRCTN","canonical_record":{"source":{"id":"1711.01299","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-11-03T18:50:08Z","cross_cats_sorted":[],"title_canon_sha256":"d9000515c4e0366251b865876048a067928b2d8b8ae80d0ba73614140727adae","abstract_canon_sha256":"26174391e8bf893bab683fc6f2121824c63d4a4179de05e483af871c0205ba19"},"schema_version":"1.0"},"canonical_sha256":"0f20988a6d174910a9f3b76436b93c748dad6e9f7ef338fc5beee76f0a486167","source":{"kind":"arxiv","id":"1711.01299","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.01299","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"arxiv_version","alias_value":"1711.01299v1","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.01299","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"pith_short_12","alias_value":"B4QJRCTNC5ER","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"B4QJRCTNC5ERBKPT","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"B4QJRCTN","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:B4QJRCTNC5ERBKPTW5SDNOJ4OS","target":"record","payload":{"canonical_record":{"source":{"id":"1711.01299","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-11-03T18:50:08Z","cross_cats_sorted":[],"title_canon_sha256":"d9000515c4e0366251b865876048a067928b2d8b8ae80d0ba73614140727adae","abstract_canon_sha256":"26174391e8bf893bab683fc6f2121824c63d4a4179de05e483af871c0205ba19"},"schema_version":"1.0"},"canonical_sha256":"0f20988a6d174910a9f3b76436b93c748dad6e9f7ef338fc5beee76f0a486167","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:31:18.240927Z","signature_b64":"hhDj4m5jpYV2dtw+mlLLlZwG+Cg5ixHmSn0DBUjYdcIfmYCQniUIxyqPuBOnrE0XMMPJCSTtjIzaGRrNsNt/DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f20988a6d174910a9f3b76436b93c748dad6e9f7ef338fc5beee76f0a486167","last_reissued_at":"2026-05-18T00:31:18.240340Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:31:18.240340Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.01299","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:31:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Z8BEjDkLhHieGJzIcJr63dmNHsp5XKYloXzT8dKMAM99cSp/fr/fjrMc5GjXyHrnPKe7TF7OjIo9end6MTZ2DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T21:39:51.779137Z"},"content_sha256":"edfd044dd75d97199fdf1f8e1382fef2af4fbf211fd96b9322ac25d884cbe6a5","schema_version":"1.0","event_id":"sha256:edfd044dd75d97199fdf1f8e1382fef2af4fbf211fd96b9322ac25d884cbe6a5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:B4QJRCTNC5ERBKPTW5SDNOJ4OS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"BoostClean: Automated Error Detection and Repair for Machine Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Eugene Wu, Ken Goldberg, Michael J. Franklin, Sanjay Krishnan","submitted_at":"2017-11-03T18:50:08Z","abstract_excerpt":"Predictive models based on machine learning can be highly sensitive to data error. Training data are often combined with a variety of different sources, each susceptible to different types of inconsistencies, and new data streams during prediction time, the model may encounter previously unseen inconsistencies. An important class of such inconsistencies is domain value violations that occur when an attribute value is outside of an allowed domain. We explore automatically detecting and repairing such violations by leveraging the often available clean test labels to determine whether a given det"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.01299","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:31:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ouuIiZYMQzCMBL99B3Tg7lmoVF9mb2CVQoiAMdAWoDlM3wVO36/To8D2Jyy3n8LFFnxGqtt1YEyw6KAN6gI9Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T21:39:51.779543Z"},"content_sha256":"b0684b8db5c87b9c7b000094a55eccebd4eeef80b7774c1612f1a64e469ddb44","schema_version":"1.0","event_id":"sha256:b0684b8db5c87b9c7b000094a55eccebd4eeef80b7774c1612f1a64e469ddb44"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/bundle.json","state_url":"https://pith.science/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T21:39:51Z","links":{"resolver":"https://pith.science/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS","bundle":"https://pith.science/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/bundle.json","state":"https://pith.science/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/B4QJRCTNC5ERBKPTW5SDNOJ4OS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:B4QJRCTNC5ERBKPTW5SDNOJ4OS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"26174391e8bf893bab683fc6f2121824c63d4a4179de05e483af871c0205ba19","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-11-03T18:50:08Z","title_canon_sha256":"d9000515c4e0366251b865876048a067928b2d8b8ae80d0ba73614140727adae"},"schema_version":"1.0","source":{"id":"1711.01299","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.01299","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"arxiv_version","alias_value":"1711.01299v1","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.01299","created_at":"2026-05-18T00:31:18Z"},{"alias_kind":"pith_short_12","alias_value":"B4QJRCTNC5ER","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"B4QJRCTNC5ERBKPT","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"B4QJRCTN","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:b0684b8db5c87b9c7b000094a55eccebd4eeef80b7774c1612f1a64e469ddb44","target":"graph","created_at":"2026-05-18T00:31:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Predictive models based on machine learning can be highly sensitive to data error. Training data are often combined with a variety of different sources, each susceptible to different types of inconsistencies, and new data streams during prediction time, the model may encounter previously unseen inconsistencies. An important class of such inconsistencies is domain value violations that occur when an attribute value is outside of an allowed domain. We explore automatically detecting and repairing such violations by leveraging the often available clean test labels to determine whether a given det","authors_text":"Eugene Wu, Ken Goldberg, Michael J. Franklin, Sanjay Krishnan","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-11-03T18:50:08Z","title":"BoostClean: Automated Error Detection and Repair for Machine Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.01299","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:edfd044dd75d97199fdf1f8e1382fef2af4fbf211fd96b9322ac25d884cbe6a5","target":"record","created_at":"2026-05-18T00:31:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"26174391e8bf893bab683fc6f2121824c63d4a4179de05e483af871c0205ba19","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2017-11-03T18:50:08Z","title_canon_sha256":"d9000515c4e0366251b865876048a067928b2d8b8ae80d0ba73614140727adae"},"schema_version":"1.0","source":{"id":"1711.01299","kind":"arxiv","version":1}},"canonical_sha256":"0f20988a6d174910a9f3b76436b93c748dad6e9f7ef338fc5beee76f0a486167","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0f20988a6d174910a9f3b76436b93c748dad6e9f7ef338fc5beee76f0a486167","first_computed_at":"2026-05-18T00:31:18.240340Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:31:18.240340Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hhDj4m5jpYV2dtw+mlLLlZwG+Cg5ixHmSn0DBUjYdcIfmYCQniUIxyqPuBOnrE0XMMPJCSTtjIzaGRrNsNt/DQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:31:18.240927Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.01299","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:edfd044dd75d97199fdf1f8e1382fef2af4fbf211fd96b9322ac25d884cbe6a5","sha256:b0684b8db5c87b9c7b000094a55eccebd4eeef80b7774c1612f1a64e469ddb44"],"state_sha256":"14d647fe22672ab6d6f6271b1740aea4678fba2deb2ed94d707cf65ac01a1902"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pRO8yOzCrPstDnsna7wi40frfPdRpqRTkrOC7qd7S0XE/iwPWrXyYSdNaUYF0f1pP14c+AKSZfPcQzMXiMmKDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T21:39:51.781431Z","bundle_sha256":"ea4326466de1914f23ba77c11cc91ef70010485e813fcba3ba058429afd92fee"}}