{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:OQM4BTHFWW4WK3ZBIAQEUPN3PQ","short_pith_number":"pith:OQM4BTHF","canonical_record":{"source":{"id":"1601.03797","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-01-15T02:02:00Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"63c5d725d48842682241c6545ab2011e6b4b14095aed406f53ddcfcede55281a","abstract_canon_sha256":"6b3023d229fd87098103839a97a33a4e241ec5d4c2e652c670fe920a4139f7dc"},"schema_version":"1.0"},"canonical_sha256":"7419c0cce5b5b9656f2140204a3dbb7c1b9ac26d8e8ac0d5bb190a1d0105ba10","source":{"kind":"arxiv","id":"1601.03797","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1601.03797","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"arxiv_version","alias_value":"1601.03797v1","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1601.03797","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"pith_short_12","alias_value":"OQM4BTHFWW4W","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"OQM4BTHFWW4WK3ZB","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"OQM4BTHF","created_at":"2026-05-18T12:30:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:OQM4BTHFWW4WK3ZBIAQEUPN3PQ","target":"record","payload":{"canonical_record":{"source":{"id":"1601.03797","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-01-15T02:02:00Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"63c5d725d48842682241c6545ab2011e6b4b14095aed406f53ddcfcede55281a","abstract_canon_sha256":"6b3023d229fd87098103839a97a33a4e241ec5d4c2e652c670fe920a4139f7dc"},"schema_version":"1.0"},"canonical_sha256":"7419c0cce5b5b9656f2140204a3dbb7c1b9ac26d8e8ac0d5bb190a1d0105ba10","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:22:50.019659Z","signature_b64":"qL8t2jC6hkn2LZ7Z7cGI7WcjLeR9T/2u1gTjc1Axyn2ebGcocIMxhy3ygr7d8uiwAheHxY3lXVjrvnkmHXFHDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7419c0cce5b5b9656f2140204a3dbb7c1b9ac26d8e8ac0d5bb190a1d0105ba10","last_reissued_at":"2026-05-18T01:22:50.019232Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:22:50.019232Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1601.03797","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:22:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Sp1GieTtDA9I2vwD9liFzqpUGoXnpeOstHxQcLButTgzpivzSVdzkMz/HQGllT2nSVEMNUdLUL2Bq0dM09WnCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T18:46:25.369816Z"},"content_sha256":"fc9bb278e161f7faf077f7647ba5e13761b624ce14520fe0a73016efa1502304","schema_version":"1.0","event_id":"sha256:fc9bb278e161f7faf077f7647ba5e13761b624ce14520fe0a73016efa1502304"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:OQM4BTHFWW4WK3ZBIAQEUPN3PQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ActiveClean: Interactive Data Cleaning While Learning Convex Loss Models","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.DB","authors_text":"Eugene Wu, Jiannan Wang, Ken Goldberg, Michael J. Franklin, Sanjay Krishnan","submitted_at":"2016-01-15T02:02:00Z","abstract_excerpt":"Data cleaning is often an important step to ensure that predictive models, such as regression and classification, are not affected by systematic errors such as inconsistent, out-of-date, or outlier data. Identifying dirty data is often a manual and iterative process, and can be challenging on large datasets. However, many data cleaning workflows can introduce subtle biases into the training processes due to violation of independence assumptions. We propose ActiveClean, a progressive cleaning approach where the model is updated incrementally instead of re-training and can guarantee accuracy on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1601.03797","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:22:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"c4ll/k5dCZI6ghVhN94Nu9shBf9P+iz2Wob3VcDVr4SpsslJ95kQdQ1GjvwVeVCZatGQQtewf1E9418ssfVdCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T18:46:25.370175Z"},"content_sha256":"3bcbd8a2596c256f5280aa1dcf9e2a05eaf6ca79423cab02960df01e1df97d7b","schema_version":"1.0","event_id":"sha256:3bcbd8a2596c256f5280aa1dcf9e2a05eaf6ca79423cab02960df01e1df97d7b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/bundle.json","state_url":"https://pith.science/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T18:46:25Z","links":{"resolver":"https://pith.science/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ","bundle":"https://pith.science/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/bundle.json","state":"https://pith.science/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OQM4BTHFWW4WK3ZBIAQEUPN3PQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:OQM4BTHFWW4WK3ZBIAQEUPN3PQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6b3023d229fd87098103839a97a33a4e241ec5d4c2e652c670fe920a4139f7dc","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-01-15T02:02:00Z","title_canon_sha256":"63c5d725d48842682241c6545ab2011e6b4b14095aed406f53ddcfcede55281a"},"schema_version":"1.0","source":{"id":"1601.03797","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1601.03797","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"arxiv_version","alias_value":"1601.03797v1","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1601.03797","created_at":"2026-05-18T01:22:50Z"},{"alias_kind":"pith_short_12","alias_value":"OQM4BTHFWW4W","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_16","alias_value":"OQM4BTHFWW4WK3ZB","created_at":"2026-05-18T12:30:36Z"},{"alias_kind":"pith_short_8","alias_value":"OQM4BTHF","created_at":"2026-05-18T12:30:36Z"}],"graph_snapshots":[{"event_id":"sha256:3bcbd8a2596c256f5280aa1dcf9e2a05eaf6ca79423cab02960df01e1df97d7b","target":"graph","created_at":"2026-05-18T01:22:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Data cleaning is often an important step to ensure that predictive models, such as regression and classification, are not affected by systematic errors such as inconsistent, out-of-date, or outlier data. Identifying dirty data is often a manual and iterative process, and can be challenging on large datasets. However, many data cleaning workflows can introduce subtle biases into the training processes due to violation of independence assumptions. We propose ActiveClean, a progressive cleaning approach where the model is updated incrementally instead of re-training and can guarantee accuracy on ","authors_text":"Eugene Wu, Jiannan Wang, Ken Goldberg, Michael J. Franklin, Sanjay Krishnan","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-01-15T02:02:00Z","title":"ActiveClean: Interactive Data Cleaning While Learning Convex Loss Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1601.03797","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fc9bb278e161f7faf077f7647ba5e13761b624ce14520fe0a73016efa1502304","target":"record","created_at":"2026-05-18T01:22:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6b3023d229fd87098103839a97a33a4e241ec5d4c2e652c670fe920a4139f7dc","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2016-01-15T02:02:00Z","title_canon_sha256":"63c5d725d48842682241c6545ab2011e6b4b14095aed406f53ddcfcede55281a"},"schema_version":"1.0","source":{"id":"1601.03797","kind":"arxiv","version":1}},"canonical_sha256":"7419c0cce5b5b9656f2140204a3dbb7c1b9ac26d8e8ac0d5bb190a1d0105ba10","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7419c0cce5b5b9656f2140204a3dbb7c1b9ac26d8e8ac0d5bb190a1d0105ba10","first_computed_at":"2026-05-18T01:22:50.019232Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:22:50.019232Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qL8t2jC6hkn2LZ7Z7cGI7WcjLeR9T/2u1gTjc1Axyn2ebGcocIMxhy3ygr7d8uiwAheHxY3lXVjrvnkmHXFHDA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:22:50.019659Z","signed_message":"canonical_sha256_bytes"},"source_id":"1601.03797","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fc9bb278e161f7faf077f7647ba5e13761b624ce14520fe0a73016efa1502304","sha256:3bcbd8a2596c256f5280aa1dcf9e2a05eaf6ca79423cab02960df01e1df97d7b"],"state_sha256":"38c843308b61fd9a68b0f584371cd4e50aae124663864774403e3d6ad7fc9244"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"L86k7FTLOQO2AMoDjXAun2AnU8uFdPhNsMT44sump14EqoHQllaeyoy0c6bMU93Vmojj5oA2TAg97kQUKD3jDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T18:46:25.372144Z","bundle_sha256":"3109aae1ed206c6a4ac97f35f783bd013f4768fe61ed2578bd1845b5f232bb5f"}}