{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:WZ5IY6KIY7JJMWR7N4ZPR7OBO6","short_pith_number":"pith:WZ5IY6KI","canonical_record":{"source":{"id":"1801.05627","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-01-17T11:48:18Z","cross_cats_sorted":[],"title_canon_sha256":"2e22db49e67c265b1994e601d3debb40356778e3a9835493e39758c6db82d520","abstract_canon_sha256":"0617d272ed28bdbde0b8ef1bf9e5a302541539c8e503e23f79dae9d2c6e1ea99"},"schema_version":"1.0"},"canonical_sha256":"b67a8c7948c7d2965a3f6f32f8fdc1778571c464667de7c575ef731f25df1c1f","source":{"kind":"arxiv","id":"1801.05627","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1801.05627","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"arxiv_version","alias_value":"1801.05627v2","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.05627","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"pith_short_12","alias_value":"WZ5IY6KIY7JJ","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"WZ5IY6KIY7JJMWR7","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"WZ5IY6KI","created_at":"2026-05-18T12:33:01Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:WZ5IY6KIY7JJMWR7N4ZPR7OBO6","target":"record","payload":{"canonical_record":{"source":{"id":"1801.05627","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-01-17T11:48:18Z","cross_cats_sorted":[],"title_canon_sha256":"2e22db49e67c265b1994e601d3debb40356778e3a9835493e39758c6db82d520","abstract_canon_sha256":"0617d272ed28bdbde0b8ef1bf9e5a302541539c8e503e23f79dae9d2c6e1ea99"},"schema_version":"1.0"},"canonical_sha256":"b67a8c7948c7d2965a3f6f32f8fdc1778571c464667de7c575ef731f25df1c1f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:19:34.700069Z","signature_b64":"WxoDiPzyqP9imjY2o+8dAxMr/GFB8apotJjh3NR6o0KCC39cxQXbauyel2N6aJ0rKvVAFybEO+jb4ko95easDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b67a8c7948c7d2965a3f6f32f8fdc1778571c464667de7c575ef731f25df1c1f","last_reissued_at":"2026-05-18T00:19:34.699624Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:19:34.699624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1801.05627","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:19:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wdEXXTzAkcTri3avvqnRDpnSJ7UD9wLwXAzjio6N1a9Dhvg3ZvYrE4fKwqY/alQ7zkVuhkNIeRGgrMD7jqP/AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T00:08:43.513659Z"},"content_sha256":"0539e1d514b890501fd121cff6c23159d795241adb0a8c9f6b0f24dfbda12725","schema_version":"1.0","event_id":"sha256:0539e1d514b890501fd121cff6c23159d795241adb0a8c9f6b0f24dfbda12725"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:WZ5IY6KIY7JJMWR7N4ZPR7OBO6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On the Reduction of Biases in Big Data Sets for the Detection of Irregular Power Usage","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Diogo Duarte, Patrick Glauner, Petko Valtchev, Radu State","submitted_at":"2018-01-17T11:48:18Z","abstract_excerpt":"In machine learning, a bias occurs whenever training sets are not representative for the test data, which results in unreliable models. The most common biases in data are arguably class imbalance and covariate shift. In this work, we aim to shed light on this topic in order to increase the overall attention to this issue in the field of machine learning. We propose a scalable novel framework for reducing multiple biases in high-dimensional data sets in order to train more reliable predictors. We apply our methodology to the detection of irregular power usage from real, noisy industrial data. I"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.05627","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:19:34Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fhXY4z6oQk1fr9x0wWNNoFwLQFS/PS1pznIuf3XWXPMUkCQvSJQJq5/ow/77oMNwH+FWjnOkLGzdCfXELjpcCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T00:08:43.514009Z"},"content_sha256":"2dc18405978c433cc19bbf6185321e5cb191dba4bf4e8e831f627aae5348b1d7","schema_version":"1.0","event_id":"sha256:2dc18405978c433cc19bbf6185321e5cb191dba4bf4e8e831f627aae5348b1d7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/bundle.json","state_url":"https://pith.science/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T00:08:43Z","links":{"resolver":"https://pith.science/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6","bundle":"https://pith.science/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/bundle.json","state":"https://pith.science/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WZ5IY6KIY7JJMWR7N4ZPR7OBO6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:WZ5IY6KIY7JJMWR7N4ZPR7OBO6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0617d272ed28bdbde0b8ef1bf9e5a302541539c8e503e23f79dae9d2c6e1ea99","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-01-17T11:48:18Z","title_canon_sha256":"2e22db49e67c265b1994e601d3debb40356778e3a9835493e39758c6db82d520"},"schema_version":"1.0","source":{"id":"1801.05627","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1801.05627","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"arxiv_version","alias_value":"1801.05627v2","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.05627","created_at":"2026-05-18T00:19:34Z"},{"alias_kind":"pith_short_12","alias_value":"WZ5IY6KIY7JJ","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_16","alias_value":"WZ5IY6KIY7JJMWR7","created_at":"2026-05-18T12:33:01Z"},{"alias_kind":"pith_short_8","alias_value":"WZ5IY6KI","created_at":"2026-05-18T12:33:01Z"}],"graph_snapshots":[{"event_id":"sha256:2dc18405978c433cc19bbf6185321e5cb191dba4bf4e8e831f627aae5348b1d7","target":"graph","created_at":"2026-05-18T00:19:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In machine learning, a bias occurs whenever training sets are not representative for the test data, which results in unreliable models. The most common biases in data are arguably class imbalance and covariate shift. In this work, we aim to shed light on this topic in order to increase the overall attention to this issue in the field of machine learning. We propose a scalable novel framework for reducing multiple biases in high-dimensional data sets in order to train more reliable predictors. We apply our methodology to the detection of irregular power usage from real, noisy industrial data. I","authors_text":"Diogo Duarte, Patrick Glauner, Petko Valtchev, Radu State","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-01-17T11:48:18Z","title":"On the Reduction of Biases in Big Data Sets for the Detection of Irregular Power Usage"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.05627","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0539e1d514b890501fd121cff6c23159d795241adb0a8c9f6b0f24dfbda12725","target":"record","created_at":"2026-05-18T00:19:34Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0617d272ed28bdbde0b8ef1bf9e5a302541539c8e503e23f79dae9d2c6e1ea99","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-01-17T11:48:18Z","title_canon_sha256":"2e22db49e67c265b1994e601d3debb40356778e3a9835493e39758c6db82d520"},"schema_version":"1.0","source":{"id":"1801.05627","kind":"arxiv","version":2}},"canonical_sha256":"b67a8c7948c7d2965a3f6f32f8fdc1778571c464667de7c575ef731f25df1c1f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b67a8c7948c7d2965a3f6f32f8fdc1778571c464667de7c575ef731f25df1c1f","first_computed_at":"2026-05-18T00:19:34.699624Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:19:34.699624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"WxoDiPzyqP9imjY2o+8dAxMr/GFB8apotJjh3NR6o0KCC39cxQXbauyel2N6aJ0rKvVAFybEO+jb4ko95easDQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:19:34.700069Z","signed_message":"canonical_sha256_bytes"},"source_id":"1801.05627","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0539e1d514b890501fd121cff6c23159d795241adb0a8c9f6b0f24dfbda12725","sha256:2dc18405978c433cc19bbf6185321e5cb191dba4bf4e8e831f627aae5348b1d7"],"state_sha256":"44fa8f96c774a74cb2e9b6f160dcdd917bb32196d36ca3498ec6d7c53af3a261"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"19YWIf43gMs2VqSrerDjDFauYkWTTcwo2FPM3XNjiT5cENE75OVnPNB6JhoqPSQso+4BM1wSGOvlbkyxJdfSBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T00:08:43.516014Z","bundle_sha256":"ba25edd99c341343dea81acac172f6ecf2e6c03877dd4b10c24eeb5154c98b2d"}}