{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:STF244AREI5N6RVDI4M2OE2O5F","short_pith_number":"pith:STF244AR","canonical_record":{"source":{"id":"1803.00897","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T15:35:18Z","cross_cats_sorted":[],"title_canon_sha256":"306c6c61a99d65f06507642eb4314f0d2e2a633fb7e8f959af6387c2dda59955","abstract_canon_sha256":"ceafb7e3c85d48c6ac60095953d083355f1636b6aa94ab32d149556c04af6196"},"schema_version":"1.0"},"canonical_sha256":"94cbae7011223adf46a34719a7134ee964ac297790b54c4cb4ea453271b30a46","source":{"kind":"arxiv","id":"1803.00897","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00897","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00897v1","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00897","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"pith_short_12","alias_value":"STF244AREI5N","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"STF244AREI5N6RVD","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"STF244AR","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:STF244AREI5N6RVDI4M2OE2O5F","target":"record","payload":{"canonical_record":{"source":{"id":"1803.00897","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T15:35:18Z","cross_cats_sorted":[],"title_canon_sha256":"306c6c61a99d65f06507642eb4314f0d2e2a633fb7e8f959af6387c2dda59955","abstract_canon_sha256":"ceafb7e3c85d48c6ac60095953d083355f1636b6aa94ab32d149556c04af6196"},"schema_version":"1.0"},"canonical_sha256":"94cbae7011223adf46a34719a7134ee964ac297790b54c4cb4ea453271b30a46","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:22:07.555618Z","signature_b64":"Xil3SHD6EIafIvFFs3lGsJJ801kM36fcLZv//jR5VOowyhEcBJwrksDuLct+UcQ1cV+DgMMD6txKWCtTBJCkBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"94cbae7011223adf46a34719a7134ee964ac297790b54c4cb4ea453271b30a46","last_reissued_at":"2026-05-18T00:22:07.555041Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:22:07.555041Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.00897","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iJzY498lJHol3LzQad3O/isvtdanONX40krzvm7cR4SRNNNuEFNMJsLsUib3ZFT6CycgPpoOIug0/qV1PAqmCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T14:42:22.828911Z"},"content_sha256":"7295976179dd77e6edf02552d8c5754f91e9659e9950f031269d7402a2fe1983","schema_version":"1.0","event_id":"sha256:7295976179dd77e6edf02552d8c5754f91e9659e9950f031269d7402a2fe1983"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:STF244AREI5N6RVDI4M2OE2O5F","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Impact of Biases in Big Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Patrick Glauner, Petko Valtchev, Radu State","submitted_at":"2018-03-02T15:35:18Z","abstract_excerpt":"The underlying paradigm of big data-driven machine learning reflects the desire of deriving better conclusions from simply analyzing more data, without the necessity of looking at theory and models. Is having simply more data always helpful? In 1936, The Literary Digest collected 2.3M filled in questionnaires to predict the outcome of that year's US presidential election. The outcome of this big data prediction proved to be entirely wrong, whereas George Gallup only needed 3K handpicked people to make an accurate prediction. Generally, biases occur in machine learning whenever the distribution"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00897","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:22:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9hAhePpMJSHGSMOyNzb1BNiCiwBLIy0IPrxv+5PvnQku2K9jq14+3hQJBBqSVb/Wvl2KdmLaNK5kgz2JvtwdDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-01T14:42:22.829249Z"},"content_sha256":"053fdf6df20712a665df4cf4f97c90c0d38a7a58510713c671fd7a181a16c286","schema_version":"1.0","event_id":"sha256:053fdf6df20712a665df4cf4f97c90c0d38a7a58510713c671fd7a181a16c286"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/STF244AREI5N6RVDI4M2OE2O5F/bundle.json","state_url":"https://pith.science/pith/STF244AREI5N6RVDI4M2OE2O5F/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/STF244AREI5N6RVDI4M2OE2O5F/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-01T14:42:22Z","links":{"resolver":"https://pith.science/pith/STF244AREI5N6RVDI4M2OE2O5F","bundle":"https://pith.science/pith/STF244AREI5N6RVDI4M2OE2O5F/bundle.json","state":"https://pith.science/pith/STF244AREI5N6RVDI4M2OE2O5F/state.json","well_known_bundle":"https://pith.science/.well-known/pith/STF244AREI5N6RVDI4M2OE2O5F/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:STF244AREI5N6RVDI4M2OE2O5F","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ceafb7e3c85d48c6ac60095953d083355f1636b6aa94ab32d149556c04af6196","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T15:35:18Z","title_canon_sha256":"306c6c61a99d65f06507642eb4314f0d2e2a633fb7e8f959af6387c2dda59955"},"schema_version":"1.0","source":{"id":"1803.00897","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.00897","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"arxiv_version","alias_value":"1803.00897v1","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.00897","created_at":"2026-05-18T00:22:07Z"},{"alias_kind":"pith_short_12","alias_value":"STF244AREI5N","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"STF244AREI5N6RVD","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"STF244AR","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:053fdf6df20712a665df4cf4f97c90c0d38a7a58510713c671fd7a181a16c286","target":"graph","created_at":"2026-05-18T00:22:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The underlying paradigm of big data-driven machine learning reflects the desire of deriving better conclusions from simply analyzing more data, without the necessity of looking at theory and models. Is having simply more data always helpful? In 1936, The Literary Digest collected 2.3M filled in questionnaires to predict the outcome of that year's US presidential election. The outcome of this big data prediction proved to be entirely wrong, whereas George Gallup only needed 3K handpicked people to make an accurate prediction. Generally, biases occur in machine learning whenever the distribution","authors_text":"Patrick Glauner, Petko Valtchev, Radu State","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T15:35:18Z","title":"Impact of Biases in Big Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.00897","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7295976179dd77e6edf02552d8c5754f91e9659e9950f031269d7402a2fe1983","target":"record","created_at":"2026-05-18T00:22:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ceafb7e3c85d48c6ac60095953d083355f1636b6aa94ab32d149556c04af6196","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-02T15:35:18Z","title_canon_sha256":"306c6c61a99d65f06507642eb4314f0d2e2a633fb7e8f959af6387c2dda59955"},"schema_version":"1.0","source":{"id":"1803.00897","kind":"arxiv","version":1}},"canonical_sha256":"94cbae7011223adf46a34719a7134ee964ac297790b54c4cb4ea453271b30a46","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"94cbae7011223adf46a34719a7134ee964ac297790b54c4cb4ea453271b30a46","first_computed_at":"2026-05-18T00:22:07.555041Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:22:07.555041Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Xil3SHD6EIafIvFFs3lGsJJ801kM36fcLZv//jR5VOowyhEcBJwrksDuLct+UcQ1cV+DgMMD6txKWCtTBJCkBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:22:07.555618Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.00897","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7295976179dd77e6edf02552d8c5754f91e9659e9950f031269d7402a2fe1983","sha256:053fdf6df20712a665df4cf4f97c90c0d38a7a58510713c671fd7a181a16c286"],"state_sha256":"ffa1aee27018a9dd06379dcf20aeed36bb60c15a2c273e3e0fd63167802cd4e9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TaP+sGwQKHc5ScYHEJJaj69AXayU99NwGL6tNj5aMjELk4O+mxTGy0HuCLprL61AAO2LWZ5VuG7pWofk88UTAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-01T14:42:22.831160Z","bundle_sha256":"16725873d8243f0ea7c11e81691db78cd85f10d7eb0b6781ac06c0999181a565"}}