{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:SQX4BEE2AQNVLZONS6AB2EQVC7","short_pith_number":"pith:SQX4BEE2","canonical_record":{"source":{"id":"1811.11242","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-11-27T20:26:33Z","cross_cats_sorted":[],"title_canon_sha256":"0ae5f91c09b3ddf9e97394e7553b864af67a791fbd1503ed31d3aff4a4f9e428","abstract_canon_sha256":"56644f63c279649d2917e700bd97ecce58f9f37041bedec7540c4df7d463ced0"},"schema_version":"1.0"},"canonical_sha256":"942fc0909a041b55e5cd97801d121517e246b4665e8319d368ecb8934962c216","source":{"kind":"arxiv","id":"1811.11242","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.11242","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"arxiv_version","alias_value":"1811.11242v1","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.11242","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"pith_short_12","alias_value":"SQX4BEE2AQNV","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SQX4BEE2AQNVLZON","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SQX4BEE2","created_at":"2026-05-18T12:32:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:SQX4BEE2AQNVLZONS6AB2EQVC7","target":"record","payload":{"canonical_record":{"source":{"id":"1811.11242","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-11-27T20:26:33Z","cross_cats_sorted":[],"title_canon_sha256":"0ae5f91c09b3ddf9e97394e7553b864af67a791fbd1503ed31d3aff4a4f9e428","abstract_canon_sha256":"56644f63c279649d2917e700bd97ecce58f9f37041bedec7540c4df7d463ced0"},"schema_version":"1.0"},"canonical_sha256":"942fc0909a041b55e5cd97801d121517e246b4665e8319d368ecb8934962c216","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:32.027283Z","signature_b64":"EVDxzdoA0rspnJK7tnrWFHiE93HgavUBiOqnwLxbPyNMNWUc6EA0O5YEHBCs3xiNHwzWauGyifBY/9nugBitAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"942fc0909a041b55e5cd97801d121517e246b4665e8319d368ecb8934962c216","last_reissued_at":"2026-05-17T23:39:32.026549Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:32.026549Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.11242","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fjRskhgiNMj+0aoC0BuKWYHVevg2K9BjVpAL56zKp5qSEJ3UspPZpmhokmdvCoeXrF3z12sfLJsWGgRZiAjADw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:49:24.452507Z"},"content_sha256":"33fa7544a878a5e4f779f75aff0255e6175267204860ebdedae9b4845f34b4a4","schema_version":"1.0","event_id":"sha256:33fa7544a878a5e4f779f75aff0255e6175267204860ebdedae9b4845f34b4a4"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:SQX4BEE2AQNVLZONS6AB2EQVC7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Wrangling Messy CSV Files by Detecting Row and Type Patterns","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Alfredo Nazabal, Charles Sutton, Gerrit J.J. van den Burg","submitted_at":"2018-11-27T20:26:33Z","abstract_excerpt":"It is well known that data scientists spend the majority of their time on preparing data for analysis. One of the first steps in this preparation phase is to load the data from the raw storage format. Comma-separated value (CSV) files are a popular format for tabular data due to their simplicity and ostensible ease of use. However, formatting standards for CSV files are not followed consistently, so each file requires manual inspection and potentially repair before the data can be loaded, an enormous waste of human effort for a task that should be one of the simplest parts of data science. The"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.11242","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"j5ONvFHAqizYgYR2HvfuDzuQG2AquwP99byZe8oHmRNk9TwHgfQeKHpPPvlA9NZDdooISn0xZ0BzBEqnTkslDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:49:24.452876Z"},"content_sha256":"9e6e20a02b23782fc880e99685df0d8e0b1c2c051d58b8669af549b4e1c104fe","schema_version":"1.0","event_id":"sha256:9e6e20a02b23782fc880e99685df0d8e0b1c2c051d58b8669af549b4e1c104fe"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/bundle.json","state_url":"https://pith.science/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T22:49:24Z","links":{"resolver":"https://pith.science/pith/SQX4BEE2AQNVLZONS6AB2EQVC7","bundle":"https://pith.science/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/bundle.json","state":"https://pith.science/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/SQX4BEE2AQNVLZONS6AB2EQVC7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:SQX4BEE2AQNVLZONS6AB2EQVC7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"56644f63c279649d2917e700bd97ecce58f9f37041bedec7540c4df7d463ced0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-11-27T20:26:33Z","title_canon_sha256":"0ae5f91c09b3ddf9e97394e7553b864af67a791fbd1503ed31d3aff4a4f9e428"},"schema_version":"1.0","source":{"id":"1811.11242","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.11242","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"arxiv_version","alias_value":"1811.11242v1","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.11242","created_at":"2026-05-17T23:39:32Z"},{"alias_kind":"pith_short_12","alias_value":"SQX4BEE2AQNV","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_16","alias_value":"SQX4BEE2AQNVLZON","created_at":"2026-05-18T12:32:53Z"},{"alias_kind":"pith_short_8","alias_value":"SQX4BEE2","created_at":"2026-05-18T12:32:53Z"}],"graph_snapshots":[{"event_id":"sha256:9e6e20a02b23782fc880e99685df0d8e0b1c2c051d58b8669af549b4e1c104fe","target":"graph","created_at":"2026-05-17T23:39:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"It is well known that data scientists spend the majority of their time on preparing data for analysis. One of the first steps in this preparation phase is to load the data from the raw storage format. Comma-separated value (CSV) files are a popular format for tabular data due to their simplicity and ostensible ease of use. However, formatting standards for CSV files are not followed consistently, so each file requires manual inspection and potentially repair before the data can be loaded, an enormous waste of human effort for a task that should be one of the simplest parts of data science. The","authors_text":"Alfredo Nazabal, Charles Sutton, Gerrit J.J. van den Burg","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-11-27T20:26:33Z","title":"Wrangling Messy CSV Files by Detecting Row and Type Patterns"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.11242","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:33fa7544a878a5e4f779f75aff0255e6175267204860ebdedae9b4845f34b4a4","target":"record","created_at":"2026-05-17T23:39:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"56644f63c279649d2917e700bd97ecce58f9f37041bedec7540c4df7d463ced0","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2018-11-27T20:26:33Z","title_canon_sha256":"0ae5f91c09b3ddf9e97394e7553b864af67a791fbd1503ed31d3aff4a4f9e428"},"schema_version":"1.0","source":{"id":"1811.11242","kind":"arxiv","version":1}},"canonical_sha256":"942fc0909a041b55e5cd97801d121517e246b4665e8319d368ecb8934962c216","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"942fc0909a041b55e5cd97801d121517e246b4665e8319d368ecb8934962c216","first_computed_at":"2026-05-17T23:39:32.026549Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:32.026549Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EVDxzdoA0rspnJK7tnrWFHiE93HgavUBiOqnwLxbPyNMNWUc6EA0O5YEHBCs3xiNHwzWauGyifBY/9nugBitAw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:32.027283Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.11242","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:33fa7544a878a5e4f779f75aff0255e6175267204860ebdedae9b4845f34b4a4","sha256:9e6e20a02b23782fc880e99685df0d8e0b1c2c051d58b8669af549b4e1c104fe"],"state_sha256":"30b2c788ff6f3a07deb578d4e53c63002ab8eab7a017570c52d96f49c3518a79"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MjEKsLeyY7L4FwGiP06bCpCQtbrf6MGQQoc7bT/CUV2jpU2HgpyaZz9nGxZOhmcWQS0YwsWwHho3OIV7yFf6DA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T22:49:24.455764Z","bundle_sha256":"09687d4ac409904ae7687c2c7fc6d4a68a2a54883daf865cce2b8a9c5b6acf33"}}