{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:NJK2UK654XHEVOU5ZUA32VJQL4","short_pith_number":"pith:NJK2UK65","canonical_record":{"source":{"id":"1810.05784","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-13T01:59:49Z","cross_cats_sorted":[],"title_canon_sha256":"5840edbbe0121160ced86dd68e2acfc58fb22660b5b3b7bb43fafa514727108d","abstract_canon_sha256":"63341d6c7a82e2a11bd90c8f404e542ecfa9dff4b3fda92e9af4b82cb27d6e7d"},"schema_version":"1.0"},"canonical_sha256":"6a55aa2bdde5ce4aba9dcd01bd55305f3dbe0cd00c40ea9fb52932444c834338","source":{"kind":"arxiv","id":"1810.05784","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.05784","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"arxiv_version","alias_value":"1810.05784v1","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.05784","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"pith_short_12","alias_value":"NJK2UK654XHE","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NJK2UK654XHEVOU5","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NJK2UK65","created_at":"2026-05-18T12:32:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:NJK2UK654XHEVOU5ZUA32VJQL4","target":"record","payload":{"canonical_record":{"source":{"id":"1810.05784","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-13T01:59:49Z","cross_cats_sorted":[],"title_canon_sha256":"5840edbbe0121160ced86dd68e2acfc58fb22660b5b3b7bb43fafa514727108d","abstract_canon_sha256":"63341d6c7a82e2a11bd90c8f404e542ecfa9dff4b3fda92e9af4b82cb27d6e7d"},"schema_version":"1.0"},"canonical_sha256":"6a55aa2bdde5ce4aba9dcd01bd55305f3dbe0cd00c40ea9fb52932444c834338","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:03:26.957842Z","signature_b64":"Iht0hHK38DL1g1J5ZQ563PS/1GluB3xqt10a1ZUSdgQZjpYycjm+AROudHaBd6O2XFTwTG2+t8jbgKAmQ9BvCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6a55aa2bdde5ce4aba9dcd01bd55305f3dbe0cd00c40ea9fb52932444c834338","last_reissued_at":"2026-05-18T00:03:26.957435Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:03:26.957435Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1810.05784","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UiThAa8+4lF3XHi3RrWi8TfExH1mPtyYuKrrsNoQvkJGLUA1qocUK2iHZiy83ssvXIYIqy+jGFpBIzK/h0E+Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T21:09:10.947907Z"},"content_sha256":"2f68654b45c1438a10510e157627ef28f906d80c6ff29c06212b958012874e0c","schema_version":"1.0","event_id":"sha256:2f68654b45c1438a10510e157627ef28f906d80c6ff29c06212b958012874e0c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:NJK2UK654XHEVOU5ZUA32VJQL4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Measuring Swampiness: Quantifying Chaos in Large Heterogeneous Data Repositories","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Aaron Elmore, Brendan Whitaker, Kyle Chard, Luann Jung","submitted_at":"2018-10-13T01:59:49Z","abstract_excerpt":"As scientific data repositories and filesystems grow in size and complexity, they become increasingly disorganized. The coupling of massive quantities of data with poor organization makes it challenging for scientists to locate and utilize relevant data, thus slowing the process of analyzing data of interest. To address these issues, we explore an automated clustering approach for quantifying the organization of data repositories. Our parallel pipeline processes heterogeneous filetypes (e.g., text and tabular data), automatically clusters files based on content and metadata similarities, and c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.05784","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:03:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lo8J3K8kHuA5Z3DhNE0OHAjNRNxG0J9QJt58MPHjSAZuh+/ZpcQiW7Gd1id27Dt84fnOH8G23TsKC+kcGsqPDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T21:09:10.948260Z"},"content_sha256":"0e7e56c01b92eee845812004e2ca079391ef1b6107a78317057f86e06824ae10","schema_version":"1.0","event_id":"sha256:0e7e56c01b92eee845812004e2ca079391ef1b6107a78317057f86e06824ae10"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NJK2UK654XHEVOU5ZUA32VJQL4/bundle.json","state_url":"https://pith.science/pith/NJK2UK654XHEVOU5ZUA32VJQL4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NJK2UK654XHEVOU5ZUA32VJQL4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T21:09:10Z","links":{"resolver":"https://pith.science/pith/NJK2UK654XHEVOU5ZUA32VJQL4","bundle":"https://pith.science/pith/NJK2UK654XHEVOU5ZUA32VJQL4/bundle.json","state":"https://pith.science/pith/NJK2UK654XHEVOU5ZUA32VJQL4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NJK2UK654XHEVOU5ZUA32VJQL4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:NJK2UK654XHEVOU5ZUA32VJQL4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"63341d6c7a82e2a11bd90c8f404e542ecfa9dff4b3fda92e9af4b82cb27d6e7d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-13T01:59:49Z","title_canon_sha256":"5840edbbe0121160ced86dd68e2acfc58fb22660b5b3b7bb43fafa514727108d"},"schema_version":"1.0","source":{"id":"1810.05784","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1810.05784","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"arxiv_version","alias_value":"1810.05784v1","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1810.05784","created_at":"2026-05-18T00:03:26Z"},{"alias_kind":"pith_short_12","alias_value":"NJK2UK654XHE","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_16","alias_value":"NJK2UK654XHEVOU5","created_at":"2026-05-18T12:32:40Z"},{"alias_kind":"pith_short_8","alias_value":"NJK2UK65","created_at":"2026-05-18T12:32:40Z"}],"graph_snapshots":[{"event_id":"sha256:0e7e56c01b92eee845812004e2ca079391ef1b6107a78317057f86e06824ae10","target":"graph","created_at":"2026-05-18T00:03:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"As scientific data repositories and filesystems grow in size and complexity, they become increasingly disorganized. The coupling of massive quantities of data with poor organization makes it challenging for scientists to locate and utilize relevant data, thus slowing the process of analyzing data of interest. To address these issues, we explore an automated clustering approach for quantifying the organization of data repositories. Our parallel pipeline processes heterogeneous filetypes (e.g., text and tabular data), automatically clusters files based on content and metadata similarities, and c","authors_text":"Aaron Elmore, Brendan Whitaker, Kyle Chard, Luann Jung","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-13T01:59:49Z","title":"Measuring Swampiness: Quantifying Chaos in Large Heterogeneous Data Repositories"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1810.05784","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2f68654b45c1438a10510e157627ef28f906d80c6ff29c06212b958012874e0c","target":"record","created_at":"2026-05-18T00:03:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"63341d6c7a82e2a11bd90c8f404e542ecfa9dff4b3fda92e9af4b82cb27d6e7d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2018-10-13T01:59:49Z","title_canon_sha256":"5840edbbe0121160ced86dd68e2acfc58fb22660b5b3b7bb43fafa514727108d"},"schema_version":"1.0","source":{"id":"1810.05784","kind":"arxiv","version":1}},"canonical_sha256":"6a55aa2bdde5ce4aba9dcd01bd55305f3dbe0cd00c40ea9fb52932444c834338","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6a55aa2bdde5ce4aba9dcd01bd55305f3dbe0cd00c40ea9fb52932444c834338","first_computed_at":"2026-05-18T00:03:26.957435Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:03:26.957435Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Iht0hHK38DL1g1J5ZQ563PS/1GluB3xqt10a1ZUSdgQZjpYycjm+AROudHaBd6O2XFTwTG2+t8jbgKAmQ9BvCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:03:26.957842Z","signed_message":"canonical_sha256_bytes"},"source_id":"1810.05784","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2f68654b45c1438a10510e157627ef28f906d80c6ff29c06212b958012874e0c","sha256:0e7e56c01b92eee845812004e2ca079391ef1b6107a78317057f86e06824ae10"],"state_sha256":"a6c1169838f3ef385196bf056a5edc96f0a57dd534c3b94f73771c0c496b538b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7Yh88GXB3X+1+2y1AnHrnfYth1pzqK8f69Ah8wo3ODBGWu27PMMO1W7kfHtoHOJrCsWiMY//4VZPx/FSnV5iAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T21:09:10.950377Z","bundle_sha256":"3120bf7e36408c0e476790575f8e9f47798654353020455939eb8c16f7f4a38e"}}