{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:IJBVV6ZHPZWF7FNJOLFVLHUCWE","short_pith_number":"pith:IJBVV6ZH","canonical_record":{"source":{"id":"1702.01015","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2017-02-03T14:17:02Z","cross_cats_sorted":["cs.DB"],"title_canon_sha256":"330de2313cf6d3b0b3b87fbf5a4ddc7fad58db8d7636efe748c3ce08cdfe1473","abstract_canon_sha256":"c6e8cf1001f6bf82880bc9268f3e562395cf7dd33d2e55fa3111aad2accc8905"},"schema_version":"1.0"},"canonical_sha256":"42435afb277e6c5f95a972cb559e82b123d2a29729f905768e47f7661e705574","source":{"kind":"arxiv","id":"1702.01015","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.01015","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"arxiv_version","alias_value":"1702.01015v1","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.01015","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"pith_short_12","alias_value":"IJBVV6ZHPZWF","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_16","alias_value":"IJBVV6ZHPZWF7FNJ","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_8","alias_value":"IJBVV6ZH","created_at":"2026-05-18T12:31:21Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:IJBVV6ZHPZWF7FNJOLFVLHUCWE","target":"record","payload":{"canonical_record":{"source":{"id":"1702.01015","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2017-02-03T14:17:02Z","cross_cats_sorted":["cs.DB"],"title_canon_sha256":"330de2313cf6d3b0b3b87fbf5a4ddc7fad58db8d7636efe748c3ce08cdfe1473","abstract_canon_sha256":"c6e8cf1001f6bf82880bc9268f3e562395cf7dd33d2e55fa3111aad2accc8905"},"schema_version":"1.0"},"canonical_sha256":"42435afb277e6c5f95a972cb559e82b123d2a29729f905768e47f7661e705574","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:51:29.198730Z","signature_b64":"BqRkeGo/PcRCYstyyRIFAzx2eojR2Bgf7uzn+19edn5cPTo+AqkBshseCTvOyZikR4Ben7SKGTxbsZgWYlu/Aw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"42435afb277e6c5f95a972cb559e82b123d2a29729f905768e47f7661e705574","last_reissued_at":"2026-05-18T00:51:29.198261Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:51:29.198261Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1702.01015","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:51:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Mp+U04PYXnI7W9lau1Wbgppw0r+0VmImQyScZyCgqL4tjQh+PAbQG8+TaMiiSfgvUT6g0eMTGZcdYNBSnRDeCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T16:15:47.414872Z"},"content_sha256":"fd10e64e752025a14c0b6fb1dfee9e1870d4fc23dda64442a0d0c8ba2b10af6d","schema_version":"1.0","event_id":"sha256:fd10e64e752025a14c0b6fb1dfee9e1870d4fc23dda64442a0d0c8ba2b10af6d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:IJBVV6ZHPZWF7FNJOLFVLHUCWE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"ArchiveSpark: Efficient Web Archive Access, Extraction and Derivation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DB"],"primary_cat":"cs.DL","authors_text":"Avishek Anand, Helge Holzmann, Vinay Goel","submitted_at":"2017-02-03T14:17:02Z","abstract_excerpt":"Web archives are a valuable resource for researchers of various disciplines. However, to use them as a scholarly source, researchers require a tool that provides efficient access to Web archive data for extraction and derivation of smaller datasets. Besides efficient access we identify five other objectives based on practical researcher needs such as ease of use, extensibility and reusability.\n  Towards these objectives we propose ArchiveSpark, a framework for efficient, distributed Web archive processing that builds a research corpus by working on existing and standardized data formats common"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.01015","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:51:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I4Jt0luV9Kl2P4eWCSB/rE50HcECkPFRk8jFR5dWMz9f0KSbP1amJj0YOVQfIp0XrgiMZdOefUSkAOx7rJTiAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T16:15:47.415229Z"},"content_sha256":"491857852a7e95da28e487fb6a06f676a2df3dee8bd902ead32ae27824fe7db8","schema_version":"1.0","event_id":"sha256:491857852a7e95da28e487fb6a06f676a2df3dee8bd902ead32ae27824fe7db8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/bundle.json","state_url":"https://pith.science/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T16:15:47Z","links":{"resolver":"https://pith.science/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE","bundle":"https://pith.science/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/bundle.json","state":"https://pith.science/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IJBVV6ZHPZWF7FNJOLFVLHUCWE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:IJBVV6ZHPZWF7FNJOLFVLHUCWE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c6e8cf1001f6bf82880bc9268f3e562395cf7dd33d2e55fa3111aad2accc8905","cross_cats_sorted":["cs.DB"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2017-02-03T14:17:02Z","title_canon_sha256":"330de2313cf6d3b0b3b87fbf5a4ddc7fad58db8d7636efe748c3ce08cdfe1473"},"schema_version":"1.0","source":{"id":"1702.01015","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.01015","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"arxiv_version","alias_value":"1702.01015v1","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.01015","created_at":"2026-05-18T00:51:29Z"},{"alias_kind":"pith_short_12","alias_value":"IJBVV6ZHPZWF","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_16","alias_value":"IJBVV6ZHPZWF7FNJ","created_at":"2026-05-18T12:31:21Z"},{"alias_kind":"pith_short_8","alias_value":"IJBVV6ZH","created_at":"2026-05-18T12:31:21Z"}],"graph_snapshots":[{"event_id":"sha256:491857852a7e95da28e487fb6a06f676a2df3dee8bd902ead32ae27824fe7db8","target":"graph","created_at":"2026-05-18T00:51:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Web archives are a valuable resource for researchers of various disciplines. However, to use them as a scholarly source, researchers require a tool that provides efficient access to Web archive data for extraction and derivation of smaller datasets. Besides efficient access we identify five other objectives based on practical researcher needs such as ease of use, extensibility and reusability.\n  Towards these objectives we propose ArchiveSpark, a framework for efficient, distributed Web archive processing that builds a research corpus by working on existing and standardized data formats common","authors_text":"Avishek Anand, Helge Holzmann, Vinay Goel","cross_cats":["cs.DB"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2017-02-03T14:17:02Z","title":"ArchiveSpark: Efficient Web Archive Access, Extraction and Derivation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.01015","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fd10e64e752025a14c0b6fb1dfee9e1870d4fc23dda64442a0d0c8ba2b10af6d","target":"record","created_at":"2026-05-18T00:51:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c6e8cf1001f6bf82880bc9268f3e562395cf7dd33d2e55fa3111aad2accc8905","cross_cats_sorted":["cs.DB"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DL","submitted_at":"2017-02-03T14:17:02Z","title_canon_sha256":"330de2313cf6d3b0b3b87fbf5a4ddc7fad58db8d7636efe748c3ce08cdfe1473"},"schema_version":"1.0","source":{"id":"1702.01015","kind":"arxiv","version":1}},"canonical_sha256":"42435afb277e6c5f95a972cb559e82b123d2a29729f905768e47f7661e705574","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"42435afb277e6c5f95a972cb559e82b123d2a29729f905768e47f7661e705574","first_computed_at":"2026-05-18T00:51:29.198261Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:51:29.198261Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"BqRkeGo/PcRCYstyyRIFAzx2eojR2Bgf7uzn+19edn5cPTo+AqkBshseCTvOyZikR4Ben7SKGTxbsZgWYlu/Aw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:51:29.198730Z","signed_message":"canonical_sha256_bytes"},"source_id":"1702.01015","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fd10e64e752025a14c0b6fb1dfee9e1870d4fc23dda64442a0d0c8ba2b10af6d","sha256:491857852a7e95da28e487fb6a06f676a2df3dee8bd902ead32ae27824fe7db8"],"state_sha256":"e3b304fe9ecfd6af59d161f1772c1e409706bd053273907e025bb257f5fc3299"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5DbHqoobE6W6Dct4gy7hDGMFqPzcSWMgp3cvCTNouG/01ufSOblvOA4jihP3KFf4FTvoRokDXEGq15kcS9cWCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T16:15:47.417231Z","bundle_sha256":"02bde2506595aa42349d32e90dfcb6debaf9898c03b4cef682c19abc50fe8a26"}}