{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:NUMK2L3RMQZHJP264IYGS2FH4N","short_pith_number":"pith:NUMK2L3R","canonical_record":{"source":{"id":"1505.05211","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-19T23:45:05Z","cross_cats_sorted":[],"title_canon_sha256":"6306609e3778caffbec3b99ba29013c2e7edf3b8014337e90e9accc768f393fd","abstract_canon_sha256":"037ae7decde05ebedcaf9772df121b8d74498be4f9774ad46df54355f0fb4d95"},"schema_version":"1.0"},"canonical_sha256":"6d18ad2f71643274bf5ee2306968a7e342138f53460952e047a0751eb55100d8","source":{"kind":"arxiv","id":"1505.05211","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05211","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05211v1","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05211","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"pith_short_12","alias_value":"NUMK2L3RMQZH","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_16","alias_value":"NUMK2L3RMQZHJP26","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_8","alias_value":"NUMK2L3R","created_at":"2026-05-18T12:29:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:NUMK2L3RMQZHJP264IYGS2FH4N","target":"record","payload":{"canonical_record":{"source":{"id":"1505.05211","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-19T23:45:05Z","cross_cats_sorted":[],"title_canon_sha256":"6306609e3778caffbec3b99ba29013c2e7edf3b8014337e90e9accc768f393fd","abstract_canon_sha256":"037ae7decde05ebedcaf9772df121b8d74498be4f9774ad46df54355f0fb4d95"},"schema_version":"1.0"},"canonical_sha256":"6d18ad2f71643274bf5ee2306968a7e342138f53460952e047a0751eb55100d8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:03:59.087304Z","signature_b64":"qiHd+E+8p4QNjrwcJ33vcC4TuEzgL0P8ngtB3SACxIWXhaalwA1GpnnxJ/kkkJFDXQ5UpEVI9man+m7jMTgAAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"6d18ad2f71643274bf5ee2306968a7e342138f53460952e047a0751eb55100d8","last_reissued_at":"2026-05-18T02:03:59.086560Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:03:59.086560Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1505.05211","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WLgNoJdTmEFmX/B2FfxeiqG5w0LHJ7R8S79RThhi54BXe9mWYNbVYcgI2E96aqaGcPM6VWzLMke50/3tVOyjBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T13:23:48.916659Z"},"content_sha256":"d4f2c4d09f902f5fe0bb93b99a393e1fd5108d01769335e4f06453445cb14b98","schema_version":"1.0","event_id":"sha256:d4f2c4d09f902f5fe0bb93b99a393e1fd5108d01769335e4f06453445cb14b98"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:NUMK2L3RMQZHJP264IYGS2FH4N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Principles of Dataset Versioning: Exploring the Recreation/Storage Tradeoff","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DB","authors_text":"Aditya Parameswaran, Amit Chavan, Amol Deshpande, Silu Huang, Souvik Bhattacherjee","submitted_at":"2015-05-19T23:45:05Z","abstract_excerpt":"The relative ease of collaborative data science and analysis has led to a proliferation of many thousands or millions of $versions$ of the same datasets in many scientific and commercial domains, acquired or constructed at various stages of data analysis across many users, and often over long periods of time. Managing, storing, and recreating these dataset versions is a non-trivial task. The fundamental challenge here is the $storage-recreation\\;trade-off$: the more storage we use, the faster it is to recreate or retrieve versions, while the less storage we use, the slower it is to recreate or"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05211","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4K0hyMQJ+8cqwBFF8VmC/ye22ArqYOfPKu66NzJvWQVbJedgZIDbjdM38Akm/3/bj+em0yvY7dNGBvZQNjQVDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T13:23:48.917244Z"},"content_sha256":"e487866f2311f61d4666f22d0bbdca255de84793ef3d9e4776cc2204f4ae69b3","schema_version":"1.0","event_id":"sha256:e487866f2311f61d4666f22d0bbdca255de84793ef3d9e4776cc2204f4ae69b3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NUMK2L3RMQZHJP264IYGS2FH4N/bundle.json","state_url":"https://pith.science/pith/NUMK2L3RMQZHJP264IYGS2FH4N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NUMK2L3RMQZHJP264IYGS2FH4N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-19T13:23:48Z","links":{"resolver":"https://pith.science/pith/NUMK2L3RMQZHJP264IYGS2FH4N","bundle":"https://pith.science/pith/NUMK2L3RMQZHJP264IYGS2FH4N/bundle.json","state":"https://pith.science/pith/NUMK2L3RMQZHJP264IYGS2FH4N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NUMK2L3RMQZHJP264IYGS2FH4N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:NUMK2L3RMQZHJP264IYGS2FH4N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"037ae7decde05ebedcaf9772df121b8d74498be4f9774ad46df54355f0fb4d95","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-19T23:45:05Z","title_canon_sha256":"6306609e3778caffbec3b99ba29013c2e7edf3b8014337e90e9accc768f393fd"},"schema_version":"1.0","source":{"id":"1505.05211","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05211","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05211v1","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05211","created_at":"2026-05-18T02:03:59Z"},{"alias_kind":"pith_short_12","alias_value":"NUMK2L3RMQZH","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_16","alias_value":"NUMK2L3RMQZHJP26","created_at":"2026-05-18T12:29:34Z"},{"alias_kind":"pith_short_8","alias_value":"NUMK2L3R","created_at":"2026-05-18T12:29:34Z"}],"graph_snapshots":[{"event_id":"sha256:e487866f2311f61d4666f22d0bbdca255de84793ef3d9e4776cc2204f4ae69b3","target":"graph","created_at":"2026-05-18T02:03:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The relative ease of collaborative data science and analysis has led to a proliferation of many thousands or millions of $versions$ of the same datasets in many scientific and commercial domains, acquired or constructed at various stages of data analysis across many users, and often over long periods of time. Managing, storing, and recreating these dataset versions is a non-trivial task. The fundamental challenge here is the $storage-recreation\\;trade-off$: the more storage we use, the faster it is to recreate or retrieve versions, while the less storage we use, the slower it is to recreate or","authors_text":"Aditya Parameswaran, Amit Chavan, Amol Deshpande, Silu Huang, Souvik Bhattacherjee","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-19T23:45:05Z","title":"Principles of Dataset Versioning: Exploring the Recreation/Storage Tradeoff"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05211","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d4f2c4d09f902f5fe0bb93b99a393e1fd5108d01769335e4f06453445cb14b98","target":"record","created_at":"2026-05-18T02:03:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"037ae7decde05ebedcaf9772df121b8d74498be4f9774ad46df54355f0fb4d95","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DB","submitted_at":"2015-05-19T23:45:05Z","title_canon_sha256":"6306609e3778caffbec3b99ba29013c2e7edf3b8014337e90e9accc768f393fd"},"schema_version":"1.0","source":{"id":"1505.05211","kind":"arxiv","version":1}},"canonical_sha256":"6d18ad2f71643274bf5ee2306968a7e342138f53460952e047a0751eb55100d8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"6d18ad2f71643274bf5ee2306968a7e342138f53460952e047a0751eb55100d8","first_computed_at":"2026-05-18T02:03:59.086560Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:03:59.086560Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"qiHd+E+8p4QNjrwcJ33vcC4TuEzgL0P8ngtB3SACxIWXhaalwA1GpnnxJ/kkkJFDXQ5UpEVI9man+m7jMTgAAg==","signature_status":"signed_v1","signed_at":"2026-05-18T02:03:59.087304Z","signed_message":"canonical_sha256_bytes"},"source_id":"1505.05211","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d4f2c4d09f902f5fe0bb93b99a393e1fd5108d01769335e4f06453445cb14b98","sha256:e487866f2311f61d4666f22d0bbdca255de84793ef3d9e4776cc2204f4ae69b3"],"state_sha256":"17d2bce072f4dae1e2c30819b9b16f414a62a13ed15166de04f5b06aabc2c918"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DFZAq5fC0hUH03gkxumCjbauxc2KW2fAurGdIcbUcM5axcfooktnqf2JXWCEj+KkLi2hUfSh6Z4Io8FoZk7kDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-19T13:23:48.919513Z","bundle_sha256":"cbeffd7c97cedcb3f808bf86df737afe64e964aa67600f0bdeb0bc2991e85723"}}