{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:WAX72TXR5E52JWMVD45DMVAWRI","short_pith_number":"pith:WAX72TXR","canonical_record":{"source":{"id":"1712.04146","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-12-12T06:49:28Z","cross_cats_sorted":["cs.DS","physics.data-an","stat.ML"],"title_canon_sha256":"15e9c9f9d499de9c0426ab7eb0361fc12f33d98c54a47f83ac5ee06bf935a27e","abstract_canon_sha256":"7369a9a2af7e495221d2581cb05132a71240844a7a767bc703e15f55616e7407"},"schema_version":"1.0"},"canonical_sha256":"b02ffd4ef1e93ba4d9951f3a3654168a076ceeeda4cede155aa03566d21b5d4c","source":{"kind":"arxiv","id":"1712.04146","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.04146","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"arxiv_version","alias_value":"1712.04146v2","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.04146","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"pith_short_12","alias_value":"WAX72TXR5E52","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"WAX72TXR5E52JWMV","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"WAX72TXR","created_at":"2026-05-18T12:31:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:WAX72TXR5E52JWMVD45DMVAWRI","target":"record","payload":{"canonical_record":{"source":{"id":"1712.04146","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-12-12T06:49:28Z","cross_cats_sorted":["cs.DS","physics.data-an","stat.ML"],"title_canon_sha256":"15e9c9f9d499de9c0426ab7eb0361fc12f33d98c54a47f83ac5ee06bf935a27e","abstract_canon_sha256":"7369a9a2af7e495221d2581cb05132a71240844a7a767bc703e15f55616e7407"},"schema_version":"1.0"},"canonical_sha256":"b02ffd4ef1e93ba4d9951f3a3654168a076ceeeda4cede155aa03566d21b5d4c","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:53.971371Z","signature_b64":"Vt+vL2tK/jFM0tMtDj6K4rldRY52zhPHnYGOdHQuRS3hwPT08Htgfi6lVhJpv0voSqSQjqXlewZChSCYwAAyDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b02ffd4ef1e93ba4d9951f3a3654168a076ceeeda4cede155aa03566d21b5d4c","last_reissued_at":"2026-05-17T23:43:53.970698Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:53.970698Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.04146","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"S6luE67vGkJ12HT0FztCpRcQFfRjrlSSmndvKLXlCaBlcrnazkji/KZOkmb/uSruAX9jPlxm1Fq2VAms6r7zCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T20:05:02.314169Z"},"content_sha256":"d4908e024ab874518dd19ed0f96ddd3cd2571ffdfed5fd2c9edd434f402c64a0","schema_version":"1.0","event_id":"sha256:d4908e024ab874518dd19ed0f96ddd3cd2571ffdfed5fd2c9edd434f402c64a0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:WAX72TXR5E52JWMVD45DMVAWRI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Random Sample Partition Data Model for Big Data Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.DS","physics.data-an","stat.ML"],"primary_cat":"cs.DC","authors_text":"Chenghao Wei, Heping He, Joshua Zhexue Huang, Salman Salloum, Tamer Z. Emara, Xiaoliang Zhang, Yulin He","submitted_at":"2017-12-12T06:49:28Z","abstract_excerpt":"Big data sets must be carefully partitioned into statistically similar data subsets that can be used as representative samples for big data analysis tasks. In this paper, we propose the random sample partition (RSP) data model to represent a big data set as a set of non-overlapping data subsets, called RSP data blocks, where each RSP data block has a probability distribution similar to the whole big data set. Under this data model, efficient block level sampling is used to randomly select RSP data blocks, replacing expensive record level sampling to select sample data from a big distributed da"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.04146","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VXpRZNcBix7lZAEf8hUdonT8dvYsICOOVWod+uKhmlsmhBgaOcRSYx/nNQDu8f+7BfM/IgemeA2XRp//d0wuCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T20:05:02.314830Z"},"content_sha256":"612ceb45a86d43989d37db75eff0dc2972aa9f763e8639b064ca88f5d66f3da8","schema_version":"1.0","event_id":"sha256:612ceb45a86d43989d37db75eff0dc2972aa9f763e8639b064ca88f5d66f3da8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WAX72TXR5E52JWMVD45DMVAWRI/bundle.json","state_url":"https://pith.science/pith/WAX72TXR5E52JWMVD45DMVAWRI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WAX72TXR5E52JWMVD45DMVAWRI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T20:05:02Z","links":{"resolver":"https://pith.science/pith/WAX72TXR5E52JWMVD45DMVAWRI","bundle":"https://pith.science/pith/WAX72TXR5E52JWMVD45DMVAWRI/bundle.json","state":"https://pith.science/pith/WAX72TXR5E52JWMVD45DMVAWRI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WAX72TXR5E52JWMVD45DMVAWRI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:WAX72TXR5E52JWMVD45DMVAWRI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7369a9a2af7e495221d2581cb05132a71240844a7a767bc703e15f55616e7407","cross_cats_sorted":["cs.DS","physics.data-an","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-12-12T06:49:28Z","title_canon_sha256":"15e9c9f9d499de9c0426ab7eb0361fc12f33d98c54a47f83ac5ee06bf935a27e"},"schema_version":"1.0","source":{"id":"1712.04146","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.04146","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"arxiv_version","alias_value":"1712.04146v2","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.04146","created_at":"2026-05-17T23:43:53Z"},{"alias_kind":"pith_short_12","alias_value":"WAX72TXR5E52","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"WAX72TXR5E52JWMV","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"WAX72TXR","created_at":"2026-05-18T12:31:53Z"}],"graph_snapshots":[{"event_id":"sha256:612ceb45a86d43989d37db75eff0dc2972aa9f763e8639b064ca88f5d66f3da8","target":"graph","created_at":"2026-05-17T23:43:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Big data sets must be carefully partitioned into statistically similar data subsets that can be used as representative samples for big data analysis tasks. In this paper, we propose the random sample partition (RSP) data model to represent a big data set as a set of non-overlapping data subsets, called RSP data blocks, where each RSP data block has a probability distribution similar to the whole big data set. Under this data model, efficient block level sampling is used to randomly select RSP data blocks, replacing expensive record level sampling to select sample data from a big distributed da","authors_text":"Chenghao Wei, Heping He, Joshua Zhexue Huang, Salman Salloum, Tamer Z. Emara, Xiaoliang Zhang, Yulin He","cross_cats":["cs.DS","physics.data-an","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-12-12T06:49:28Z","title":"A Random Sample Partition Data Model for Big Data Analysis"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.04146","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d4908e024ab874518dd19ed0f96ddd3cd2571ffdfed5fd2c9edd434f402c64a0","target":"record","created_at":"2026-05-17T23:43:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7369a9a2af7e495221d2581cb05132a71240844a7a767bc703e15f55616e7407","cross_cats_sorted":["cs.DS","physics.data-an","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-12-12T06:49:28Z","title_canon_sha256":"15e9c9f9d499de9c0426ab7eb0361fc12f33d98c54a47f83ac5ee06bf935a27e"},"schema_version":"1.0","source":{"id":"1712.04146","kind":"arxiv","version":2}},"canonical_sha256":"b02ffd4ef1e93ba4d9951f3a3654168a076ceeeda4cede155aa03566d21b5d4c","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b02ffd4ef1e93ba4d9951f3a3654168a076ceeeda4cede155aa03566d21b5d4c","first_computed_at":"2026-05-17T23:43:53.970698Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:43:53.970698Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Vt+vL2tK/jFM0tMtDj6K4rldRY52zhPHnYGOdHQuRS3hwPT08Htgfi6lVhJpv0voSqSQjqXlewZChSCYwAAyDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:43:53.971371Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.04146","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d4908e024ab874518dd19ed0f96ddd3cd2571ffdfed5fd2c9edd434f402c64a0","sha256:612ceb45a86d43989d37db75eff0dc2972aa9f763e8639b064ca88f5d66f3da8"],"state_sha256":"750dbe58d8199045601c161663a6e0c28e40c784d6f98c276e0f2ce78eb5f2e9"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"laOwIffGJQVaTPidpq1+ZBzG/kdxii4t2Beqz+DwF635SsDWjhuhzzM2EQbXN0JxCGXDcY310VrE9QYE5HYLCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T20:05:02.317784Z","bundle_sha256":"486a4363846552fd61c1242365d7f2e4ccb52dd5ccd136acb55a67e390b68791"}}