{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:A6XA777UD7EWW2XLUJV477O3KW","short_pith_number":"pith:A6XA777U","canonical_record":{"source":{"id":"2603.21996","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-03-23T14:01:16Z","cross_cats_sorted":["stat.CO"],"title_canon_sha256":"1884dedc7de00abe731dbdca77ab8262610f7e3765c6291ed1867f078953f448","abstract_canon_sha256":"93a8a0e09889fa6756278e541eb116ae8045e39e1df12d3706ab3ed45d86b115"},"schema_version":"1.0"},"canonical_sha256":"07ae0ffff41fc96b6aeba26bcffddb558b6f223a9f37d97babb859fc76ccd1af","source":{"kind":"arxiv","id":"2603.21996","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21996","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21996v2","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21996","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"pith_short_12","alias_value":"A6XA777UD7EW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"A6XA777UD7EWW2XL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"A6XA777U","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:A6XA777UD7EWW2XLUJV477O3KW","target":"record","payload":{"canonical_record":{"source":{"id":"2603.21996","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-03-23T14:01:16Z","cross_cats_sorted":["stat.CO"],"title_canon_sha256":"1884dedc7de00abe731dbdca77ab8262610f7e3765c6291ed1867f078953f448","abstract_canon_sha256":"93a8a0e09889fa6756278e541eb116ae8045e39e1df12d3706ab3ed45d86b115"},"schema_version":"1.0"},"canonical_sha256":"07ae0ffff41fc96b6aeba26bcffddb558b6f223a9f37d97babb859fc76ccd1af","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:59.576917Z","signature_b64":"GJrdwQ5fNM51nCDxeGXbgCwZ88E1/3LJD3bl2rdaT0MjwhD/q5JWnl0V7nh4GsG6FDSnT9QZn5u2TJ8GDybaDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"07ae0ffff41fc96b6aeba26bcffddb558b6f223a9f37d97babb859fc76ccd1af","last_reissued_at":"2026-05-17T23:38:59.576266Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:59.576266Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.21996","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FmfHu9SSYYszsmSXc+DKCkwyapxdiPc4E9/DTsU4Pjm6NOGbIW1hzlGbmZiYd1oHucKMOn9FkOYjaUu90XXdAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T13:00:19.606233Z"},"content_sha256":"e431665222d630c6a26efb586d6151fee64303b30d3823469590ce07ac6f5feb","schema_version":"1.0","event_id":"sha256:e431665222d630c6a26efb586d6151fee64303b30d3823469590ce07ac6f5feb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:A6XA777UD7EWW2XLUJV477O3KW","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"StreamSampling.jl: Efficient Sampling from Data Streams in Julia","license":"http://creativecommons.org/licenses/by/4.0/","headline":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory.","cross_cats":["stat.CO"],"primary_cat":"cs.SE","authors_text":"Adriano Meligrana","submitted_at":"2026-03-23T14:01:16Z","abstract_excerpt":"StreamSampling$.$jl is a Julia library designed to provide general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown. In this paper, we describe the capabilities of the library and its advantages over traditional sampling procedures, such as maintaining a small, constant memory footprint and avoiding the need to fully materialize the stream in memory. Furthermore, we provide empirical benchmarks comparing online sampling methods against standard approaches, demonstrating performance and memory improvements."},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"StreamSampling.jl provides general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown, while maintaining a small, constant memory footprint.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The implemented algorithms produce unbiased samples and the reported benchmarks reflect genuine improvements without hidden implementation details or cherry-picked test cases.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"StreamSampling.jl implements efficient one-pass sampling algorithms for data streams in Julia with constant memory footprint and performance gains over traditional methods.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3ce2d674a28d4cf19d2632d0c677d82436ccab75cb3232a173d8c125ddea80f6"},"source":{"id":"2603.21996","kind":"arxiv","version":2},"verdict":{"id":"cc02baae-37de-415d-9ff0-b5c40ac565c0","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T06:26:24.730470Z","strongest_claim":"StreamSampling.jl provides general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown, while maintaining a small, constant memory footprint.","one_line_summary":"StreamSampling.jl implements efficient one-pass sampling algorithms for data streams in Julia with constant memory footprint and performance gains over traditional methods.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The implemented algorithms produce unbiased samples and the reported benchmarks reflect genuine improvements without hidden implementation details or cherry-picked test cases.","pith_extraction_headline":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory."},"references":{"count":26,"sample":[{"doi":"","year":null,"title":"The methods covered here fall under the umbrella ofonline sampling, i.e","work_id":"50576a49-56b5-4c52-a9ae-793d42e28127","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Related Work Online sampling techniques have been implemented across a range of programming languages and frameworks, typically addressing specific use cases rather than providing a unified suite of a","work_id":"f5363467-5485-4318-bf1e-e06a5469e416","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"sequential), the sampling scheme (with vs","work_id":"ba66833f-96dc-4382-ae4d-8d01d50e48cf","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2026,"title":"StreamSampling.jl: Efficient Sampling from Data Streams in Julia","work_id":"990ee970-098e-4873-afc9-1ded83eab296","ref_index":4,"cited_arxiv_id":"2603.21996","is_internal_anchor":true},{"doi":"","year":null,"title":"The package also providesitsample, which, similarly toStatsBase.sample, returns anArray, but, by using stream methods, it can be applied to any iterator","work_id":"50b90f2d-eccf-48c8-932c-8c0b15f78298","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":26,"snapshot_sha256":"d31a89fb8fa5340c650ac3398e734d2e0d020485c087cbcca4e2a0d811e833ce","internal_anchors":2},"formal_canon":{"evidence_count":1,"snapshot_sha256":"8cdd24e0364e8c7cd900ade6f30f772ef1c44afe2c25b499081c8b151cbee07a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"cc02baae-37de-415d-9ff0-b5c40ac565c0"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"3Fn/zn9gPu1w08JY9ZlI8ghsyOAUkzW072HAB5VvqrVvJ42YpR0zlBubBd9etzwu8t1UKdl98uVhVxWZ2LS+Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T13:00:19.607332Z"},"content_sha256":"b4f735078a0e677fff17988524074a12f3c4f30b049dd7dbd2ea4893ede88d75","schema_version":"1.0","event_id":"sha256:b4f735078a0e677fff17988524074a12f3c4f30b049dd7dbd2ea4893ede88d75"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/A6XA777UD7EWW2XLUJV477O3KW/bundle.json","state_url":"https://pith.science/pith/A6XA777UD7EWW2XLUJV477O3KW/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/A6XA777UD7EWW2XLUJV477O3KW/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T13:00:19Z","links":{"resolver":"https://pith.science/pith/A6XA777UD7EWW2XLUJV477O3KW","bundle":"https://pith.science/pith/A6XA777UD7EWW2XLUJV477O3KW/bundle.json","state":"https://pith.science/pith/A6XA777UD7EWW2XLUJV477O3KW/state.json","well_known_bundle":"https://pith.science/.well-known/pith/A6XA777UD7EWW2XLUJV477O3KW/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:A6XA777UD7EWW2XLUJV477O3KW","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"93a8a0e09889fa6756278e541eb116ae8045e39e1df12d3706ab3ed45d86b115","cross_cats_sorted":["stat.CO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-03-23T14:01:16Z","title_canon_sha256":"1884dedc7de00abe731dbdca77ab8262610f7e3765c6291ed1867f078953f448"},"schema_version":"1.0","source":{"id":"2603.21996","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.21996","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"arxiv_version","alias_value":"2603.21996v2","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.21996","created_at":"2026-05-17T23:38:59Z"},{"alias_kind":"pith_short_12","alias_value":"A6XA777UD7EW","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"A6XA777UD7EWW2XL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"A6XA777U","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:b4f735078a0e677fff17988524074a12f3c4f30b049dd7dbd2ea4893ede88d75","target":"graph","created_at":"2026-05-17T23:38:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"StreamSampling.jl provides general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown, while maintaining a small, constant memory footprint."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The implemented algorithms produce unbiased samples and the reported benchmarks reflect genuine improvements without hidden implementation details or cherry-picked test cases."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"StreamSampling.jl implements efficient one-pass sampling algorithms for data streams in Julia with constant memory footprint and performance gains over traditional methods."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory."}],"snapshot_sha256":"3ce2d674a28d4cf19d2632d0c677d82436ccab75cb3232a173d8c125ddea80f6"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"8cdd24e0364e8c7cd900ade6f30f772ef1c44afe2c25b499081c8b151cbee07a"},"paper":{"abstract_excerpt":"StreamSampling$.$jl is a Julia library designed to provide general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown. In this paper, we describe the capabilities of the library and its advantages over traditional sampling procedures, such as maintaining a small, constant memory footprint and avoiding the need to fully materialize the stream in memory. Furthermore, we provide empirical benchmarks comparing online sampling methods against standard approaches, demonstrating performance and memory improvements.","authors_text":"Adriano Meligrana","cross_cats":["stat.CO"],"headline":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-03-23T14:01:16Z","title":"StreamSampling.jl: Efficient Sampling from Data Streams in Julia"},"references":{"count":26,"internal_anchors":2,"resolved_work":26,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"The methods covered here fall under the umbrella ofonline sampling, i.e","work_id":"50576a49-56b5-4c52-a9ae-793d42e28127","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Related Work Online sampling techniques have been implemented across a range of programming languages and frameworks, typically addressing specific use cases rather than providing a unified suite of a","work_id":"f5363467-5485-4318-bf1e-e06a5469e416","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"sequential), the sampling scheme (with vs","work_id":"ba66833f-96dc-4382-ae4d-8d01d50e48cf","year":null},{"cited_arxiv_id":"2603.21996","doi":"","is_internal_anchor":true,"ref_index":4,"title":"StreamSampling.jl: Efficient Sampling from Data Streams in Julia","work_id":"990ee970-098e-4873-afc9-1ded83eab296","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"The package also providesitsample, which, similarly toStatsBase.sample, returns anArray, but, by using stream methods, it can be applied to any iterator","work_id":"50b90f2d-eccf-48c8-932c-8c0b15f78298","year":null}],"snapshot_sha256":"d31a89fb8fa5340c650ac3398e734d2e0d020485c087cbcca4e2a0d811e833ce"},"source":{"id":"2603.21996","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T06:26:24.730470Z","id":"cc02baae-37de-415d-9ff0-b5c40ac565c0","model_set":{"reader":"grok-4.3"},"one_line_summary":"StreamSampling.jl implements efficient one-pass sampling algorithms for data streams in Julia with constant memory footprint and performance gains over traditional methods.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"StreamSampling.jl enables unbiased sampling from data streams of unknown size in one pass while using only constant memory.","strongest_claim":"StreamSampling.jl provides general and efficient methods for sampling from data streams in a single pass, even when the total number of items is unknown, while maintaining a small, constant memory footprint.","weakest_assumption":"The implemented algorithms produce unbiased samples and the reported benchmarks reflect genuine improvements without hidden implementation details or cherry-picked test cases."}},"verdict_id":"cc02baae-37de-415d-9ff0-b5c40ac565c0"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e431665222d630c6a26efb586d6151fee64303b30d3823469590ce07ac6f5feb","target":"record","created_at":"2026-05-17T23:38:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"93a8a0e09889fa6756278e541eb116ae8045e39e1df12d3706ab3ed45d86b115","cross_cats_sorted":["stat.CO"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2026-03-23T14:01:16Z","title_canon_sha256":"1884dedc7de00abe731dbdca77ab8262610f7e3765c6291ed1867f078953f448"},"schema_version":"1.0","source":{"id":"2603.21996","kind":"arxiv","version":2}},"canonical_sha256":"07ae0ffff41fc96b6aeba26bcffddb558b6f223a9f37d97babb859fc76ccd1af","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"07ae0ffff41fc96b6aeba26bcffddb558b6f223a9f37d97babb859fc76ccd1af","first_computed_at":"2026-05-17T23:38:59.576266Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:59.576266Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GJrdwQ5fNM51nCDxeGXbgCwZ88E1/3LJD3bl2rdaT0MjwhD/q5JWnl0V7nh4GsG6FDSnT9QZn5u2TJ8GDybaDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:59.576917Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.21996","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e431665222d630c6a26efb586d6151fee64303b30d3823469590ce07ac6f5feb","sha256:b4f735078a0e677fff17988524074a12f3c4f30b049dd7dbd2ea4893ede88d75"],"state_sha256":"7718932c18825597f52ca1b740ad742414173eff0e088e86119a9e18a831f0a4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NP3etbaviXc5cX7qUJbVCjJzq1lGoMeQGNbp3Ni75q8wOVFDM2DZ6mNsH1Ux5d/bki9aqZr3R9lBFouN4mkAAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T13:00:19.612194Z","bundle_sha256":"c9e5acdcb4ed1f24071ee8f33e6e4dfaa1e1b54b75c077358b3736c18a57031e"}}