{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:ZS6MWKTTO4OHCQXTVPE4LTE7ZM","short_pith_number":"pith:ZS6MWKTT","canonical_record":{"source":{"id":"1505.05613","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-05-21T06:22:04Z","cross_cats_sorted":["cs.AI","cs.DC"],"title_canon_sha256":"e9a75ad17b08cc67c63899bbdb2f45922ddaef35cbb8f240bf69090e896d721c","abstract_canon_sha256":"5be7950c01d666b863c6ca4c999572dbab30a4d676090467251cdf7f21d33f3a"},"schema_version":"1.0"},"canonical_sha256":"ccbccb2a73771c7142f3abc9c5cc9fcb37ca7b1715de78a375fd4913a8434050","source":{"kind":"arxiv","id":"1505.05613","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05613","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05613v1","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05613","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"pith_short_12","alias_value":"ZS6MWKTTO4OH","created_at":"2026-05-18T12:29:52Z"},{"alias_kind":"pith_short_16","alias_value":"ZS6MWKTTO4OHCQXT","created_at":"2026-05-18T12:29:52Z"},{"alias_kind":"pith_short_8","alias_value":"ZS6MWKTT","created_at":"2026-05-18T12:29:52Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:ZS6MWKTTO4OHCQXTVPE4LTE7ZM","target":"record","payload":{"canonical_record":{"source":{"id":"1505.05613","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-05-21T06:22:04Z","cross_cats_sorted":["cs.AI","cs.DC"],"title_canon_sha256":"e9a75ad17b08cc67c63899bbdb2f45922ddaef35cbb8f240bf69090e896d721c","abstract_canon_sha256":"5be7950c01d666b863c6ca4c999572dbab30a4d676090467251cdf7f21d33f3a"},"schema_version":"1.0"},"canonical_sha256":"ccbccb2a73771c7142f3abc9c5cc9fcb37ca7b1715de78a375fd4913a8434050","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:03:54.001992Z","signature_b64":"LiXYxkn3wT8KXDYp/hIVGedHSPgOkHy0Nr9lteUcPgsotfcc2HUMdmm3wfL16M4zXYRsW8/SmiNT2a65l/RlBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ccbccb2a73771c7142f3abc9c5cc9fcb37ca7b1715de78a375fd4913a8434050","last_reissued_at":"2026-05-18T02:03:54.001190Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:03:54.001190Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1505.05613","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"aQzCRyCNGnZDNl/bre4mdoQeKKej7e3v0kuScLFdvapQp7NjZUERyZyo02DncDSjaniGJlK6nr7hKPKvi/VKAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T22:14:14.761921Z"},"content_sha256":"f01a968f602ab281f8d7d0368dee8bed2925f711b0d014f3490f1cc6c2492627","schema_version":"1.0","event_id":"sha256:f01a968f602ab281f8d7d0368dee8bed2925f711b0d014f3490f1cc6c2492627"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:ZS6MWKTTO4OHCQXTVPE4LTE7ZM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Parallel Streaming Signature EM-tree: A Clustering Algorithm for Web Scale Applications","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.DC"],"primary_cat":"cs.IR","authors_text":"Christopher M. De Vries, Lance De Vine, Richi Nayak, Shlomo Geva","submitted_at":"2015-05-21T06:22:04Z","abstract_excerpt":"The proliferation of the web presents an unsolved problem of automatically analyzing billions of pages of natural language. We introduce a scalable algorithm that clusters hundreds of millions of web pages into hundreds of thousands of clusters. It does this on a single mid-range machine using efficient algorithms and compressed document representations. It is applied to two web-scale crawls covering tens of terabytes. ClueWeb09 and ClueWeb12 contain 500 and 733 million web pages and were clustered into 500,000 to 700,000 clusters. To the best of our knowledge, such fine grained clustering has"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05613","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T02:03:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ksBidGnlMGaTCOhKK0pKIEmnrgG+9pEonoBTQyjphJhHXwFPJszOTVU0zUxaFH3QAXkUBt+jpRn2/k06l9jhCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T22:14:14.762619Z"},"content_sha256":"74250cd4c7be94d159539c8799925236dc2efaafda0962b2d5423aabb7f72bb9","schema_version":"1.0","event_id":"sha256:74250cd4c7be94d159539c8799925236dc2efaafda0962b2d5423aabb7f72bb9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/bundle.json","state_url":"https://pith.science/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T22:14:14Z","links":{"resolver":"https://pith.science/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM","bundle":"https://pith.science/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/bundle.json","state":"https://pith.science/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZS6MWKTTO4OHCQXTVPE4LTE7ZM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:ZS6MWKTTO4OHCQXTVPE4LTE7ZM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5be7950c01d666b863c6ca4c999572dbab30a4d676090467251cdf7f21d33f3a","cross_cats_sorted":["cs.AI","cs.DC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-05-21T06:22:04Z","title_canon_sha256":"e9a75ad17b08cc67c63899bbdb2f45922ddaef35cbb8f240bf69090e896d721c"},"schema_version":"1.0","source":{"id":"1505.05613","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1505.05613","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"arxiv_version","alias_value":"1505.05613v1","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1505.05613","created_at":"2026-05-18T02:03:54Z"},{"alias_kind":"pith_short_12","alias_value":"ZS6MWKTTO4OH","created_at":"2026-05-18T12:29:52Z"},{"alias_kind":"pith_short_16","alias_value":"ZS6MWKTTO4OHCQXT","created_at":"2026-05-18T12:29:52Z"},{"alias_kind":"pith_short_8","alias_value":"ZS6MWKTT","created_at":"2026-05-18T12:29:52Z"}],"graph_snapshots":[{"event_id":"sha256:74250cd4c7be94d159539c8799925236dc2efaafda0962b2d5423aabb7f72bb9","target":"graph","created_at":"2026-05-18T02:03:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The proliferation of the web presents an unsolved problem of automatically analyzing billions of pages of natural language. We introduce a scalable algorithm that clusters hundreds of millions of web pages into hundreds of thousands of clusters. It does this on a single mid-range machine using efficient algorithms and compressed document representations. It is applied to two web-scale crawls covering tens of terabytes. ClueWeb09 and ClueWeb12 contain 500 and 733 million web pages and were clustered into 500,000 to 700,000 clusters. To the best of our knowledge, such fine grained clustering has","authors_text":"Christopher M. De Vries, Lance De Vine, Richi Nayak, Shlomo Geva","cross_cats":["cs.AI","cs.DC"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-05-21T06:22:04Z","title":"Parallel Streaming Signature EM-tree: A Clustering Algorithm for Web Scale Applications"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1505.05613","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f01a968f602ab281f8d7d0368dee8bed2925f711b0d014f3490f1cc6c2492627","target":"record","created_at":"2026-05-18T02:03:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5be7950c01d666b863c6ca4c999572dbab30a4d676090467251cdf7f21d33f3a","cross_cats_sorted":["cs.AI","cs.DC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2015-05-21T06:22:04Z","title_canon_sha256":"e9a75ad17b08cc67c63899bbdb2f45922ddaef35cbb8f240bf69090e896d721c"},"schema_version":"1.0","source":{"id":"1505.05613","kind":"arxiv","version":1}},"canonical_sha256":"ccbccb2a73771c7142f3abc9c5cc9fcb37ca7b1715de78a375fd4913a8434050","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ccbccb2a73771c7142f3abc9c5cc9fcb37ca7b1715de78a375fd4913a8434050","first_computed_at":"2026-05-18T02:03:54.001190Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T02:03:54.001190Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LiXYxkn3wT8KXDYp/hIVGedHSPgOkHy0Nr9lteUcPgsotfcc2HUMdmm3wfL16M4zXYRsW8/SmiNT2a65l/RlBQ==","signature_status":"signed_v1","signed_at":"2026-05-18T02:03:54.001992Z","signed_message":"canonical_sha256_bytes"},"source_id":"1505.05613","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f01a968f602ab281f8d7d0368dee8bed2925f711b0d014f3490f1cc6c2492627","sha256:74250cd4c7be94d159539c8799925236dc2efaafda0962b2d5423aabb7f72bb9"],"state_sha256":"a9da6c7ca928f0c54082a801a9f36fead2a86092e2dcfe795ec94fc218a56d8c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"f3M/Z+AQCLT6r/lGohon4RG8mTQsb8HVdSXfSbxs4xnNfM7/caTYNUogmzYZIX59MitT4fft2bTUY4AEktHkDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T22:14:14.766168Z","bundle_sha256":"56cf437d97f6601981193d22e66a82e13c8df89fc11a81289078a75189c8a60c"}}