{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:BETR5BGXGBR3IVITZIPMVH5BX5","short_pith_number":"pith:BETR5BGX","canonical_record":{"source":{"id":"1703.08110","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-03-23T15:35:33Z","cross_cats_sorted":[],"title_canon_sha256":"20a7a002d9549ee178da3aedd327405b3d90588a6a4cc093edbd0ed165d5d4df","abstract_canon_sha256":"f058902c4eac1ee4f9313ba0b2e37c006c8db61b72850beec1e289f064366bf2"},"schema_version":"1.0"},"canonical_sha256":"09271e84d73063b45513ca1eca9fa1bf46f7fdba6bf4803fea01aa6b67842aa6","source":{"kind":"arxiv","id":"1703.08110","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08110","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08110v2","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08110","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"pith_short_12","alias_value":"BETR5BGXGBR3","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"BETR5BGXGBR3IVIT","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"BETR5BGX","created_at":"2026-05-18T12:31:08Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:BETR5BGXGBR3IVITZIPMVH5BX5","target":"record","payload":{"canonical_record":{"source":{"id":"1703.08110","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-03-23T15:35:33Z","cross_cats_sorted":[],"title_canon_sha256":"20a7a002d9549ee178da3aedd327405b3d90588a6a4cc093edbd0ed165d5d4df","abstract_canon_sha256":"f058902c4eac1ee4f9313ba0b2e37c006c8db61b72850beec1e289f064366bf2"},"schema_version":"1.0"},"canonical_sha256":"09271e84d73063b45513ca1eca9fa1bf46f7fdba6bf4803fea01aa6b67842aa6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:26:00.527211Z","signature_b64":"Pnqrw1Pb0DCv3xHjR4ItE2Ozli2qp4oQ7KZVmB+HRvBgQADghdB6Z5wkFIcSCOrT2zUtztKc4w/2lVHTbQlEDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"09271e84d73063b45513ca1eca9fa1bf46f7fdba6bf4803fea01aa6b67842aa6","last_reissued_at":"2026-05-18T00:26:00.526558Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:26:00.526558Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1703.08110","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:26:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qr3bMdv0I+q+l3ZsUPT9bVywZ5gzGu/wvk23JjJNTnsJE9Cfg8TTxSSBh0s3P7vp7fLzUzW2MJh6KJ53+SHtBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T21:36:19.612798Z"},"content_sha256":"ed1f0514df039de334df20d03399aa8e4352302dc6b449838b08481b2b816d8e","schema_version":"1.0","event_id":"sha256:ed1f0514df039de334df20d03399aa8e4352302dc6b449838b08481b2b816d8e"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:BETR5BGXGBR3IVITZIPMVH5BX5","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Training Gaussian Mixture Models at Scale via Coresets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ML","authors_text":"Andreas Krause, Dan Feldman, Mario Lucic, Matthew Faulkner","submitted_at":"2017-03-23T15:35:33Z","abstract_excerpt":"How can we train a statistical mixture model on a massive data set? In this work we show how to construct coresets for mixtures of Gaussians. A coreset is a weighted subset of the data, which guarantees that models fitting the coreset also provide a good fit for the original data set. We show that, perhaps surprisingly, Gaussian mixtures admit coresets of size polynomial in dimension and the number of mixture components, while being independent of the data set size. Hence, one can harness computationally intensive algorithms to compute a good approximation on a significantly smaller data set. "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08110","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:26:00Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UOQAGurTmunG1nEXyUjVSt+BfXWTmbRbpXAhPvb+FSmdV8Bk5V3SM0dDyR4Os9/Q3bs0cd2VrpmWclVRflukAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T21:36:19.613291Z"},"content_sha256":"708a07be758a7c1583e10e9813143a50689a04d6676eacda71dad4ca0015b18f","schema_version":"1.0","event_id":"sha256:708a07be758a7c1583e10e9813143a50689a04d6676eacda71dad4ca0015b18f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BETR5BGXGBR3IVITZIPMVH5BX5/bundle.json","state_url":"https://pith.science/pith/BETR5BGXGBR3IVITZIPMVH5BX5/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BETR5BGXGBR3IVITZIPMVH5BX5/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T21:36:19Z","links":{"resolver":"https://pith.science/pith/BETR5BGXGBR3IVITZIPMVH5BX5","bundle":"https://pith.science/pith/BETR5BGXGBR3IVITZIPMVH5BX5/bundle.json","state":"https://pith.science/pith/BETR5BGXGBR3IVITZIPMVH5BX5/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BETR5BGXGBR3IVITZIPMVH5BX5/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:BETR5BGXGBR3IVITZIPMVH5BX5","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f058902c4eac1ee4f9313ba0b2e37c006c8db61b72850beec1e289f064366bf2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-03-23T15:35:33Z","title_canon_sha256":"20a7a002d9549ee178da3aedd327405b3d90588a6a4cc093edbd0ed165d5d4df"},"schema_version":"1.0","source":{"id":"1703.08110","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1703.08110","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"arxiv_version","alias_value":"1703.08110v2","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.08110","created_at":"2026-05-18T00:26:00Z"},{"alias_kind":"pith_short_12","alias_value":"BETR5BGXGBR3","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_16","alias_value":"BETR5BGXGBR3IVIT","created_at":"2026-05-18T12:31:08Z"},{"alias_kind":"pith_short_8","alias_value":"BETR5BGX","created_at":"2026-05-18T12:31:08Z"}],"graph_snapshots":[{"event_id":"sha256:708a07be758a7c1583e10e9813143a50689a04d6676eacda71dad4ca0015b18f","target":"graph","created_at":"2026-05-18T00:26:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"How can we train a statistical mixture model on a massive data set? In this work we show how to construct coresets for mixtures of Gaussians. A coreset is a weighted subset of the data, which guarantees that models fitting the coreset also provide a good fit for the original data set. We show that, perhaps surprisingly, Gaussian mixtures admit coresets of size polynomial in dimension and the number of mixture components, while being independent of the data set size. Hence, one can harness computationally intensive algorithms to compute a good approximation on a significantly smaller data set. ","authors_text":"Andreas Krause, Dan Feldman, Mario Lucic, Matthew Faulkner","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-03-23T15:35:33Z","title":"Training Gaussian Mixture Models at Scale via Coresets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.08110","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ed1f0514df039de334df20d03399aa8e4352302dc6b449838b08481b2b816d8e","target":"record","created_at":"2026-05-18T00:26:00Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f058902c4eac1ee4f9313ba0b2e37c006c8db61b72850beec1e289f064366bf2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2017-03-23T15:35:33Z","title_canon_sha256":"20a7a002d9549ee178da3aedd327405b3d90588a6a4cc093edbd0ed165d5d4df"},"schema_version":"1.0","source":{"id":"1703.08110","kind":"arxiv","version":2}},"canonical_sha256":"09271e84d73063b45513ca1eca9fa1bf46f7fdba6bf4803fea01aa6b67842aa6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"09271e84d73063b45513ca1eca9fa1bf46f7fdba6bf4803fea01aa6b67842aa6","first_computed_at":"2026-05-18T00:26:00.526558Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:26:00.526558Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Pnqrw1Pb0DCv3xHjR4ItE2Ozli2qp4oQ7KZVmB+HRvBgQADghdB6Z5wkFIcSCOrT2zUtztKc4w/2lVHTbQlEDw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:26:00.527211Z","signed_message":"canonical_sha256_bytes"},"source_id":"1703.08110","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ed1f0514df039de334df20d03399aa8e4352302dc6b449838b08481b2b816d8e","sha256:708a07be758a7c1583e10e9813143a50689a04d6676eacda71dad4ca0015b18f"],"state_sha256":"00fe93c0797609ee006f61f85586f5b81c552ab3a69316949c2368ed229c9cdf"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DkLyVdxcOqBGRpY9jsrEUUGGFpwmPyZE0Ge2QKnopZLJcElhSPH8yWdJgYkOVJJBQFCHI1F0xbhfMHfwQ86rBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T21:36:19.615311Z","bundle_sha256":"71c0df08cdb563b231c4720da99633c54aecc3b1d64dd49b8009dcdabf7595a4"}}