{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:L5KOFDYR7KCK7I4VOFQPJRL3GE","short_pith_number":"pith:L5KOFDYR","schema_version":"1.0","canonical_sha256":"5f54e28f11fa84afa3957160f4c57b31078d276af2d34f75890aae40d2dd2e4f","source":{"kind":"arxiv","id":"1612.02099","version":1},"attestation_state":"computed","paper":{"title":"Statistical and Computational Guarantees of Lloyd's Algorithm and its Variants","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML","stat.TH"],"primary_cat":"math.ST","authors_text":"Harrison H. Zhou, Yu Lu","submitted_at":"2016-12-07T02:35:54Z","abstract_excerpt":"Clustering is a fundamental problem in statistics and machine learning. Lloyd's algorithm, proposed in 1957, is still possibly the most widely used clustering algorithm in practice due to its simplicity and empirical performance. However, there has been little theoretical investigation on the statistical and computational guarantees of Lloyd's algorithm. This paper is an attempt to bridge this gap between practice and theory. We investigate the performance of Lloyd's algorithm on clustering sub-Gaussian mixtures. Under an appropriate initialization for labels or centers, we show that Lloyd's a"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1612.02099","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"math.ST","submitted_at":"2016-12-07T02:35:54Z","cross_cats_sorted":["cs.LG","stat.ML","stat.TH"],"title_canon_sha256":"f3c5f7a68ff5c6b53c2246081ad8c14c8d4c8f0c11e413b5c20cd9935c88821b","abstract_canon_sha256":"9ec185c10d9b2d4f2078e0f223fadd01ae89825812b63b8d4522b2b60290e9c2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:55:35.332863Z","signature_b64":"qH5rLgTUz9ZAQKKlnwoXlcv6gItGMQGmAj+HMv7TLBdk97ajKglX7Zt0OOptIQM2qS2V/RDu4iBJDXLOSq+gDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5f54e28f11fa84afa3957160f4c57b31078d276af2d34f75890aae40d2dd2e4f","last_reissued_at":"2026-05-18T00:55:35.332294Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:55:35.332294Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Statistical and Computational Guarantees of Lloyd's Algorithm and its Variants","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML","stat.TH"],"primary_cat":"math.ST","authors_text":"Harrison H. Zhou, Yu Lu","submitted_at":"2016-12-07T02:35:54Z","abstract_excerpt":"Clustering is a fundamental problem in statistics and machine learning. Lloyd's algorithm, proposed in 1957, is still possibly the most widely used clustering algorithm in practice due to its simplicity and empirical performance. However, there has been little theoretical investigation on the statistical and computational guarantees of Lloyd's algorithm. This paper is an attempt to bridge this gap between practice and theory. We investigate the performance of Lloyd's algorithm on clustering sub-Gaussian mixtures. Under an appropriate initialization for labels or centers, we show that Lloyd's a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.02099","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1612.02099","created_at":"2026-05-18T00:55:35.332380+00:00"},{"alias_kind":"arxiv_version","alias_value":"1612.02099v1","created_at":"2026-05-18T00:55:35.332380+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.02099","created_at":"2026-05-18T00:55:35.332380+00:00"},{"alias_kind":"pith_short_12","alias_value":"L5KOFDYR7KCK","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_16","alias_value":"L5KOFDYR7KCK7I4V","created_at":"2026-05-18T12:30:29.479603+00:00"},{"alias_kind":"pith_short_8","alias_value":"L5KOFDYR","created_at":"2026-05-18T12:30:29.479603+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2604.22633","citing_title":"Mixed Membership sub-Gaussian Models","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19091","citing_title":"Fast estimation of Gaussian mixture components via centering and singular value thresholding","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.05337","citing_title":"Individual-heterogeneous sub-Gaussian Mixture Models","ref_index":15,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02448","citing_title":"The interplay of signal-to-noise ratio and variance misspecification in Gaussian mixtures","ref_index":28,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE","json":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE.json","graph_json":"https://pith.science/api/pith-number/L5KOFDYR7KCK7I4VOFQPJRL3GE/graph.json","events_json":"https://pith.science/api/pith-number/L5KOFDYR7KCK7I4VOFQPJRL3GE/events.json","paper":"https://pith.science/paper/L5KOFDYR"},"agent_actions":{"view_html":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE","download_json":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE.json","view_paper":"https://pith.science/paper/L5KOFDYR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1612.02099&json=true","fetch_graph":"https://pith.science/api/pith-number/L5KOFDYR7KCK7I4VOFQPJRL3GE/graph.json","fetch_events":"https://pith.science/api/pith-number/L5KOFDYR7KCK7I4VOFQPJRL3GE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE/action/storage_attestation","attest_author":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE/action/author_attestation","sign_citation":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE/action/citation_signature","submit_replication":"https://pith.science/pith/L5KOFDYR7KCK7I4VOFQPJRL3GE/action/replication_record"}},"created_at":"2026-05-18T00:55:35.332380+00:00","updated_at":"2026-05-18T00:55:35.332380+00:00"}