{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:YBS76X3IRDAOJ5HFN5P2NIF4KD","short_pith_number":"pith:YBS76X3I","schema_version":"1.0","canonical_sha256":"c065ff5f6888c0e4f4e56f5fa6a0bc50ecc86a03ab1c0e18af87bb895d31e6d7","source":{"kind":"arxiv","id":"1605.02989","version":1},"attestation_state":"computed","paper":{"title":"An efficient K-means algorithm for Massive Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aritz P\\'erez, Jos\\'e Antonio Lozano, Marco Cap\\'o","submitted_at":"2016-05-10T13:01:37Z","abstract_excerpt":"Due to the progressive growth of the amount of data available in a wide variety of scientific fields, it has become more difficult to ma- nipulate and analyze such information. Even though datasets have grown in size, the K-means algorithm remains as one of the most popular clustering methods, in spite of its dependency on the initial settings and high computational cost, especially in terms of distance computations. In this work, we propose an efficient approximation to the K-means problem intended for massive data. Our approach recursively partitions the entire dataset into a small number of"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1605.02989","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2016-05-10T13:01:37Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"7c9555d0fb9f5e867768faadd5eda866b51f5ce751f351bc21b93a4186d50104","abstract_canon_sha256":"be3eab3d415bb371bddf75c9516137467c63711c63b066c5f88e226a5f4b861d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:15:07.954656Z","signature_b64":"s1WD0ouk0zb4wnn08gF9B8UR96y4axflaC+w6twHV8V/N/LjDeJZMStv2/28Tlsuu86OhJixngkHiwvaP7OcCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c065ff5f6888c0e4f4e56f5fa6a0bc50ecc86a03ab1c0e18af87bb895d31e6d7","last_reissued_at":"2026-05-18T01:15:07.953961Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:15:07.953961Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"An efficient K-means algorithm for Massive Data","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Aritz P\\'erez, Jos\\'e Antonio Lozano, Marco Cap\\'o","submitted_at":"2016-05-10T13:01:37Z","abstract_excerpt":"Due to the progressive growth of the amount of data available in a wide variety of scientific fields, it has become more difficult to ma- nipulate and analyze such information. Even though datasets have grown in size, the K-means algorithm remains as one of the most popular clustering methods, in spite of its dependency on the initial settings and high computational cost, especially in terms of distance computations. In this work, we propose an efficient approximation to the K-means problem intended for massive data. Our approach recursively partitions the entire dataset into a small number of"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1605.02989","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1605.02989","created_at":"2026-05-18T01:15:07.954083+00:00"},{"alias_kind":"arxiv_version","alias_value":"1605.02989v1","created_at":"2026-05-18T01:15:07.954083+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1605.02989","created_at":"2026-05-18T01:15:07.954083+00:00"},{"alias_kind":"pith_short_12","alias_value":"YBS76X3IRDAO","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_16","alias_value":"YBS76X3IRDAOJ5HF","created_at":"2026-05-18T12:30:53.716459+00:00"},{"alias_kind":"pith_short_8","alias_value":"YBS76X3I","created_at":"2026-05-18T12:30:53.716459+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD","json":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD.json","graph_json":"https://pith.science/api/pith-number/YBS76X3IRDAOJ5HFN5P2NIF4KD/graph.json","events_json":"https://pith.science/api/pith-number/YBS76X3IRDAOJ5HFN5P2NIF4KD/events.json","paper":"https://pith.science/paper/YBS76X3I"},"agent_actions":{"view_html":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD","download_json":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD.json","view_paper":"https://pith.science/paper/YBS76X3I","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1605.02989&json=true","fetch_graph":"https://pith.science/api/pith-number/YBS76X3IRDAOJ5HFN5P2NIF4KD/graph.json","fetch_events":"https://pith.science/api/pith-number/YBS76X3IRDAOJ5HFN5P2NIF4KD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD/action/storage_attestation","attest_author":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD/action/author_attestation","sign_citation":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD/action/citation_signature","submit_replication":"https://pith.science/pith/YBS76X3IRDAOJ5HFN5P2NIF4KD/action/replication_record"}},"created_at":"2026-05-18T01:15:07.954083+00:00","updated_at":"2026-05-18T01:15:07.954083+00:00"}