{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:UIMMJSIDUEPESP4JNCWCYSWX2M","short_pith_number":"pith:UIMMJSID","schema_version":"1.0","canonical_sha256":"a218c4c903a11e493f8968ac2c4ad7d30d9fb6893cd1aedcf71b4874ba48e6b2","source":{"kind":"arxiv","id":"1905.12753","version":1},"attestation_state":"computed","paper":{"title":"Clustering without Over-Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.DS","authors_text":"Alessandro Epasto, Mohammad Mahdian, Ravi Kumar, Sara Ahmadian","submitted_at":"2019-05-29T22:21:47Z","abstract_excerpt":"In this paper we consider clustering problems in which each point is endowed with a color. The goal is to cluster the points to minimize the classical clustering cost but with the additional constraint that no color is over-represented in any cluster. This problem is motivated by practical clustering settings, e.g., in clustering news articles where the color of an article is its source, it is preferable that no single news source dominates any cluster.\n  For the most general version of this problem, we obtain an algorithm that has provable guarantees of performance; our algorithm is based on "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1905.12753","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DS","submitted_at":"2019-05-29T22:21:47Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"2be6d3b266f2525ce6b4df24c28bf4c0fc45f2168ea058178011888746c8bbcf","abstract_canon_sha256":"d7a071710dd60291e9ec76f0bcb764d31c52f34654b70e42357124c2cb066df7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:39.737798Z","signature_b64":"Rx1i7hDdXO9oCCaVPo9GO4atnXJiGjgowTJTzcD+w1+eTRdjYL0R7Di0uzxm/euB6huaNNCsRnB55OtQshJECQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a218c4c903a11e493f8968ac2c4ad7d30d9fb6893cd1aedcf71b4874ba48e6b2","last_reissued_at":"2026-05-17T23:44:39.737332Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:39.737332Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Clustering without Over-Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.DS","authors_text":"Alessandro Epasto, Mohammad Mahdian, Ravi Kumar, Sara Ahmadian","submitted_at":"2019-05-29T22:21:47Z","abstract_excerpt":"In this paper we consider clustering problems in which each point is endowed with a color. The goal is to cluster the points to minimize the classical clustering cost but with the additional constraint that no color is over-represented in any cluster. This problem is motivated by practical clustering settings, e.g., in clustering news articles where the color of an article is its source, it is preferable that no single news source dominates any cluster.\n  For the most general version of this problem, we obtain an algorithm that has provable guarantees of performance; our algorithm is based on "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1905.12753","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1905.12753","created_at":"2026-05-17T23:44:39.737410+00:00"},{"alias_kind":"arxiv_version","alias_value":"1905.12753v1","created_at":"2026-05-17T23:44:39.737410+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1905.12753","created_at":"2026-05-17T23:44:39.737410+00:00"},{"alias_kind":"pith_short_12","alias_value":"UIMMJSIDUEPE","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_16","alias_value":"UIMMJSIDUEPESP4J","created_at":"2026-05-18T12:33:30.264802+00:00"},{"alias_kind":"pith_short_8","alias_value":"UIMMJSID","created_at":"2026-05-18T12:33:30.264802+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M","json":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M.json","graph_json":"https://pith.science/api/pith-number/UIMMJSIDUEPESP4JNCWCYSWX2M/graph.json","events_json":"https://pith.science/api/pith-number/UIMMJSIDUEPESP4JNCWCYSWX2M/events.json","paper":"https://pith.science/paper/UIMMJSID"},"agent_actions":{"view_html":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M","download_json":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M.json","view_paper":"https://pith.science/paper/UIMMJSID","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1905.12753&json=true","fetch_graph":"https://pith.science/api/pith-number/UIMMJSIDUEPESP4JNCWCYSWX2M/graph.json","fetch_events":"https://pith.science/api/pith-number/UIMMJSIDUEPESP4JNCWCYSWX2M/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M/action/storage_attestation","attest_author":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M/action/author_attestation","sign_citation":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M/action/citation_signature","submit_replication":"https://pith.science/pith/UIMMJSIDUEPESP4JNCWCYSWX2M/action/replication_record"}},"created_at":"2026-05-17T23:44:39.737410+00:00","updated_at":"2026-05-17T23:44:39.737410+00:00"}