{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OAWKW5G3NYHHAAGI5IJEGOC535","short_pith_number":"pith:OAWKW5G3","schema_version":"1.0","canonical_sha256":"702cab74db6e0e7000c8ea1243385ddf7cfbeebf6494b3bac3fdef139b60cafd","source":{"kind":"arxiv","id":"2606.10593","version":1},"attestation_state":"computed","paper":{"title":"Data compression for fast dimension reduction and clustering of high-dimensional discrete data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.CO"],"primary_cat":"stat.ME","authors_text":"Michael Fop, Silvia D'Angelo","submitted_at":"2026-06-09T08:58:42Z","abstract_excerpt":"High-dimensional discrete data arise in many contemporary applications, including genomics, microbiome research, survey studies, and digital behavioral analysis. Clustering such data remains challenging because existing methods are often computationally demanding, sensitive to sparsity and discreteness, or designed for specific data types. We propose a deterministic dimension-reduction framework for clustering high-dimensional discrete observations. The method compresses each observation into a low-dimensional continuous representation through weighted sums defined by a scaled positional encod"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.10593","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"stat.ME","submitted_at":"2026-06-09T08:58:42Z","cross_cats_sorted":["stat.CO"],"title_canon_sha256":"faab31290cae6e80a97b65cd38d01c41be9ebba66c01a9833327eec0c9afa524","abstract_canon_sha256":"ff5bccbb8864b4a10f05b9a090a45273822e6c2aec68abd73067e5c8f5c42b78"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:10:28.466160Z","signature_b64":"HTEpMkKbdefCA0jlTqkyikU7/XqslQaruMV/dFk/WddHgFHgldd8icXufaCbs6TmGSJ/qhWVhXT6nd5C9EWHDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"702cab74db6e0e7000c8ea1243385ddf7cfbeebf6494b3bac3fdef139b60cafd","last_reissued_at":"2026-06-10T01:10:28.464959Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:10:28.464959Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Data compression for fast dimension reduction and clustering of high-dimensional discrete data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.CO"],"primary_cat":"stat.ME","authors_text":"Michael Fop, Silvia D'Angelo","submitted_at":"2026-06-09T08:58:42Z","abstract_excerpt":"High-dimensional discrete data arise in many contemporary applications, including genomics, microbiome research, survey studies, and digital behavioral analysis. Clustering such data remains challenging because existing methods are often computationally demanding, sensitive to sparsity and discreteness, or designed for specific data types. We propose a deterministic dimension-reduction framework for clustering high-dimensional discrete observations. The method compresses each observation into a low-dimensional continuous representation through weighted sums defined by a scaled positional encod"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.10593","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.10593/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.10593","created_at":"2026-06-10T01:10:28.465046+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.10593v1","created_at":"2026-06-10T01:10:28.465046+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.10593","created_at":"2026-06-10T01:10:28.465046+00:00"},{"alias_kind":"pith_short_12","alias_value":"OAWKW5G3NYHH","created_at":"2026-06-10T01:10:28.465046+00:00"},{"alias_kind":"pith_short_16","alias_value":"OAWKW5G3NYHHAAGI","created_at":"2026-06-10T01:10:28.465046+00:00"},{"alias_kind":"pith_short_8","alias_value":"OAWKW5G3","created_at":"2026-06-10T01:10:28.465046+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535","json":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535.json","graph_json":"https://pith.science/api/pith-number/OAWKW5G3NYHHAAGI5IJEGOC535/graph.json","events_json":"https://pith.science/api/pith-number/OAWKW5G3NYHHAAGI5IJEGOC535/events.json","paper":"https://pith.science/paper/OAWKW5G3"},"agent_actions":{"view_html":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535","download_json":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535.json","view_paper":"https://pith.science/paper/OAWKW5G3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.10593&json=true","fetch_graph":"https://pith.science/api/pith-number/OAWKW5G3NYHHAAGI5IJEGOC535/graph.json","fetch_events":"https://pith.science/api/pith-number/OAWKW5G3NYHHAAGI5IJEGOC535/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535/action/storage_attestation","attest_author":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535/action/author_attestation","sign_citation":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535/action/citation_signature","submit_replication":"https://pith.science/pith/OAWKW5G3NYHHAAGI5IJEGOC535/action/replication_record"}},"created_at":"2026-06-10T01:10:28.465046+00:00","updated_at":"2026-06-10T01:10:28.465046+00:00"}