{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:7E7RGHPTNPVUZP3VKTT3SZQ63Q","short_pith_number":"pith:7E7RGHPT","canonical_record":{"source":{"id":"1901.00630","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-01-03T06:47:37Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"056e3966dc3c2b19f77017b1d02f8c9a0a9ccdc698752ece21e3c43602e24515","abstract_canon_sha256":"21f15775fcc92255002be775914eb636c50bd5ac13b88237185c000a8331beda"},"schema_version":"1.0"},"canonical_sha256":"f93f131df36beb4cbf7554e7b9661edc0d5e4f8a27a607d867d16520a2233f3b","source":{"kind":"arxiv","id":"1901.00630","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.00630","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"arxiv_version","alias_value":"1901.00630v1","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.00630","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"pith_short_12","alias_value":"7E7RGHPTNPVU","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"7E7RGHPTNPVUZP3V","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"7E7RGHPT","created_at":"2026-05-18T12:33:12Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:7E7RGHPTNPVUZP3VKTT3SZQ63Q","target":"record","payload":{"canonical_record":{"source":{"id":"1901.00630","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-01-03T06:47:37Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"056e3966dc3c2b19f77017b1d02f8c9a0a9ccdc698752ece21e3c43602e24515","abstract_canon_sha256":"21f15775fcc92255002be775914eb636c50bd5ac13b88237185c000a8331beda"},"schema_version":"1.0"},"canonical_sha256":"f93f131df36beb4cbf7554e7b9661edc0d5e4f8a27a607d867d16520a2233f3b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:01.848209Z","signature_b64":"aSXjomS3VlpZlvO6Qdld0A3ZBtt8O7fz8fe/K5Mzuu888WiiADFMnLEC18CRETGkQEz1GpAxFOtK075Vumf8BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f93f131df36beb4cbf7554e7b9661edc0d5e4f8a27a607d867d16520a2233f3b","last_reissued_at":"2026-05-17T23:57:01.847709Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:01.847709Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1901.00630","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Qhu00q8GQTxN3m9+54GA3OVEBIB01x49AjI4GveyVLDn1O/3Y8YVDrCP19XQucveJe1/E7dgmt9C1LnsLWZQCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T01:41:17.944833Z"},"content_sha256":"7859a5f6de107550f1ee7253bc985478839517e82f8c10beff803625049890ba","schema_version":"1.0","event_id":"sha256:7859a5f6de107550f1ee7253bc985478839517e82f8c10beff803625049890ba"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:7E7RGHPTNPVUZP3VKTT3SZQ63Q","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Projecting \"better than randomly\": How to reduce the dimensionality of very large datasets in a way that outperforms random projections","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"stat.ML","authors_text":"Di Zhang, Glenn Chisholm, Matt Wolff, Michael Wojnowicz, Xuan Zhao","submitted_at":"2019-01-03T06:47:37Z","abstract_excerpt":"For very large datasets, random projections (RP) have become the tool of choice for dimensionality reduction. This is due to the computational complexity of principal component analysis. However, the recent development of randomized principal component analysis (RPCA) has opened up the possibility of obtaining approximate principal components on very large datasets. In this paper, we compare the performance of RPCA and RP in dimensionality reduction for supervised learning. In Experiment 1, study a malware classification task on a dataset with over 10 million samples, almost 100,000 features, "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.00630","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:57:01Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KFJK1f0oPubMVb6h4bktNhMilaZvSBRSdBSQyEUReGL7pu44n29f1+jMZFd/BJ2LA8dhiCOCBvw2PDRbCbEVBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T01:41:17.945177Z"},"content_sha256":"261682ac7df32eb080d8ea1879ca86354c4d46381511dad5bc1574ca66921f6a","schema_version":"1.0","event_id":"sha256:261682ac7df32eb080d8ea1879ca86354c4d46381511dad5bc1574ca66921f6a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/bundle.json","state_url":"https://pith.science/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T01:41:17Z","links":{"resolver":"https://pith.science/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q","bundle":"https://pith.science/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/bundle.json","state":"https://pith.science/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7E7RGHPTNPVUZP3VKTT3SZQ63Q/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:7E7RGHPTNPVUZP3VKTT3SZQ63Q","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"21f15775fcc92255002be775914eb636c50bd5ac13b88237185c000a8331beda","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-01-03T06:47:37Z","title_canon_sha256":"056e3966dc3c2b19f77017b1d02f8c9a0a9ccdc698752ece21e3c43602e24515"},"schema_version":"1.0","source":{"id":"1901.00630","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1901.00630","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"arxiv_version","alias_value":"1901.00630v1","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.00630","created_at":"2026-05-17T23:57:01Z"},{"alias_kind":"pith_short_12","alias_value":"7E7RGHPTNPVU","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_16","alias_value":"7E7RGHPTNPVUZP3V","created_at":"2026-05-18T12:33:12Z"},{"alias_kind":"pith_short_8","alias_value":"7E7RGHPT","created_at":"2026-05-18T12:33:12Z"}],"graph_snapshots":[{"event_id":"sha256:261682ac7df32eb080d8ea1879ca86354c4d46381511dad5bc1574ca66921f6a","target":"graph","created_at":"2026-05-17T23:57:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"For very large datasets, random projections (RP) have become the tool of choice for dimensionality reduction. This is due to the computational complexity of principal component analysis. However, the recent development of randomized principal component analysis (RPCA) has opened up the possibility of obtaining approximate principal components on very large datasets. In this paper, we compare the performance of RPCA and RP in dimensionality reduction for supervised learning. In Experiment 1, study a malware classification task on a dataset with over 10 million samples, almost 100,000 features, ","authors_text":"Di Zhang, Glenn Chisholm, Matt Wolff, Michael Wojnowicz, Xuan Zhao","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-01-03T06:47:37Z","title":"Projecting \"better than randomly\": How to reduce the dimensionality of very large datasets in a way that outperforms random projections"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.00630","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:7859a5f6de107550f1ee7253bc985478839517e82f8c10beff803625049890ba","target":"record","created_at":"2026-05-17T23:57:01Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"21f15775fcc92255002be775914eb636c50bd5ac13b88237185c000a8331beda","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ML","submitted_at":"2019-01-03T06:47:37Z","title_canon_sha256":"056e3966dc3c2b19f77017b1d02f8c9a0a9ccdc698752ece21e3c43602e24515"},"schema_version":"1.0","source":{"id":"1901.00630","kind":"arxiv","version":1}},"canonical_sha256":"f93f131df36beb4cbf7554e7b9661edc0d5e4f8a27a607d867d16520a2233f3b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f93f131df36beb4cbf7554e7b9661edc0d5e4f8a27a607d867d16520a2233f3b","first_computed_at":"2026-05-17T23:57:01.847709Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:57:01.847709Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"aSXjomS3VlpZlvO6Qdld0A3ZBtt8O7fz8fe/K5Mzuu888WiiADFMnLEC18CRETGkQEz1GpAxFOtK075Vumf8BQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:57:01.848209Z","signed_message":"canonical_sha256_bytes"},"source_id":"1901.00630","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:7859a5f6de107550f1ee7253bc985478839517e82f8c10beff803625049890ba","sha256:261682ac7df32eb080d8ea1879ca86354c4d46381511dad5bc1574ca66921f6a"],"state_sha256":"73f30f4efee5192b8ae6941f1094353c34f0c4e154ed036a70bac06ad783eec6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Rs40cMko30uvYtaR+Kjz2sqI7If8mlR5wgz7CmkyJwFYgAAVMrj0Aw93smHa/4DF6LEp8kXyO6vW3VPfEftCCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T01:41:17.947169Z","bundle_sha256":"62fb48945d003f3e590cf9676cffd4301b4d0fb63a613b987bd5483aa81dd9b7"}}