{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:TH4GVOC5GVFKZN3CH43TZJ7PSP","short_pith_number":"pith:TH4GVOC5","schema_version":"1.0","canonical_sha256":"99f86ab85d354aacb7623f373ca7ef93cfe8cd94ddf3cffa99789be90fe59dff","source":{"kind":"arxiv","id":"1404.1100","version":1},"attestation_state":"computed","paper":{"title":"A Tutorial on Principal Component Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Jonathon Shlens","submitted_at":"2014-04-03T21:16:49Z","abstract_excerpt":"Principal component analysis (PCA) is a mainstay of modern data analysis - a black box that is widely used but (sometimes) poorly understood. The goal of this paper is to dispel the magic behind this black box. This manuscript focuses on building a solid intuition for how and why principal component analysis works. This manuscript crystallizes this knowledge by deriving from simple intuitions, the mathematics behind PCA. This tutorial does not shy away from explaining the ideas informally, nor does it shy away from the mathematics. The hope is that by addressing both aspects, readers of all le"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1404.1100","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2014-04-03T21:16:49Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"4dae13497d44f83e9b360fb89a8d399b28ea345ea3f7f59e39a9df5a7a44e6c0","abstract_canon_sha256":"bd7bda34de0c369908bb7d936e82ce2439adaa49fcf922d97085f5a3f2b85168"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:54:54.207879Z","signature_b64":"Zo2JsOWUrHZeoNf7JSOEj0yURmI0wuFg41PRpE4eoFQS5cnzbns9opadqGq9RWzF3YeP1jmKReEw1bTWAhFVAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"99f86ab85d354aacb7623f373ca7ef93cfe8cd94ddf3cffa99789be90fe59dff","last_reissued_at":"2026-05-18T02:54:54.207254Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:54:54.207254Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Tutorial on Principal Component Analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Jonathon Shlens","submitted_at":"2014-04-03T21:16:49Z","abstract_excerpt":"Principal component analysis (PCA) is a mainstay of modern data analysis - a black box that is widely used but (sometimes) poorly understood. The goal of this paper is to dispel the magic behind this black box. This manuscript focuses on building a solid intuition for how and why principal component analysis works. This manuscript crystallizes this knowledge by deriving from simple intuitions, the mathematics behind PCA. This tutorial does not shy away from explaining the ideas informally, nor does it shy away from the mathematics. The hope is that by addressing both aspects, readers of all le"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1404.1100","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1404.1100","created_at":"2026-05-18T02:54:54.207357+00:00"},{"alias_kind":"arxiv_version","alias_value":"1404.1100v1","created_at":"2026-05-18T02:54:54.207357+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1404.1100","created_at":"2026-05-18T02:54:54.207357+00:00"},{"alias_kind":"pith_short_12","alias_value":"TH4GVOC5GVFK","created_at":"2026-05-18T12:28:49.207871+00:00"},{"alias_kind":"pith_short_16","alias_value":"TH4GVOC5GVFKZN3C","created_at":"2026-05-18T12:28:49.207871+00:00"},{"alias_kind":"pith_short_8","alias_value":"TH4GVOC5","created_at":"2026-05-18T12:28:49.207871+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":16,"internal_anchor_count":9,"sample":[{"citing_arxiv_id":"1907.04827","citing_title":"Hillview: A trillion-cell spreadsheet for big data","ref_index":88,"is_internal_anchor":true},{"citing_arxiv_id":"2212.02987","citing_title":"Generative random latent features models and statistics of natural images","ref_index":53,"is_internal_anchor":true},{"citing_arxiv_id":"2306.11060","citing_title":"PCA and t-SNE analysis in the study of QAOA entangled and non-entangled mixing operators","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2409.15030","citing_title":"Anomaly Detection from a Tensor Train Perspective","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13520","citing_title":"Beyond Explained Variance: A Cautionary Tale of PCA","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2507.12237","citing_title":"Constructed Realities? Technical and Contextual Anomalies in a High-Profile Image","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2603.02275","citing_title":"A Comparative Study of UMAP and Other Dimensionality Reduction Methods","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14318","citing_title":"Semantic Feature Segmentation for Interpretable Predictive Maintenance in Complex Systems","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13520","citing_title":"Beyond Explained Variance: A Cautionary Tale of PCA","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10606","citing_title":"Measuring Embedding Sensitivity to Authorial Style in French: Comparing Literary Texts with Language Model Rewritings","ref_index":39,"is_internal_anchor":false},{"citing_arxiv_id":"2605.04164","citing_title":"Enabling Real-Time Training of a Wildfire-to-Smoke Map with Multilinear Operators","ref_index":84,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01514","citing_title":"MANOJAVAM: A Scalable, Unified FPGA Accelerator for Matrix Multiplication and Singular Value Decomposition in Principal Component Analysis","ref_index":15,"is_internal_anchor":false},{"citing_arxiv_id":"2604.12200","citing_title":"21 cm Power Spectrum Analysis of North Celestial Pole Observations with the Tianlai Dish Pathfinder Array","ref_index":42,"is_internal_anchor":false},{"citing_arxiv_id":"2604.09862","citing_title":"FF3R: Feedforward Feature 3D Reconstruction from Unconstrained views","ref_index":33,"is_internal_anchor":false},{"citing_arxiv_id":"2604.14958","citing_title":"Frequency-Enhanced Dual-Subspace Networks for Few-Shot Fine-Grained Image Classification","ref_index":35,"is_internal_anchor":false},{"citing_arxiv_id":"2604.18691","citing_title":"Harmoniq: Efficient Data Augmentation on a Quantum Computer Inspired by Harmonic Analysis","ref_index":26,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP","json":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP.json","graph_json":"https://pith.science/api/pith-number/TH4GVOC5GVFKZN3CH43TZJ7PSP/graph.json","events_json":"https://pith.science/api/pith-number/TH4GVOC5GVFKZN3CH43TZJ7PSP/events.json","paper":"https://pith.science/paper/TH4GVOC5"},"agent_actions":{"view_html":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP","download_json":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP.json","view_paper":"https://pith.science/paper/TH4GVOC5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1404.1100&json=true","fetch_graph":"https://pith.science/api/pith-number/TH4GVOC5GVFKZN3CH43TZJ7PSP/graph.json","fetch_events":"https://pith.science/api/pith-number/TH4GVOC5GVFKZN3CH43TZJ7PSP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP/action/storage_attestation","attest_author":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP/action/author_attestation","sign_citation":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP/action/citation_signature","submit_replication":"https://pith.science/pith/TH4GVOC5GVFKZN3CH43TZJ7PSP/action/replication_record"}},"created_at":"2026-05-18T02:54:54.207357+00:00","updated_at":"2026-05-18T02:54:54.207357+00:00"}