{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:OKAWOJMHSMDWTR3HGJJSDTY2U7","short_pith_number":"pith:OKAWOJMH","canonical_record":{"source":{"id":"1705.07474","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-21T16:49:36Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"26f781f7e63aa7940c63d88a5bbee2033a49e5f07e1c49e3f509a31d8c9ed6f4","abstract_canon_sha256":"29c960a096834968039225fa3f51f1ae4f8c43e302abaccbff1633894972f921"},"schema_version":"1.0"},"canonical_sha256":"7281672587930769c767325321cf1aa7f4fe965fa37e204eb1a6ee32cdf2f93d","source":{"kind":"arxiv","id":"1705.07474","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.07474","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"arxiv_version","alias_value":"1705.07474v2","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.07474","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"pith_short_12","alias_value":"OKAWOJMHSMDW","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OKAWOJMHSMDWTR3H","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OKAWOJMH","created_at":"2026-05-18T12:31:34Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:OKAWOJMHSMDWTR3HGJJSDTY2U7","target":"record","payload":{"canonical_record":{"source":{"id":"1705.07474","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-21T16:49:36Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"26f781f7e63aa7940c63d88a5bbee2033a49e5f07e1c49e3f509a31d8c9ed6f4","abstract_canon_sha256":"29c960a096834968039225fa3f51f1ae4f8c43e302abaccbff1633894972f921"},"schema_version":"1.0"},"canonical_sha256":"7281672587930769c767325321cf1aa7f4fe965fa37e204eb1a6ee32cdf2f93d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:14:41.191175Z","signature_b64":"PNn9Pyc/87hwWDIErMj7n8EsogFaPJV9LgJJSZP4fXDGdB6QG+SoXsQuk6EQYoaFyqUy8XU+Bo5noTikCmrOCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7281672587930769c767325321cf1aa7f4fe965fa37e204eb1a6ee32cdf2f93d","last_reissued_at":"2026-05-18T00:14:41.190183Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:14:41.190183Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1705.07474","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8/fFnWS7P2wYO8NJWN1T1goq/UFYLnINwsWtKwAooADgZZZ2R8XKkmBrLMvAxEKWXctjpUUTvMNwHjFF25A6Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T04:24:27.762766Z"},"content_sha256":"781e747200eaab346a72d1cda7dd19d4ca4b4a1464802e7402c0bb00e9ada0b5","schema_version":"1.0","event_id":"sha256:781e747200eaab346a72d1cda7dd19d4ca4b4a1464802e7402c0bb00e9ada0b5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:OKAWOJMHSMDWTR3HGJJSDTY2U7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Why are Big Data Matrices Approximately Low Rank?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Alex Townsend, Madeleine Udell","submitted_at":"2017-05-21T16:49:36Z","abstract_excerpt":"Matrices of (approximate) low rank are pervasive in data science, appearing in recommender systems, movie preferences, topic models, medical records, and genomics. While there is a vast literature on how to exploit low rank structure in these datasets, there is less attention on explaining why the low rank structure appears in the first place. Here, we explain the effectiveness of low rank models in data science by considering a simple generative model for these matrices: we suppose that each row or column is associated to a (possibly high dimensional) bounded latent variable, and entries of t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.07474","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:14:41Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9GUajt1eFNn8KNEWNO0WMxHo29hII0X6yfgGhUUadrjQv+SYPXPTql+1+8uEMyrchSpFv7AdhUlwBaD/FronCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T04:24:27.763140Z"},"content_sha256":"55ea78c52729ba462240c71a8a4d0889826981d1decc9b9b99ae0ec2817eea87","schema_version":"1.0","event_id":"sha256:55ea78c52729ba462240c71a8a4d0889826981d1decc9b9b99ae0ec2817eea87"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/bundle.json","state_url":"https://pith.science/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T04:24:27Z","links":{"resolver":"https://pith.science/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7","bundle":"https://pith.science/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/bundle.json","state":"https://pith.science/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OKAWOJMHSMDWTR3HGJJSDTY2U7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:OKAWOJMHSMDWTR3HGJJSDTY2U7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"29c960a096834968039225fa3f51f1ae4f8c43e302abaccbff1633894972f921","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-21T16:49:36Z","title_canon_sha256":"26f781f7e63aa7940c63d88a5bbee2033a49e5f07e1c49e3f509a31d8c9ed6f4"},"schema_version":"1.0","source":{"id":"1705.07474","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1705.07474","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"arxiv_version","alias_value":"1705.07474v2","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1705.07474","created_at":"2026-05-18T00:14:41Z"},{"alias_kind":"pith_short_12","alias_value":"OKAWOJMHSMDW","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_16","alias_value":"OKAWOJMHSMDWTR3H","created_at":"2026-05-18T12:31:34Z"},{"alias_kind":"pith_short_8","alias_value":"OKAWOJMH","created_at":"2026-05-18T12:31:34Z"}],"graph_snapshots":[{"event_id":"sha256:55ea78c52729ba462240c71a8a4d0889826981d1decc9b9b99ae0ec2817eea87","target":"graph","created_at":"2026-05-18T00:14:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Matrices of (approximate) low rank are pervasive in data science, appearing in recommender systems, movie preferences, topic models, medical records, and genomics. While there is a vast literature on how to exploit low rank structure in these datasets, there is less attention on explaining why the low rank structure appears in the first place. Here, we explain the effectiveness of low rank models in data science by considering a simple generative model for these matrices: we suppose that each row or column is associated to a (possibly high dimensional) bounded latent variable, and entries of t","authors_text":"Alex Townsend, Madeleine Udell","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-21T16:49:36Z","title":"Why are Big Data Matrices Approximately Low Rank?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1705.07474","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:781e747200eaab346a72d1cda7dd19d4ca4b4a1464802e7402c0bb00e9ada0b5","target":"record","created_at":"2026-05-18T00:14:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"29c960a096834968039225fa3f51f1ae4f8c43e302abaccbff1633894972f921","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2017-05-21T16:49:36Z","title_canon_sha256":"26f781f7e63aa7940c63d88a5bbee2033a49e5f07e1c49e3f509a31d8c9ed6f4"},"schema_version":"1.0","source":{"id":"1705.07474","kind":"arxiv","version":2}},"canonical_sha256":"7281672587930769c767325321cf1aa7f4fe965fa37e204eb1a6ee32cdf2f93d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7281672587930769c767325321cf1aa7f4fe965fa37e204eb1a6ee32cdf2f93d","first_computed_at":"2026-05-18T00:14:41.190183Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:14:41.190183Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"PNn9Pyc/87hwWDIErMj7n8EsogFaPJV9LgJJSZP4fXDGdB6QG+SoXsQuk6EQYoaFyqUy8XU+Bo5noTikCmrOCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:14:41.191175Z","signed_message":"canonical_sha256_bytes"},"source_id":"1705.07474","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:781e747200eaab346a72d1cda7dd19d4ca4b4a1464802e7402c0bb00e9ada0b5","sha256:55ea78c52729ba462240c71a8a4d0889826981d1decc9b9b99ae0ec2817eea87"],"state_sha256":"ab0f1061aa5a3570c78713e5c25272673d1b156def7c39189a5f42446e32f8e8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"A2Xvas6dD0a2PTQMK+1k8uSxfqAmu7HEHaOc90a1ztp/GsyxzfNk9u/SgTzwjgY0lT9kPqfDMbOafORQevKaCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T04:24:27.765534Z","bundle_sha256":"9dcc940d5641f4fc8f7490096973eb8e285245cba3a9d6d64b2f3bab540c6b8a"}}