{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:TA7COXXJ5JANU25JWM2F4OLAVR","short_pith_number":"pith:TA7COXXJ","schema_version":"1.0","canonical_sha256":"983e275ee9ea40da6ba9b3345e3960ac4ac537627ad6c62795d551b0886d288a","source":{"kind":"arxiv","id":"2405.07987","version":5},"attestation_state":"computed","paper":{"title":"The Platonic Representation Hypothesis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Representations learned by different neural networks are converging toward a shared statistical model of reality.","cross_cats":["cs.AI","cs.CV","cs.NE"],"primary_cat":"cs.LG","authors_text":"Brian Cheung, Minyoung Huh, Phillip Isola, Tongzhou Wang","submitted_at":"2024-05-13T17:58:30Z","abstract_excerpt":"We argue that representations in AI models, particularly deep networks, are converging. First, we survey many examples of convergence in the literature: over time and across multiple domains, the ways by which different neural networks represent data are becoming more aligned. Next, we demonstrate convergence across data modalities: as vision models and language models get larger, they measure distance between datapoints in a more and more alike way. We hypothesize that this convergence is driving toward a shared statistical model of reality, akin to Plato's concept of an ideal reality. We ter"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2405.07987","kind":"arxiv","version":5},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-05-13T17:58:30Z","cross_cats_sorted":["cs.AI","cs.CV","cs.NE"],"title_canon_sha256":"84711fac72d246673ea735c241943b761c2288440c296e17ea48e682c5019bba","abstract_canon_sha256":"7764d62685e75160e340bdbfad327e15116bcca1f61b1e7f9cea21f703b9bd0f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:53.320241Z","signature_b64":"15g8I9v7arbmxbRQosRey1lnwSoVAgWItgr89lzUKa+/xsyI69m/2CNtzhIW9mDPiUp1fu9NF/s/LFmEdi4pCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"983e275ee9ea40da6ba9b3345e3960ac4ac537627ad6c62795d551b0886d288a","last_reissued_at":"2026-05-17T23:38:53.319583Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:53.319583Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Platonic Representation Hypothesis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Representations learned by different neural networks are converging toward a shared statistical model of reality.","cross_cats":["cs.AI","cs.CV","cs.NE"],"primary_cat":"cs.LG","authors_text":"Brian Cheung, Minyoung Huh, Phillip Isola, Tongzhou Wang","submitted_at":"2024-05-13T17:58:30Z","abstract_excerpt":"We argue that representations in AI models, particularly deep networks, are converging. First, we survey many examples of convergence in the literature: over time and across multiple domains, the ways by which different neural networks represent data are becoming more aligned. Next, we demonstrate convergence across data modalities: as vision models and language models get larger, they measure distance between datapoints in a more and more alike way. We hypothesize that this convergence is driving toward a shared statistical model of reality, akin to Plato's concept of an ideal reality. We ter"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We hypothesize that this convergence is driving toward a shared statistical model of reality, akin to Plato's concept of an ideal reality. We term such a representation the platonic representation.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That observed similarities in representation spaces reflect convergence to an objective underlying model of reality rather than shared inductive biases, training data overlap, or architectural similarities.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Representations learned by large AI models are converging toward a shared statistical model of reality.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Representations learned by different neural networks are converging toward a shared statistical model of reality.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"9614a7ec0c066c07f3d25e426453e0d680af5b23a9b18f3cb70ba84788696f54"},"source":{"id":"2405.07987","kind":"arxiv","version":5},"verdict":{"id":"39eb710b-4ad3-45fa-af51-5756119544b8","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T05:58:39.603476Z","strongest_claim":"We hypothesize that this convergence is driving toward a shared statistical model of reality, akin to Plato's concept of an ideal reality. We term such a representation the platonic representation.","one_line_summary":"Representations learned by large AI models are converging toward a shared statistical model of reality.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That observed similarities in representation spaces reflect convergence to an objective underlying model of reality rather than shared inductive biases, training data overlap, or architectural similarities.","pith_extraction_headline":"Representations learned by different neural networks are converging toward a shared statistical model of reality."},"references":{"count":272,"sample":[{"doi":"","year":null,"title":"Cognitive Systems Research , volume =","work_id":"2c395a6c-6e7f-4254-9ad1-4bc5f08b6ca0","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Communications of the ACM , volume=","work_id":"6ceafbf5-fa63-4493-b4f6-a0298754841e","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Nature communications , volume=","work_id":"3c961a44-3a00-4a0a-94c8-de2554fa7702","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Text and Code Embeddings by Contrastive Pre-Training","work_id":"b5a7ebcc-85b7-4a3f-8495-8d4b5220f949","ref_index":4,"cited_arxiv_id":"2201.10005","is_internal_anchor":true},{"doi":"","year":null,"title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition , pages=","work_id":"d199e356-1cd3-4a1e-8c3c-b04d6cbed803","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":272,"snapshot_sha256":"c6d4ca73fedde0da0b5184c9930ab6e7bbd10bcae37532c1b337107d6b28ee3c","internal_anchors":38},"formal_canon":{"evidence_count":2,"snapshot_sha256":"3529eecbb8053c09d21662f7a80ab5d8b1be84d33c9402f3378db5598ac53d95"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2405.07987","created_at":"2026-05-17T23:38:53.319678+00:00"},{"alias_kind":"arxiv_version","alias_value":"2405.07987v5","created_at":"2026-05-17T23:38:53.319678+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2405.07987","created_at":"2026-05-17T23:38:53.319678+00:00"},{"alias_kind":"pith_short_12","alias_value":"TA7COXXJ5JAN","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"TA7COXXJ5JANU25J","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"TA7COXXJ","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":35,"internal_anchor_count":15,"sample":[{"citing_arxiv_id":"2602.13249","citing_title":"A Systematic Evaluation of Co-folding Model Representations for Small-Molecule Learning","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2605.23778","citing_title":"The physics of AI weather models","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2505.15263","citing_title":"gen2seg: Generative Models Enable Generalizable Instance Segmentation","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2511.19115","citing_title":"AI Consciousness and Existential Risk","ref_index":60,"is_internal_anchor":true},{"citing_arxiv_id":"2602.06886","citing_title":"Prompt Reinjection: Alleviating Prompt Forgetting in Multimodal Diffusion Transformers","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16321","citing_title":"Language Game: Talking to Non-Human Systems","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00333","citing_title":"Borrowed Geometry: Cross-Distribution Head-Importance Fingerprints of Frozen Pretrained Gemma 4 31B","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17671","citing_title":"PEIRA: Learning Predictive Encoders through Inter-View Regressor Alignment","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18667","citing_title":"Better Together: Evaluating the Complementarity of Earth Embedding Models","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18172","citing_title":"Visualizing the Invisible: Generative Visual Grounding Empowers Universal EEG Understanding in MLLMs","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16776","citing_title":"Distinguishable Deletion: Unifying Knowledge Erasure and Refusal for Large Language Model Unlearning","ref_index":46,"is_internal_anchor":true},{"citing_arxiv_id":"2510.01706","citing_title":"Representational Alignment Across Model Layers and Brain Regions with Multi-Level Optimal Transport","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2510.18457","citing_title":"VFM-VAE: Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2601.23045","citing_title":"The Hot Mess of AI: How Does Misalignment Scale With Model Intelligence and Task Complexity?","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10310","citing_title":"Positive Alignment: Artificial Intelligence for Human Flourishing","ref_index":88,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14145","citing_title":"Rethinking the Good Enough Embedding for Easy Few-Shot Learning","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.13612","citing_title":"Deep Learning as Neural Low-Degree Filtering: A Spectral Theory of Hierarchical Feature Learning","ref_index":5,"is_internal_anchor":false},{"citing_arxiv_id":"2604.01833","citing_title":"Language-Pretraining-Induced Bias: A Strong Foundation for General Vision Tasks","ref_index":21,"is_internal_anchor":false},{"citing_arxiv_id":"2605.11135","citing_title":"Control Charts for Multi-agent Systems","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08188","citing_title":"Neuroscience-Inspired Analyses of Visual Interestingness in Multimodal Transformers","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2605.08249","citing_title":"Dimensional Coactivation for Representational Consistency in Frozen Vision Foundation Models","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2605.01325","citing_title":"Rethinking Model Selection in VLM Through the Lens of Gromov-Wasserstein Distance","ref_index":16,"is_internal_anchor":false},{"citing_arxiv_id":"2605.00333","citing_title":"Borrowed Geometry: Cross-Distribution Head-Importance Fingerprints of Frozen Pretrained Gemma 4 31B","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2604.16487","citing_title":"Geometry-Aware CLIP Retrieval via Local Cross-Modal Alignment and Steering","ref_index":8,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08335","citing_title":"Dead Weights, Live Signals: Feedforward Graphs of Frozen Language Models","ref_index":6,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR","json":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR.json","graph_json":"https://pith.science/api/pith-number/TA7COXXJ5JANU25JWM2F4OLAVR/graph.json","events_json":"https://pith.science/api/pith-number/TA7COXXJ5JANU25JWM2F4OLAVR/events.json","paper":"https://pith.science/paper/TA7COXXJ"},"agent_actions":{"view_html":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR","download_json":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR.json","view_paper":"https://pith.science/paper/TA7COXXJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2405.07987&json=true","fetch_graph":"https://pith.science/api/pith-number/TA7COXXJ5JANU25JWM2F4OLAVR/graph.json","fetch_events":"https://pith.science/api/pith-number/TA7COXXJ5JANU25JWM2F4OLAVR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR/action/storage_attestation","attest_author":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR/action/author_attestation","sign_citation":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR/action/citation_signature","submit_replication":"https://pith.science/pith/TA7COXXJ5JANU25JWM2F4OLAVR/action/replication_record"}},"created_at":"2026-05-17T23:38:53.319678+00:00","updated_at":"2026-05-17T23:38:53.319678+00:00"}