{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:XTLBBB6Y4KTUBXU5EM27RGCOF7","short_pith_number":"pith:XTLBBB6Y","canonical_record":{"source":{"id":"2309.10668","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2023-09-19T14:50:38Z","cross_cats_sorted":["cs.AI","cs.CL","cs.IT","math.IT"],"title_canon_sha256":"6e120a3a5dcd5a2ea8b8e58a3af16ddbf5cf63cc0fa224a78c89c0a65669247e","abstract_canon_sha256":"def926c0e7ca4abc4365977dcb574ca4dabb545c6b3e14e3b6b81a9cc38c332a"},"schema_version":"1.0"},"canonical_sha256":"bcd61087d8e2a740de9d2335f8984e2ffad77aecf94016c81c774f0aefaddc2e","source":{"kind":"arxiv","id":"2309.10668","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.10668","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"arxiv_version","alias_value":"2309.10668v2","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.10668","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"pith_short_12","alias_value":"XTLBBB6Y4KTU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"XTLBBB6Y4KTUBXU5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"XTLBBB6Y","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:XTLBBB6Y4KTUBXU5EM27RGCOF7","target":"record","payload":{"canonical_record":{"source":{"id":"2309.10668","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2023-09-19T14:50:38Z","cross_cats_sorted":["cs.AI","cs.CL","cs.IT","math.IT"],"title_canon_sha256":"6e120a3a5dcd5a2ea8b8e58a3af16ddbf5cf63cc0fa224a78c89c0a65669247e","abstract_canon_sha256":"def926c0e7ca4abc4365977dcb574ca4dabb545c6b3e14e3b6b81a9cc38c332a"},"schema_version":"1.0"},"canonical_sha256":"bcd61087d8e2a740de9d2335f8984e2ffad77aecf94016c81c774f0aefaddc2e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:12.795772Z","signature_b64":"QtCKI/0PiLLS6CBep3ynAe9GZsnKyAgf7rRCFdvzTKJYIgGVXl8LNEjNtvN9FLJhOSYGsX+fPW433Kt50fciDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bcd61087d8e2a740de9d2335f8984e2ffad77aecf94016c81c774f0aefaddc2e","last_reissued_at":"2026-05-17T23:38:12.795179Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:12.795179Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2309.10668","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mxR+U5xzA6mJ0qg4v/Y/891YxiSYFv4bG/gHrn+Y/okLouT43u1ADB5iWNWZbc/ScXggHgM/zdz4cqYW1bI0Dg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T22:23:56.183377Z"},"content_sha256":"2a21f6a8ec5df3479d00d41d91403829d0a54e4fa7128cb58b0b9848a98374a0","schema_version":"1.0","event_id":"sha256:2a21f6a8ec5df3479d00d41d91403829d0a54e4fa7128cb58b0b9848a98374a0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:XTLBBB6Y4KTUBXU5EM27RGCOF7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Language Modeling Is Compression","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Large language models trained on text compress images and audio better than specialized tools.","cross_cats":["cs.AI","cs.CL","cs.IT","math.IT"],"primary_cat":"cs.LG","authors_text":"Anian Ruoss, Christopher Mattern, Elliot Catt, Gr\\'egoire Del\\'etang, Joel Veness, Jordi Grau-Moya, Laurent Orseau, Li Kevin Wenliang, Marcus Hutter, Matthew Aitchison, Paul-Ambroise Duquenne, Tim Genewein","submitted_at":"2023-09-19T14:50:38Z","abstract_excerpt":"It has long been established that predictive models can be transformed into lossless compressors and vice versa. Incidentally, in recent years, the machine learning community has focused on training increasingly large and powerful self-supervised (language) models. Since these large language models exhibit impressive predictive capabilities, they are well-positioned to be strong compressors. In this work, we advocate for viewing the prediction problem through the lens of compression and evaluate the compression capabilities of large (foundation) models. We show that large language models are p"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Chinchilla 70B, while trained primarily on text, compresses ImageNet patches to 43.4% and LibriSpeech samples to 16.4% of their raw size, beating domain-specific compressors like PNG (58.5%) or FLAC (30.3%), respectively.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the predictive distribution produced by the language model can be directly converted into a lossless compression scheme via arithmetic coding without significant overhead or implementation-specific losses that would invalidate the reported ratios.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Large language models serve as strong general-purpose lossless compressors for text, images, and audio, outperforming domain-specific methods and revealing insights into scaling, tokenization, and in-context learning.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models trained on text compress images and audio better than specialized tools.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3127474625bf7df2d66d51df773744c43db31e0d5e7b06b7bcf88b9fc8c92bdd"},"source":{"id":"2309.10668","kind":"arxiv","version":2},"verdict":{"id":"4f9f2a55-2aa3-4a8d-bd72-9de4fb57a489","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T22:31:39.076349Z","strongest_claim":"Chinchilla 70B, while trained primarily on text, compresses ImageNet patches to 43.4% and LibriSpeech samples to 16.4% of their raw size, beating domain-specific compressors like PNG (58.5%) or FLAC (30.3%), respectively.","one_line_summary":"Large language models serve as strong general-purpose lossless compressors for text, images, and audio, outperforming domain-specific methods and revealing insights into scaling, tokenization, and in-context learning.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the predictive distribution produced by the language model can be directly converted into a lossless compression scheme via arithmetic coding without significant overhead or implementation-specific losses that would invalidate the reported ratios.","pith_extraction_headline":"Large language models trained on text compress images and audio better than specialized tools."},"references":{"count":20,"sample":[{"doi":"","year":null,"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","ref_index":1,"cited_arxiv_id":"2108.07258","is_internal_anchor":true},{"doi":"","year":null,"title":"Sparks of Artificial General Intelligence: Early experiments with GPT-4","work_id":"a23cfe92-7f7c-424b-98d4-b386a83002fb","ref_index":2,"cited_arxiv_id":"2303.12712","is_internal_anchor":true},{"doi":"","year":null,"title":"Scaling transformer to 1m tokens and beyond with rmt","work_id":"f5b46c3e-9f26-481f-a773-689cbd972ec4","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"arXiv preprint arXiv:1710.09282 , year=","work_id":"95630094-c860-4769-b4d5-042827b8f21e","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Syntactically Informed Text Compression with Recurrent Neural Networks","work_id":"d37d28de-8bf2-46c9-8cc2-5434dd452dba","ref_index":5,"cited_arxiv_id":"1608.02893","is_internal_anchor":true}],"resolved_work":20,"snapshot_sha256":"85e30d7099f817ef42e31319b401db64fea872e28abdcd5950d96918fd90f139","internal_anchors":8},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"4f9f2a55-2aa3-4a8d-bd72-9de4fb57a489"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:12Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dgb8DjEZ4Y7plcVFeUBKIcUCTTTnQ0Rrx76OModvvBlGIds1CcCBEUOj15z3zPtRC2/d0rlSbsxJ96z/SafkAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-19T22:23:56.183932Z"},"content_sha256":"0ba48a3c7d9f34b101cfc91561f25ff122fc0b3e2da103e29b91f86a0afaefc4","schema_version":"1.0","event_id":"sha256:0ba48a3c7d9f34b101cfc91561f25ff122fc0b3e2da103e29b91f86a0afaefc4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/bundle.json","state_url":"https://pith.science/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-19T22:23:56Z","links":{"resolver":"https://pith.science/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7","bundle":"https://pith.science/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/bundle.json","state":"https://pith.science/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XTLBBB6Y4KTUBXU5EM27RGCOF7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:XTLBBB6Y4KTUBXU5EM27RGCOF7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"def926c0e7ca4abc4365977dcb574ca4dabb545c6b3e14e3b6b81a9cc38c332a","cross_cats_sorted":["cs.AI","cs.CL","cs.IT","math.IT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2023-09-19T14:50:38Z","title_canon_sha256":"6e120a3a5dcd5a2ea8b8e58a3af16ddbf5cf63cc0fa224a78c89c0a65669247e"},"schema_version":"1.0","source":{"id":"2309.10668","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.10668","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"arxiv_version","alias_value":"2309.10668v2","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.10668","created_at":"2026-05-17T23:38:12Z"},{"alias_kind":"pith_short_12","alias_value":"XTLBBB6Y4KTU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"XTLBBB6Y4KTUBXU5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"XTLBBB6Y","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:0ba48a3c7d9f34b101cfc91561f25ff122fc0b3e2da103e29b91f86a0afaefc4","target":"graph","created_at":"2026-05-17T23:38:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Chinchilla 70B, while trained primarily on text, compresses ImageNet patches to 43.4% and LibriSpeech samples to 16.4% of their raw size, beating domain-specific compressors like PNG (58.5%) or FLAC (30.3%), respectively."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the predictive distribution produced by the language model can be directly converted into a lossless compression scheme via arithmetic coding without significant overhead or implementation-specific losses that would invalidate the reported ratios."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Large language models serve as strong general-purpose lossless compressors for text, images, and audio, outperforming domain-specific methods and revealing insights into scaling, tokenization, and in-context learning."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models trained on text compress images and audio better than specialized tools."}],"snapshot_sha256":"3127474625bf7df2d66d51df773744c43db31e0d5e7b06b7bcf88b9fc8c92bdd"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"It has long been established that predictive models can be transformed into lossless compressors and vice versa. Incidentally, in recent years, the machine learning community has focused on training increasingly large and powerful self-supervised (language) models. Since these large language models exhibit impressive predictive capabilities, they are well-positioned to be strong compressors. In this work, we advocate for viewing the prediction problem through the lens of compression and evaluate the compression capabilities of large (foundation) models. We show that large language models are p","authors_text":"Anian Ruoss, Christopher Mattern, Elliot Catt, Gr\\'egoire Del\\'etang, Joel Veness, Jordi Grau-Moya, Laurent Orseau, Li Kevin Wenliang, Marcus Hutter, Matthew Aitchison, Paul-Ambroise Duquenne, Tim Genewein","cross_cats":["cs.AI","cs.CL","cs.IT","math.IT"],"headline":"Large language models trained on text compress images and audio better than specialized tools.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2023-09-19T14:50:38Z","title":"Language Modeling Is Compression"},"references":{"count":20,"internal_anchors":8,"resolved_work":20,"sample":[{"cited_arxiv_id":"2108.07258","doi":"","is_internal_anchor":true,"ref_index":1,"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","year":null},{"cited_arxiv_id":"2303.12712","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Sparks of Artificial General Intelligence: Early experiments with GPT-4","work_id":"a23cfe92-7f7c-424b-98d4-b386a83002fb","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Scaling transformer to 1m tokens and beyond with rmt","work_id":"f5b46c3e-9f26-481f-a773-689cbd972ec4","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"arXiv preprint arXiv:1710.09282 , year=","work_id":"95630094-c860-4769-b4d5-042827b8f21e","year":null},{"cited_arxiv_id":"1608.02893","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Syntactically Informed Text Compression with Recurrent Neural Networks","work_id":"d37d28de-8bf2-46c9-8cc2-5434dd452dba","year":null}],"snapshot_sha256":"85e30d7099f817ef42e31319b401db64fea872e28abdcd5950d96918fd90f139"},"source":{"id":"2309.10668","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T22:31:39.076349Z","id":"4f9f2a55-2aa3-4a8d-bd72-9de4fb57a489","model_set":{"reader":"grok-4.3"},"one_line_summary":"Large language models serve as strong general-purpose lossless compressors for text, images, and audio, outperforming domain-specific methods and revealing insights into scaling, tokenization, and in-context learning.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models trained on text compress images and audio better than specialized tools.","strongest_claim":"Chinchilla 70B, while trained primarily on text, compresses ImageNet patches to 43.4% and LibriSpeech samples to 16.4% of their raw size, beating domain-specific compressors like PNG (58.5%) or FLAC (30.3%), respectively.","weakest_assumption":"That the predictive distribution produced by the language model can be directly converted into a lossless compression scheme via arithmetic coding without significant overhead or implementation-specific losses that would invalidate the reported ratios."}},"verdict_id":"4f9f2a55-2aa3-4a8d-bd72-9de4fb57a489"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2a21f6a8ec5df3479d00d41d91403829d0a54e4fa7128cb58b0b9848a98374a0","target":"record","created_at":"2026-05-17T23:38:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"def926c0e7ca4abc4365977dcb574ca4dabb545c6b3e14e3b6b81a9cc38c332a","cross_cats_sorted":["cs.AI","cs.CL","cs.IT","math.IT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2023-09-19T14:50:38Z","title_canon_sha256":"6e120a3a5dcd5a2ea8b8e58a3af16ddbf5cf63cc0fa224a78c89c0a65669247e"},"schema_version":"1.0","source":{"id":"2309.10668","kind":"arxiv","version":2}},"canonical_sha256":"bcd61087d8e2a740de9d2335f8984e2ffad77aecf94016c81c774f0aefaddc2e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bcd61087d8e2a740de9d2335f8984e2ffad77aecf94016c81c774f0aefaddc2e","first_computed_at":"2026-05-17T23:38:12.795179Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:12.795179Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QtCKI/0PiLLS6CBep3ynAe9GZsnKyAgf7rRCFdvzTKJYIgGVXl8LNEjNtvN9FLJhOSYGsX+fPW433Kt50fciDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:12.795772Z","signed_message":"canonical_sha256_bytes"},"source_id":"2309.10668","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2a21f6a8ec5df3479d00d41d91403829d0a54e4fa7128cb58b0b9848a98374a0","sha256:0ba48a3c7d9f34b101cfc91561f25ff122fc0b3e2da103e29b91f86a0afaefc4"],"state_sha256":"01bc54ff5e35db3208e0d5b688c0a658bb5e7da32a8325c31b6144cec2590950"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"q+eLERmO7MAasN0prUQwXmK9YzJvCA2ybWoL3nlpwAVZ5nzQ55WjV2O4ZI11rc8hjpctfadcSbTH/ubXZxLgAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-19T22:23:56.187234Z","bundle_sha256":"1c820d2dd83fefdea1d755010e7247286341d5eddc38ae8b084124c3a71c3119"}}