{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:U5KALI76GX5OZF7IVTQ3GLLULE","short_pith_number":"pith:U5KALI76","canonical_record":{"source":{"id":"2309.15505","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-09-27T09:13:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"62083644e49d78ab01722dcc2435bd6d5edf475e93cf1896c3bc04031e788647","abstract_canon_sha256":"38f0f105f4217052b88ab6b5dbfc6a4369c298813925934c25e84d837197197d"},"schema_version":"1.0"},"canonical_sha256":"a75405a3fe35faec97e8ace1b32d7459196712903675423b00ec02d36aade776","source":{"kind":"arxiv","id":"2309.15505","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.15505","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2309.15505v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.15505","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"U5KALI76GX5O","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"U5KALI76GX5OZF7I","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"U5KALI76","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:U5KALI76GX5OZF7IVTQ3GLLULE","target":"record","payload":{"canonical_record":{"source":{"id":"2309.15505","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-09-27T09:13:40Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"62083644e49d78ab01722dcc2435bd6d5edf475e93cf1896c3bc04031e788647","abstract_canon_sha256":"38f0f105f4217052b88ab6b5dbfc6a4369c298813925934c25e84d837197197d"},"schema_version":"1.0"},"canonical_sha256":"a75405a3fe35faec97e8ace1b32d7459196712903675423b00ec02d36aade776","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:46.967607Z","signature_b64":"svzM7nOEdKc2Tqqgc6BBB6uwpvkfbvCzbsbthsixs/MxGVWSajxEgoD4o3MZb6wMudOCsgWKIJpFOmxduBnkCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a75405a3fe35faec97e8ace1b32d7459196712903675423b00ec02d36aade776","last_reissued_at":"2026-05-17T23:38:46.967163Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:46.967163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2309.15505","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6hr3YIr9TZ7Tw/dWR76dZdoYY2nJLVUxRt5ZTiGCtZOJxFJ9AdBc2Lw8XS1Rq0VWO5Z3GIRUMH1NNeZ0fL6hDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T06:39:10.641786Z"},"content_sha256":"55da68bc49babda01816db52acf680c118d55d330da246fc29b681320facfb85","schema_version":"1.0","event_id":"sha256:55da68bc49babda01816db52acf680c118d55d330da246fc29b681320facfb85"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:U5KALI76GX5OZF7IVTQ3GLLULE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Finite Scalar Quantization: VQ-VAE Made Simple","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels.","cross_cats":["cs.LG"],"primary_cat":"cs.CV","authors_text":"David Minnen, Eirikur Agustsson, Fabian Mentzer, Michael Tschannen","submitted_at":"2023-09-27T09:13:40Z","abstract_excerpt":"We propose to replace vector quantization (VQ) in the latent representation of VQ-VAEs with a simple scheme termed finite scalar quantization (FSQ), where we project the VAE representation down to a few dimensions (typically less than 10). Each dimension is quantized to a small set of fixed values, leading to an (implicit) codebook given by the product of these sets. By appropriately choosing the number of dimensions and values each dimension can take, we obtain the same codebook size as in VQ. On top of such discrete representations, we can train the same models that have been trained on VQ-V"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Despite the much simpler design of FSQ, we obtain competitive performance in all these tasks. We emphasize that FSQ does not suffer from codebook collapse and does not need the complex machinery employed in VQ (commitment losses, codebook reseeding, code splitting, entropy penalties, etc.) to learn expressive discrete representations.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That projecting the VAE latent to a small number of dimensions (typically less than 10) and quantizing each independently to fixed levels preserves sufficient representational capacity for the downstream tasks to match VQ performance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Finite scalar quantization simplifies VQ-VAE latents by independently rounding a few dimensions to fixed levels, producing an equivalent-sized implicit codebook with competitive performance and no collapse.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"424a47ba721b65628e105cf5f81374a2933f1b44348becba0bca40b1a56ee31d"},"source":{"id":"2309.15505","kind":"arxiv","version":2},"verdict":{"id":"ffeb40f3-390e-4869-b3d4-2e702891f5db","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T18:28:56.392306Z","strongest_claim":"Despite the much simpler design of FSQ, we obtain competitive performance in all these tasks. We emphasize that FSQ does not suffer from codebook collapse and does not need the complex machinery employed in VQ (commitment losses, codebook reseeding, code splitting, entropy penalties, etc.) to learn expressive discrete representations.","one_line_summary":"Finite scalar quantization simplifies VQ-VAE latents by independently rounding a few dimensions to fixed levels, producing an equivalent-sized implicit codebook with competitive performance and no collapse.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That projecting the VAE latent to a small number of dimensions (typically less than 10) and quantizing each independently to fixed levels preserves sufficient representational capacity for the downstream tasks to match VQ performance.","pith_extraction_headline":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels."},"references":{"count":22,"sample":[{"doi":"","year":null,"title":"Cm3: A causal masked multimodal model of the internet","work_id":"a4a6d3b6-13f5-437f-8081-765dd23198b9","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Scaling laws for generative mixed-modal language models.arXiv preprint arXiv:2301.03728","work_id":"22042a59-e502-4dd1-8288-7f2de7d5f7af","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"High Quality Monocular Depth Estimation via Transfer Learning","work_id":"9919fef4-288f-41ca-84ef-aec0a2134c9c","ref_index":3,"cited_arxiv_id":"1812.11941","is_internal_anchor":true},{"doi":"","year":null,"title":"End-to-end optimized image compression","work_id":"84c3cd9a-bc02-4db1-b13c-eb3806b8477f","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","ref_index":5,"cited_arxiv_id":"1308.3432","is_internal_anchor":true}],"resolved_work":22,"snapshot_sha256":"00eeb833771c1a217378391d710e5dd9be137029044fe49d21b5e7d798026324","internal_anchors":8},"formal_canon":{"evidence_count":2,"snapshot_sha256":"2923d75a6040f9a72fdeb579d88de63a444019396d5de2a7cb5c07eacc20f03e"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"ffeb40f3-390e-4869-b3d4-2e702891f5db"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DF3V43uCUOGR6uuxzbpK3R/lrzf5H7uT3TiHih8Db1z8v6l/AI+0EFumvGm9isihPBXRwA9j8hBZmgL+G6VQDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T06:39:10.642719Z"},"content_sha256":"98ea51386330d505e893bb962fcf858a23f0e8f9fe9cc7c644e540515659bd25","schema_version":"1.0","event_id":"sha256:98ea51386330d505e893bb962fcf858a23f0e8f9fe9cc7c644e540515659bd25"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/U5KALI76GX5OZF7IVTQ3GLLULE/bundle.json","state_url":"https://pith.science/pith/U5KALI76GX5OZF7IVTQ3GLLULE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/U5KALI76GX5OZF7IVTQ3GLLULE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T06:39:10Z","links":{"resolver":"https://pith.science/pith/U5KALI76GX5OZF7IVTQ3GLLULE","bundle":"https://pith.science/pith/U5KALI76GX5OZF7IVTQ3GLLULE/bundle.json","state":"https://pith.science/pith/U5KALI76GX5OZF7IVTQ3GLLULE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/U5KALI76GX5OZF7IVTQ3GLLULE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:U5KALI76GX5OZF7IVTQ3GLLULE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"38f0f105f4217052b88ab6b5dbfc6a4369c298813925934c25e84d837197197d","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-09-27T09:13:40Z","title_canon_sha256":"62083644e49d78ab01722dcc2435bd6d5edf475e93cf1896c3bc04031e788647"},"schema_version":"1.0","source":{"id":"2309.15505","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2309.15505","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2309.15505v2","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2309.15505","created_at":"2026-05-17T23:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"U5KALI76GX5O","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"U5KALI76GX5OZF7I","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"U5KALI76","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:98ea51386330d505e893bb962fcf858a23f0e8f9fe9cc7c644e540515659bd25","target":"graph","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Despite the much simpler design of FSQ, we obtain competitive performance in all these tasks. We emphasize that FSQ does not suffer from codebook collapse and does not need the complex machinery employed in VQ (commitment losses, codebook reseeding, code splitting, entropy penalties, etc.) to learn expressive discrete representations."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That projecting the VAE latent to a small number of dimensions (typically less than 10) and quantizing each independently to fixed levels preserves sufficient representational capacity for the downstream tasks to match VQ performance."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Finite scalar quantization simplifies VQ-VAE latents by independently rounding a few dimensions to fixed levels, producing an equivalent-sized implicit codebook with competitive performance and no collapse."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels."}],"snapshot_sha256":"424a47ba721b65628e105cf5f81374a2933f1b44348becba0bca40b1a56ee31d"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"2923d75a6040f9a72fdeb579d88de63a444019396d5de2a7cb5c07eacc20f03e"},"paper":{"abstract_excerpt":"We propose to replace vector quantization (VQ) in the latent representation of VQ-VAEs with a simple scheme termed finite scalar quantization (FSQ), where we project the VAE representation down to a few dimensions (typically less than 10). Each dimension is quantized to a small set of fixed values, leading to an (implicit) codebook given by the product of these sets. By appropriately choosing the number of dimensions and values each dimension can take, we obtain the same codebook size as in VQ. On top of such discrete representations, we can train the same models that have been trained on VQ-V","authors_text":"David Minnen, Eirikur Agustsson, Fabian Mentzer, Michael Tschannen","cross_cats":["cs.LG"],"headline":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-09-27T09:13:40Z","title":"Finite Scalar Quantization: VQ-VAE Made Simple"},"references":{"count":22,"internal_anchors":8,"resolved_work":22,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Cm3: A causal masked multimodal model of the internet","work_id":"a4a6d3b6-13f5-437f-8081-765dd23198b9","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Scaling laws for generative mixed-modal language models.arXiv preprint arXiv:2301.03728","work_id":"22042a59-e502-4dd1-8288-7f2de7d5f7af","year":null},{"cited_arxiv_id":"1812.11941","doi":"","is_internal_anchor":true,"ref_index":3,"title":"High Quality Monocular Depth Estimation via Transfer Learning","work_id":"9919fef4-288f-41ca-84ef-aec0a2134c9c","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"End-to-end optimized image compression","work_id":"84c3cd9a-bc02-4db1-b13c-eb3806b8477f","year":null},{"cited_arxiv_id":"1308.3432","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Estimating or Propagating Gradients Through Stochastic Neurons for Conditional Computation","work_id":"1fe8c7c8-aff7-4b94-9096-e549d7e60789","year":null}],"snapshot_sha256":"00eeb833771c1a217378391d710e5dd9be137029044fe49d21b5e7d798026324"},"source":{"id":"2309.15505","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T18:28:56.392306Z","id":"ffeb40f3-390e-4869-b3d4-2e702891f5db","model_set":{"reader":"grok-4.3"},"one_line_summary":"Finite scalar quantization simplifies VQ-VAE latents by independently rounding a few dimensions to fixed levels, producing an equivalent-sized implicit codebook with competitive performance and no collapse.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"FSQ replaces vector quantization in VQ-VAEs by projecting latents to a few dimensions and quantizing each independently to fixed levels.","strongest_claim":"Despite the much simpler design of FSQ, we obtain competitive performance in all these tasks. We emphasize that FSQ does not suffer from codebook collapse and does not need the complex machinery employed in VQ (commitment losses, codebook reseeding, code splitting, entropy penalties, etc.) to learn expressive discrete representations.","weakest_assumption":"That projecting the VAE latent to a small number of dimensions (typically less than 10) and quantizing each independently to fixed levels preserves sufficient representational capacity for the downstream tasks to match VQ performance."}},"verdict_id":"ffeb40f3-390e-4869-b3d4-2e702891f5db"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:55da68bc49babda01816db52acf680c118d55d330da246fc29b681320facfb85","target":"record","created_at":"2026-05-17T23:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"38f0f105f4217052b88ab6b5dbfc6a4369c298813925934c25e84d837197197d","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-09-27T09:13:40Z","title_canon_sha256":"62083644e49d78ab01722dcc2435bd6d5edf475e93cf1896c3bc04031e788647"},"schema_version":"1.0","source":{"id":"2309.15505","kind":"arxiv","version":2}},"canonical_sha256":"a75405a3fe35faec97e8ace1b32d7459196712903675423b00ec02d36aade776","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a75405a3fe35faec97e8ace1b32d7459196712903675423b00ec02d36aade776","first_computed_at":"2026-05-17T23:38:46.967163Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:46.967163Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"svzM7nOEdKc2Tqqgc6BBB6uwpvkfbvCzbsbthsixs/MxGVWSajxEgoD4o3MZb6wMudOCsgWKIJpFOmxduBnkCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:46.967607Z","signed_message":"canonical_sha256_bytes"},"source_id":"2309.15505","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:55da68bc49babda01816db52acf680c118d55d330da246fc29b681320facfb85","sha256:98ea51386330d505e893bb962fcf858a23f0e8f9fe9cc7c644e540515659bd25"],"state_sha256":"7d6a8a61f26aeb4f5d965cecbcf6bd696cb74aab8d491da86fe837bc1159176d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XnanNOZep5rKvsWE0ppVSttlcJdpIpLdNZEmueQeaEglN2LGc9ozsfy42EbESiOETppj+H3Udc334L6Wi0JtDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T06:39:10.646495Z","bundle_sha256":"f033b028da52d832001b6dfc3f74d110e1015ba27999991011f4ac7822d49232"}}