{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:HDKS4VNRBSHUJ66XRBPHWBDXLC","short_pith_number":"pith:HDKS4VNR","canonical_record":{"source":{"id":"2605.17954","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T07:09:46Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"0f2f9be06ae7eb8bd86b03f58641923b1f9c82738c7d79fd65b4e719b6f0a463","abstract_canon_sha256":"d08e35eeb1c849ef157da68b06f3b5f56bb359770956cd0c7d0996827a7e89f9"},"schema_version":"1.0"},"canonical_sha256":"38d52e55b10c8f44fbd7885e7b04775896ff3c2f06fab89330066d723265c7f9","source":{"kind":"arxiv","id":"2605.17954","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17954","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17954v1","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17954","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_12","alias_value":"HDKS4VNRBSHU","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_16","alias_value":"HDKS4VNRBSHUJ66X","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_8","alias_value":"HDKS4VNR","created_at":"2026-05-20T00:05:07Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:HDKS4VNRBSHUJ66XRBPHWBDXLC","target":"record","payload":{"canonical_record":{"source":{"id":"2605.17954","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T07:09:46Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"0f2f9be06ae7eb8bd86b03f58641923b1f9c82738c7d79fd65b4e719b6f0a463","abstract_canon_sha256":"d08e35eeb1c849ef157da68b06f3b5f56bb359770956cd0c7d0996827a7e89f9"},"schema_version":"1.0"},"canonical_sha256":"38d52e55b10c8f44fbd7885e7b04775896ff3c2f06fab89330066d723265c7f9","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:05:07.908445Z","signature_b64":"IEpOHANgh0MaSeiV4tJohklcSBC24+XnQZNJkr6trmoCJcXIusCxiSpp6METXSyFPoWHUknf7IPhRKZUMOoyBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"38d52e55b10c8f44fbd7885e7b04775896ff3c2f06fab89330066d723265c7f9","last_reissued_at":"2026-05-20T00:05:07.907603Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:05:07.907603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.17954","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZnBPWs7jdmk7OnMsM2rSYKE6UVUhPunygJiMPn99fvSMQYF9KxLq5Ujdedjsrj6lol3EFBFap7uwOUz/yTnFBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:15:00.761880Z"},"content_sha256":"e7d28ad725fb96039d63f443621dbece414966be64a1fb9646e01946762f9883","schema_version":"1.0","event_id":"sha256:e7d28ad725fb96039d63f443621dbece414966be64a1fb9646e01946762f9883"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:HDKS4VNRBSHUJ66XRBPHWBDXLC","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A More Word-like Image Tokenization for MLLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CV","authors_text":"Hyemin Jeong, Hyungwook Choi, Hyun Lee, Hyunsoo Cho, Joonseok Lee, Soo Kyung Kim, Yejin Kim","submitted_at":"2026-05-18T07:09:46Z","abstract_excerpt":"Modern multimodal large language models (MLLMs) typically keep the language model fixed and train a visual projector that maps the pixels into a sequence of tokens in its embedding space, so that images can be presented in essentially the same form as text. However, the language model has been optimized to operate on discrete, semantically meaningful tokens, while prevailing visual projectors transform an image into a long stream of continuous and highly correlated embeddings. This causes the visual tokens to behave differently from the word-like units that LLMs are originally trained to under"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17954","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17954/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.594111Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ae52c79f147974b43cd93d2224351b10941ba4c5dbcd61c77d1bf200ba69f940"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lJMz9LP5GNVuTF9rk1ke1jLb23bPSfK9EsytFsqjWN2QGN/RhZIr4vTgqWkRQdWUjzx4cK2QPgWjXN8CLZSKBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T22:15:00.762295Z"},"content_sha256":"66323221560fd56fd44c90fa69fad2125fb6995e8ea3fc025423aac25cfc34c5","schema_version":"1.0","event_id":"sha256:66323221560fd56fd44c90fa69fad2125fb6995e8ea3fc025423aac25cfc34c5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/bundle.json","state_url":"https://pith.science/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T22:15:00Z","links":{"resolver":"https://pith.science/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC","bundle":"https://pith.science/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/bundle.json","state":"https://pith.science/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HDKS4VNRBSHUJ66XRBPHWBDXLC/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:HDKS4VNRBSHUJ66XRBPHWBDXLC","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d08e35eeb1c849ef157da68b06f3b5f56bb359770956cd0c7d0996827a7e89f9","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T07:09:46Z","title_canon_sha256":"0f2f9be06ae7eb8bd86b03f58641923b1f9c82738c7d79fd65b4e719b6f0a463"},"schema_version":"1.0","source":{"id":"2605.17954","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17954","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17954v1","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17954","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_12","alias_value":"HDKS4VNRBSHU","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_16","alias_value":"HDKS4VNRBSHUJ66X","created_at":"2026-05-20T00:05:07Z"},{"alias_kind":"pith_short_8","alias_value":"HDKS4VNR","created_at":"2026-05-20T00:05:07Z"}],"graph_snapshots":[{"event_id":"sha256:66323221560fd56fd44c90fa69fad2125fb6995e8ea3fc025423aac25cfc34c5","target":"graph","created_at":"2026-05-20T00:05:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.594111Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.17954/integrity.json","findings":[],"snapshot_sha256":"ae52c79f147974b43cd93d2224351b10941ba4c5dbcd61c77d1bf200ba69f940","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Modern multimodal large language models (MLLMs) typically keep the language model fixed and train a visual projector that maps the pixels into a sequence of tokens in its embedding space, so that images can be presented in essentially the same form as text. However, the language model has been optimized to operate on discrete, semantically meaningful tokens, while prevailing visual projectors transform an image into a long stream of continuous and highly correlated embeddings. This causes the visual tokens to behave differently from the word-like units that LLMs are originally trained to under","authors_text":"Hyemin Jeong, Hyungwook Choi, Hyun Lee, Hyunsoo Cho, Joonseok Lee, Soo Kyung Kim, Yejin Kim","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T07:09:46Z","title":"A More Word-like Image Tokenization for MLLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17954","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e7d28ad725fb96039d63f443621dbece414966be64a1fb9646e01946762f9883","target":"record","created_at":"2026-05-20T00:05:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d08e35eeb1c849ef157da68b06f3b5f56bb359770956cd0c7d0996827a7e89f9","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-18T07:09:46Z","title_canon_sha256":"0f2f9be06ae7eb8bd86b03f58641923b1f9c82738c7d79fd65b4e719b6f0a463"},"schema_version":"1.0","source":{"id":"2605.17954","kind":"arxiv","version":1}},"canonical_sha256":"38d52e55b10c8f44fbd7885e7b04775896ff3c2f06fab89330066d723265c7f9","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"38d52e55b10c8f44fbd7885e7b04775896ff3c2f06fab89330066d723265c7f9","first_computed_at":"2026-05-20T00:05:07.907603Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:05:07.907603Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IEpOHANgh0MaSeiV4tJohklcSBC24+XnQZNJkr6trmoCJcXIusCxiSpp6METXSyFPoWHUknf7IPhRKZUMOoyBw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:05:07.908445Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.17954","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e7d28ad725fb96039d63f443621dbece414966be64a1fb9646e01946762f9883","sha256:66323221560fd56fd44c90fa69fad2125fb6995e8ea3fc025423aac25cfc34c5"],"state_sha256":"6373bdc9f6c9dd36af611b232e9ceb3717fa173e1161692a506d6c57df909e1e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XoTPFbfQGATwpCrxKt4RJEcKdUk0MtBdhyh7wpTn775TpGjUUfwWio0T6wjAzOlCVzP6ZsUdS3T7yyuRHNWzAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T22:15:00.765712Z","bundle_sha256":"acc0f2cb94b6b104381f8a1ad05c76e7a178ff1664d1be30c792e0cb0ef7a0bd"}}