{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2","short_pith_number":"pith:ZHZ4H5KK","canonical_record":{"source":{"id":"2603.09095","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-10T02:14:23Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"7fc425ac4fb8436d81766722be4f01dd37ac5b3827571403aa42b04dc53fa0fb","abstract_canon_sha256":"e5cace3e6ce94170c42264d27da2c953aec6d780c09aab312461d5cdb2912bbb"},"schema_version":"1.0"},"canonical_sha256":"c9f3c3f54a39989d945fce25a9de027e9501047fcb5ae28ee318320ca7c70d59","source":{"kind":"arxiv","id":"2603.09095","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.09095","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"arxiv_version","alias_value":"2603.09095v2","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.09095","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_12","alias_value":"ZHZ4H5KKHGMJ","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_16","alias_value":"ZHZ4H5KKHGMJ3FC7","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_8","alias_value":"ZHZ4H5KK","created_at":"2026-05-26T01:03:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2","target":"record","payload":{"canonical_record":{"source":{"id":"2603.09095","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-10T02:14:23Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"7fc425ac4fb8436d81766722be4f01dd37ac5b3827571403aa42b04dc53fa0fb","abstract_canon_sha256":"e5cace3e6ce94170c42264d27da2c953aec6d780c09aab312461d5cdb2912bbb"},"schema_version":"1.0"},"canonical_sha256":"c9f3c3f54a39989d945fce25a9de027e9501047fcb5ae28ee318320ca7c70d59","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:27.414392Z","signature_b64":"yK0AILhAoMUownkmna6m2yqULBBOyfbzgLmQIXsZ53tgHbNLHI36nyNSQb9aXdnpAkUno5vKkaNggYS6pM7tAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c9f3c3f54a39989d945fce25a9de027e9501047fcb5ae28ee318320ca7c70d59","last_reissued_at":"2026-05-26T01:03:27.413066Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:27.413066Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.09095","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v8yJNuO4mKswsYft4oFHx1rSJXOBagwOw0zuwj3gJhuZ/n9NGV0KzmzUYgD4yJuEXZqfWjcaNFBmatxH6gA7Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T07:48:10.904730Z"},"content_sha256":"082c30118c625800c5c80b97e0a8799a3ae4d7cc49f4f2b8b0e0992cbcbc08a5","schema_version":"1.0","event_id":"sha256:082c30118c625800c5c80b97e0a8799a3ae4d7cc49f4f2b8b0e0992cbcbc08a5"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reading, Not Thinking: Understanding and Bridging the Modality Gap When Text Becomes Pixels in Multimodal LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Cheng Zhang, Chen Zhao, Fan Bai, Hongjun Liu, Kaiser Sun, Mark Dredze, Xiaochuang Yuan","submitted_at":"2026-03-10T02:14:23Z","abstract_excerpt":"Multimodal large language models (MLLMs) can process text presented as images, yet they often perform worse than when the same content is provided as textual tokens.\n  We systematically diagnose this \"modality gap\" by evaluating seven MLLMs across seven benchmarks in five input modes, spanning both synthetically rendered text and realistic document images from arXiv PDFs to Wikipedia pages.\n  We find that the gap is highly sensitive to rendering choices such as font and resolution, and that natural document images often exhibit much smaller gaps, suggesting the performance difference partly re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.09095","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.09095/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Mk+H9cB4MkeFFQ0YGf5bFSqQRahYJ98GBT0DprApTuY1FSOk8NW55jeCuVVS1TFPCOGNzGno77jvYwQKj591DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T07:48:10.905165Z"},"content_sha256":"180b77d8c37fb02651c2713c432569aaec73bed639bff66b22e7dcbda267adba","schema_version":"1.0","event_id":"sha256:180b77d8c37fb02651c2713c432569aaec73bed639bff66b22e7dcbda267adba"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/bundle.json","state_url":"https://pith.science/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T07:48:10Z","links":{"resolver":"https://pith.science/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2","bundle":"https://pith.science/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/bundle.json","state":"https://pith.science/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZHZ4H5KKHGMJ3FC7ZYS2TXQCP2","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e5cace3e6ce94170c42264d27da2c953aec6d780c09aab312461d5cdb2912bbb","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-10T02:14:23Z","title_canon_sha256":"7fc425ac4fb8436d81766722be4f01dd37ac5b3827571403aa42b04dc53fa0fb"},"schema_version":"1.0","source":{"id":"2603.09095","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.09095","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"arxiv_version","alias_value":"2603.09095v2","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.09095","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_12","alias_value":"ZHZ4H5KKHGMJ","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_16","alias_value":"ZHZ4H5KKHGMJ3FC7","created_at":"2026-05-26T01:03:27Z"},{"alias_kind":"pith_short_8","alias_value":"ZHZ4H5KK","created_at":"2026-05-26T01:03:27Z"}],"graph_snapshots":[{"event_id":"sha256:180b77d8c37fb02651c2713c432569aaec73bed639bff66b22e7dcbda267adba","target":"graph","created_at":"2026-05-26T01:03:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.09095/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multimodal large language models (MLLMs) can process text presented as images, yet they often perform worse than when the same content is provided as textual tokens.\n  We systematically diagnose this \"modality gap\" by evaluating seven MLLMs across seven benchmarks in five input modes, spanning both synthetically rendered text and realistic document images from arXiv PDFs to Wikipedia pages.\n  We find that the gap is highly sensitive to rendering choices such as font and resolution, and that natural document images often exhibit much smaller gaps, suggesting the performance difference partly re","authors_text":"Cheng Zhang, Chen Zhao, Fan Bai, Hongjun Liu, Kaiser Sun, Mark Dredze, Xiaochuang Yuan","cross_cats":["cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-10T02:14:23Z","title":"Reading, Not Thinking: Understanding and Bridging the Modality Gap When Text Becomes Pixels in Multimodal LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.09095","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:082c30118c625800c5c80b97e0a8799a3ae4d7cc49f4f2b8b0e0992cbcbc08a5","target":"record","created_at":"2026-05-26T01:03:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e5cace3e6ce94170c42264d27da2c953aec6d780c09aab312461d5cdb2912bbb","cross_cats_sorted":["cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-10T02:14:23Z","title_canon_sha256":"7fc425ac4fb8436d81766722be4f01dd37ac5b3827571403aa42b04dc53fa0fb"},"schema_version":"1.0","source":{"id":"2603.09095","kind":"arxiv","version":2}},"canonical_sha256":"c9f3c3f54a39989d945fce25a9de027e9501047fcb5ae28ee318320ca7c70d59","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c9f3c3f54a39989d945fce25a9de027e9501047fcb5ae28ee318320ca7c70d59","first_computed_at":"2026-05-26T01:03:27.413066Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:27.413066Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yK0AILhAoMUownkmna6m2yqULBBOyfbzgLmQIXsZ53tgHbNLHI36nyNSQb9aXdnpAkUno5vKkaNggYS6pM7tAg==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:27.414392Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.09095","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:082c30118c625800c5c80b97e0a8799a3ae4d7cc49f4f2b8b0e0992cbcbc08a5","sha256:180b77d8c37fb02651c2713c432569aaec73bed639bff66b22e7dcbda267adba"],"state_sha256":"5975c34263ae7e4ce61c7c3931c1432f8f68ba3f19e73d52e74b30ed1c6a48ee"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LylJvURho8swLRN4unYpZTvY1y8WHdZE1LzmI1RIOkyHQAK5AaU1chL0ZL3DVZ4nQ+OBjbrdmi6RJA2IQMWdDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T07:48:10.908448Z","bundle_sha256":"83b929507bcfd5c7877f69b1518a8fa6526a5314e877e01b9407250edeb7bf26"}}