{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:F7OKRRH4TKXYSJMWPJTZ2NYUKF","short_pith_number":"pith:F7OKRRH4","canonical_record":{"source":{"id":"2602.18600","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T20:22:18Z","cross_cats_sorted":[],"title_canon_sha256":"aead3dc36643a46d205a2afd44e2e750b4ade9ca6daf75815b2b545167ae6e4b","abstract_canon_sha256":"604fd920c196bb9ec9df9bba21733ab23ef1417cbcc77a45ad2fba8b385cb1e6"},"schema_version":"1.0"},"canonical_sha256":"2fdca8c4fc9aaf8925967a679d371451513b1101e3cac7c7d28ae45bac3a7cc2","source":{"kind":"arxiv","id":"2602.18600","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.18600","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"arxiv_version","alias_value":"2602.18600v3","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.18600","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_12","alias_value":"F7OKRRH4TKXY","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_16","alias_value":"F7OKRRH4TKXYSJMW","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_8","alias_value":"F7OKRRH4","created_at":"2026-05-22T01:03:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:F7OKRRH4TKXYSJMWPJTZ2NYUKF","target":"record","payload":{"canonical_record":{"source":{"id":"2602.18600","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T20:22:18Z","cross_cats_sorted":[],"title_canon_sha256":"aead3dc36643a46d205a2afd44e2e750b4ade9ca6daf75815b2b545167ae6e4b","abstract_canon_sha256":"604fd920c196bb9ec9df9bba21733ab23ef1417cbcc77a45ad2fba8b385cb1e6"},"schema_version":"1.0"},"canonical_sha256":"2fdca8c4fc9aaf8925967a679d371451513b1101e3cac7c7d28ae45bac3a7cc2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:03:58.681136Z","signature_b64":"i6E1KJjTV8FTvLfZy3AQVQqKdW464yevzLaNudLMC1T9TWHUR7WjzpRoUQst3MQETXIBD8ETXVbtnLrfWm08CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2fdca8c4fc9aaf8925967a679d371451513b1101e3cac7c7d28ae45bac3a7cc2","last_reissued_at":"2026-05-22T01:03:58.680300Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:03:58.680300Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.18600","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VsjDBure7gqArlqBD4aQmzp59JYp4nTPJy1j6NNPs2jVptwWAMu5gRN0MDuIUx6jh7/ARmZpCF9LgHoD6tReCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:42:02.381485Z"},"content_sha256":"2bd81d2318e8e5a69095669a26e39df291495730ca84672456997ceacfc35420","schema_version":"1.0","event_id":"sha256:2bd81d2318e8e5a69095669a26e39df291495730ca84672456997ceacfc35420"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:F7OKRRH4TKXYSJMWPJTZ2NYUKF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"MapTab: Are MLLMs Ready for Multi-Criteria Route Planning in Heterogeneous Graphs?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bin Liu, Lan-Zhe Guo, Lingyue Ge, Shi-Yu Tian, Weiming Wu, Wenbo Fu, Xiangwen Zhang, Yang Chen, Yu-Feng Li, Yulan Hu, Zhenyu Huang, Zi-Jian Cheng, Ziqiao Shang","submitted_at":"2026-02-20T20:22:18Z","abstract_excerpt":"Systematic evaluation of Multimodal Large Language Models (MLLMs) is crucial for advancing Artificial General Intelligence (AGI). However, existing benchmarks remain insufficient for rigorously assessing their reasoning capabilities under multi-criteria constraints. To bridge this gap, we introduce MapTab, a multimodal benchmark specifically designed to evaluate holistic multi-criteria reasoning in MLLMs via route planning tasks. MapTab requires MLLMs to perceive and ground visual cues from map images alongside route attributes (e.g., Time, Price) from structured tabular data. The benchmark en"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Extensive evaluations across 15 representative MLLMs reveal that current models face substantial challenges in multi-criteria multimodal reasoning. Notably, under conditions of limited visual perception, multimodal collaboration often underperforms compared to unimodal approaches.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The constructed queries and four criteria (Time, Price, Comfort, Reliability) are assumed to form a faithful and sufficiently difficult proxy for real-world multi-criteria route planning that generalizes beyond the chosen 160 cities and 168 attractions.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"MapTab benchmark shows current MLLMs struggle with multi-criteria multimodal route planning and that combining vision and language frequently underperforms single-modality approaches.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"469eb569c27581f4259585478cdfef5587c77cafeb028f378969d55860fdcda6"},"source":{"id":"2602.18600","kind":"arxiv","version":3},"verdict":{"id":"f19241bb-7831-484f-a52a-2b63bdbcf356","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T20:10:15.420352Z","strongest_claim":"Extensive evaluations across 15 representative MLLMs reveal that current models face substantial challenges in multi-criteria multimodal reasoning. Notably, under conditions of limited visual perception, multimodal collaboration often underperforms compared to unimodal approaches.","one_line_summary":"MapTab benchmark shows current MLLMs struggle with multi-criteria multimodal route planning and that combining vision and language frequently underperforms single-modality approaches.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The constructed queries and four criteria (Time, Price, Comfort, Reliability) are assumed to form a faithful and sufficiently difficult proxy for real-world multi-criteria route planning that generalizes beyond the chosen 160 cities and 168 attractions.","pith_extraction_headline":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.18600/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"1e15fc5bf186d06a2463660d21ba1b9e305948638302afc84a8b690b274025fe"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"f19241bb-7831-484f-a52a-2b63bdbcf356"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"GrT0mtF7xy0ONFTrl2rpfDD0Uz6v2HOj9m2cHtVJKrmsUofAgQ4zZHMlzqGEo5RxH80J/OO9MBv2HFRaPjTwBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:42:02.382036Z"},"content_sha256":"874b3f2e4d0d0a4c71635a5466686e5b8f5cb29f17a35fddf65190c9bd65cef6","schema_version":"1.0","event_id":"sha256:874b3f2e4d0d0a4c71635a5466686e5b8f5cb29f17a35fddf65190c9bd65cef6"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/bundle.json","state_url":"https://pith.science/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T19:42:02Z","links":{"resolver":"https://pith.science/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF","bundle":"https://pith.science/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/bundle.json","state":"https://pith.science/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/F7OKRRH4TKXYSJMWPJTZ2NYUKF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:F7OKRRH4TKXYSJMWPJTZ2NYUKF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"604fd920c196bb9ec9df9bba21733ab23ef1417cbcc77a45ad2fba8b385cb1e6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T20:22:18Z","title_canon_sha256":"aead3dc36643a46d205a2afd44e2e750b4ade9ca6daf75815b2b545167ae6e4b"},"schema_version":"1.0","source":{"id":"2602.18600","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.18600","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"arxiv_version","alias_value":"2602.18600v3","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.18600","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_12","alias_value":"F7OKRRH4TKXY","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_16","alias_value":"F7OKRRH4TKXYSJMW","created_at":"2026-05-22T01:03:58Z"},{"alias_kind":"pith_short_8","alias_value":"F7OKRRH4","created_at":"2026-05-22T01:03:58Z"}],"graph_snapshots":[{"event_id":"sha256:874b3f2e4d0d0a4c71635a5466686e5b8f5cb29f17a35fddf65190c9bd65cef6","target":"graph","created_at":"2026-05-22T01:03:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Extensive evaluations across 15 representative MLLMs reveal that current models face substantial challenges in multi-criteria multimodal reasoning. Notably, under conditions of limited visual perception, multimodal collaboration often underperforms compared to unimodal approaches."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The constructed queries and four criteria (Time, Price, Comfort, Reliability) are assumed to form a faithful and sufficiently difficult proxy for real-world multi-criteria route planning that generalizes beyond the chosen 160 cities and 168 attractions."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MapTab benchmark shows current MLLMs struggle with multi-criteria multimodal route planning and that combining vision and language frequently underperforms single-modality approaches."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data."}],"snapshot_sha256":"469eb569c27581f4259585478cdfef5587c77cafeb028f378969d55860fdcda6"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"1e15fc5bf186d06a2463660d21ba1b9e305948638302afc84a8b690b274025fe"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.18600/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Systematic evaluation of Multimodal Large Language Models (MLLMs) is crucial for advancing Artificial General Intelligence (AGI). However, existing benchmarks remain insufficient for rigorously assessing their reasoning capabilities under multi-criteria constraints. To bridge this gap, we introduce MapTab, a multimodal benchmark specifically designed to evaluate holistic multi-criteria reasoning in MLLMs via route planning tasks. MapTab requires MLLMs to perceive and ground visual cues from map images alongside route attributes (e.g., Time, Price) from structured tabular data. The benchmark en","authors_text":"Bin Liu, Lan-Zhe Guo, Lingyue Ge, Shi-Yu Tian, Weiming Wu, Wenbo Fu, Xiangwen Zhang, Yang Chen, Yu-Feng Li, Yulan Hu, Zhenyu Huang, Zi-Jian Cheng, Ziqiao Shang","cross_cats":[],"headline":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T20:22:18Z","title":"MapTab: Are MLLMs Ready for Multi-Criteria Route Planning in Heterogeneous Graphs?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.18600","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-15T20:10:15.420352Z","id":"f19241bb-7831-484f-a52a-2b63bdbcf356","model_set":{"reader":"grok-4.3"},"one_line_summary":"MapTab benchmark shows current MLLMs struggle with multi-criteria multimodal route planning and that combining vision and language frequently underperforms single-modality approaches.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Multimodal large language models face substantial challenges in multi-criteria route planning that combines map images with tabular data.","strongest_claim":"Extensive evaluations across 15 representative MLLMs reveal that current models face substantial challenges in multi-criteria multimodal reasoning. Notably, under conditions of limited visual perception, multimodal collaboration often underperforms compared to unimodal approaches.","weakest_assumption":"The constructed queries and four criteria (Time, Price, Comfort, Reliability) are assumed to form a faithful and sufficiently difficult proxy for real-world multi-criteria route planning that generalizes beyond the chosen 160 cities and 168 attractions."}},"verdict_id":"f19241bb-7831-484f-a52a-2b63bdbcf356"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2bd81d2318e8e5a69095669a26e39df291495730ca84672456997ceacfc35420","target":"record","created_at":"2026-05-22T01:03:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"604fd920c196bb9ec9df9bba21733ab23ef1417cbcc77a45ad2fba8b385cb1e6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-20T20:22:18Z","title_canon_sha256":"aead3dc36643a46d205a2afd44e2e750b4ade9ca6daf75815b2b545167ae6e4b"},"schema_version":"1.0","source":{"id":"2602.18600","kind":"arxiv","version":3}},"canonical_sha256":"2fdca8c4fc9aaf8925967a679d371451513b1101e3cac7c7d28ae45bac3a7cc2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2fdca8c4fc9aaf8925967a679d371451513b1101e3cac7c7d28ae45bac3a7cc2","first_computed_at":"2026-05-22T01:03:58.680300Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:03:58.680300Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"i6E1KJjTV8FTvLfZy3AQVQqKdW464yevzLaNudLMC1T9TWHUR7WjzpRoUQst3MQETXIBD8ETXVbtnLrfWm08CQ==","signature_status":"signed_v1","signed_at":"2026-05-22T01:03:58.681136Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.18600","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2bd81d2318e8e5a69095669a26e39df291495730ca84672456997ceacfc35420","sha256:874b3f2e4d0d0a4c71635a5466686e5b8f5cb29f17a35fddf65190c9bd65cef6"],"state_sha256":"d25039c93ad3cacf75fe3534fb268eb401b3de3153d1c1a903ae39f6d19b3002"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"K2wAX3V1RXA9b8XBxI2RtKPr2xOSnpeaR5V4tWLeXazC8fG+pzL5l83nXpTIjKna76OlhY1FL0rD6VMQmCRuBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T19:42:02.386045Z","bundle_sha256":"66203391581856aec7e4bdefea53b31139500be0c9f93173970858d8dec06825"}}