{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:LLKKIOXEJ6GLDOLV5J5M6EUH26","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"557fa282993801dcca8e4cb76313c41065b849ea3b5e210f1729aa6a9e7ffc60","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T19:57:37Z","title_canon_sha256":"328a1b69fe26d5903f4fb876edd09f204b796c67846000a7a963e21444ecf45e"},"schema_version":"1.0","source":{"id":"2605.12703","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12703","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12703v1","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12703","created_at":"2026-05-18T03:09:49Z"},{"alias_kind":"pith_short_12","alias_value":"LLKKIOXEJ6GL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"LLKKIOXEJ6GLDOLV","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"LLKKIOXE","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:365e5438b9c269b68c73c45ed1773f1a7742ded7ba7566e612918d7ec9e5a0d8","target":"graph","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"current systems remain far from robust multimodal context learning, with even the strongest model solving fewer than one-third of tasks under strict evaluation"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The 102 tasks and rubric-based scoring faithfully isolate multimodal context learning without introducing unintended biases in task selection or evaluation criteria."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"MMCL-Bench shows that even the strongest frontier multimodal models solve fewer than one-third of tasks requiring recovery and application of visual rules, procedures, and empirical patterns."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Current multimodal models solve fewer than one-third of tasks that require learning rules and procedures from visual examples."}],"snapshot_sha256":"8b53c67f97ca0bae1b32f2bfff24cfffa8dc2600f8e7707fca8da30ea8ca02b7"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We introduce MMCL-Bench, a benchmark for multimodal context learning: learning task-local rules, procedures, and empirical patterns from visual or mixed-modality teaching context and applying them to new visual instances. Unlike text-only context learning or standard multimodal question answering, this setting requires models to recover and localize relevant evidence from images, screenshots, manuals, videos, and frame sequences before they can reason over the learned context. MMCL-Bench contains 102 tasks spanning three categories: rule system application, procedural task execution, and empir","authors_text":"Fei Yin, Qingyan Bai, Yifan Chen, Yujiu Yang, Zicheng Lin","cross_cats":["cs.AI"],"headline":"Current multimodal models solve fewer than one-third of tasks that require learning rules and procedures from visual examples.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T19:57:37Z","title":"MMCL-Bench: Multimodal Context Learning from Visual Rules, Procedures, and Evidence"},"references":{"count":25,"internal_anchors":3,"resolved_work":25,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Cl-bench: A benchmark for context learning","work_id":"4d10e4c6-07c6-47a2-bf06-94046a4b3d1a","year":2026},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"CL-bench Team. CL-bench leaderboard.https://www.clbench.com/, 2026. Accessed April 4, 2026","work_id":"e977157b-3f86-456f-8c13-2efe29c96b3d","year":2026},{"cited_arxiv_id":"2308.14508","doi":"","is_internal_anchor":true,"ref_index":3,"title":"LongBench: A Bilingual, Multitask Benchmark for Long Context Understanding","work_id":"ba7831c4-9427-4e0e-a5c1-4e98511f4b53","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Long- context llms struggle with long in-context learning.Computing Research Repository, abs/2404.02060","work_id":"595d1e20-91fe-461e-9442-4baeb3560aee","year":2024},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Mmlong- bench: Benchmarking long-context vision-language models effectively and thoroughly","work_id":"7c57f3bc-9274-4cae-9408-0472fb16354a","year":2025}],"snapshot_sha256":"029c93348fc23134abb07e658ecedbccb1a6cc27530b67ad5d43245b908b555d"},"source":{"id":"2605.12703","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T20:53:08.006051Z","id":"eee80bc9-2dc2-4871-a2df-f72a1d3b5aa1","model_set":{"reader":"grok-4.3"},"one_line_summary":"MMCL-Bench shows that even the strongest frontier multimodal models solve fewer than one-third of tasks requiring recovery and application of visual rules, procedures, and empirical patterns.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Current multimodal models solve fewer than one-third of tasks that require learning rules and procedures from visual examples.","strongest_claim":"current systems remain far from robust multimodal context learning, with even the strongest model solving fewer than one-third of tasks under strict evaluation","weakest_assumption":"The 102 tasks and rubric-based scoring faithfully isolate multimodal context learning without introducing unintended biases in task selection or evaluation criteria."}},"verdict_id":"eee80bc9-2dc2-4871-a2df-f72a1d3b5aa1"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:fcce5aa8953bf813d912665d4a745a97a50f0032c8c31d12f17774fde0ca1d12","target":"record","created_at":"2026-05-18T03:09:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"557fa282993801dcca8e4cb76313c41065b849ea3b5e210f1729aa6a9e7ffc60","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-12T19:57:37Z","title_canon_sha256":"328a1b69fe26d5903f4fb876edd09f204b796c67846000a7a963e21444ecf45e"},"schema_version":"1.0","source":{"id":"2605.12703","kind":"arxiv","version":1}},"canonical_sha256":"5ad4a43ae44f8cb1b975ea7acf1287d7ae0e3c8719c679e9811c6cd22ca90621","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5ad4a43ae44f8cb1b975ea7acf1287d7ae0e3c8719c679e9811c6cd22ca90621","first_computed_at":"2026-05-18T03:09:49.656656Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:49.656656Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0lWU9IYrKxmGsWzigfzFtPCh1SJrmD1BrgN8T73DniqQy9cF4TiHaBtF4EoWUD0LRFa5mOFludKzZ6GUGSc9BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:49.657406Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12703","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:fcce5aa8953bf813d912665d4a745a97a50f0032c8c31d12f17774fde0ca1d12","sha256:365e5438b9c269b68c73c45ed1773f1a7742ded7ba7566e612918d7ec9e5a0d8"],"state_sha256":"161a6775ce5cc1c06c9143a02f168a61923fa49c8e8af166d7f7770970503793"}