{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:S5L2URC5ZEDCPHWNOA6RLD3LUA","short_pith_number":"pith:S5L2URC5","canonical_record":{"source":{"id":"2406.13621","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-19T15:17:10Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"3323804a7f6c9c26d4273267201917dfc865b4ec8dcdb409912136f4cfa59e39","abstract_canon_sha256":"04930fdf8e8782c55db1e2c1aa8993fb1767e9b84afe016ade8dc829e439e4bb"},"schema_version":"1.0"},"canonical_sha256":"9757aa445dc906279ecd703d158f6ba01526152019d022966772f137a68c3dbd","source":{"kind":"arxiv","id":"2406.13621","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.13621","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"arxiv_version","alias_value":"2406.13621v2","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.13621","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_12","alias_value":"S5L2URC5ZEDC","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_16","alias_value":"S5L2URC5ZEDCPHWN","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_8","alias_value":"S5L2URC5","created_at":"2026-05-18T15:04:06Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:S5L2URC5ZEDCPHWNOA6RLD3LUA","target":"record","payload":{"canonical_record":{"source":{"id":"2406.13621","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-19T15:17:10Z","cross_cats_sorted":["cs.CV","cs.LG"],"title_canon_sha256":"3323804a7f6c9c26d4273267201917dfc865b4ec8dcdb409912136f4cfa59e39","abstract_canon_sha256":"04930fdf8e8782c55db1e2c1aa8993fb1767e9b84afe016ade8dc829e439e4bb"},"schema_version":"1.0"},"canonical_sha256":"9757aa445dc906279ecd703d158f6ba01526152019d022966772f137a68c3dbd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T15:04:06.551346Z","signature_b64":"wLwdPeJW/jXG4KB4FbVVI4qIdbLa1Q+Y02YfqclEaeRzTJXS8vH5gKqo2pYgsxcmP7B3AsBKabIjG6MejXTLCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9757aa445dc906279ecd703d158f6ba01526152019d022966772f137a68c3dbd","last_reissued_at":"2026-05-18T15:04:06.548944Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T15:04:06.548944Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2406.13621","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T15:04:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gCkNfC4f4k4/gKJezHHi5lTEw95JHddE+edUXTp1IaWQxt1IlMe+rtYyvKngeXjukzKTVw3GuPTS/SmTvuIhCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T03:05:26.509615Z"},"content_sha256":"087306c5e5323065659291176aa535bbdcfb53c44ae8797a6180b2e50b233071","schema_version":"1.0","event_id":"sha256:087306c5e5323065659291176aa535bbdcfb53c44ae8797a6180b2e50b233071"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:S5L2URC5ZEDCPHWNOA6RLD3LUA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"LaMI: Augmenting Large Language Models via Late Multi-Image Fusion","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV","cs.LG"],"primary_cat":"cs.CL","authors_text":"Guy Yariv, Idan Schwartz, Sagie Benaim, Yossi Adi","submitted_at":"2024-06-19T15:17:10Z","abstract_excerpt":"Commonsense reasoning often requires both textual and visual knowledge, yet Large Language Models (LLMs) trained solely on text lack visual grounding (e.g., \"what color is an emperor penguin's belly?\"). Visual Language Models (VLMs) perform better on visually grounded tasks but face two limitations: (i) often reduced performance on text-only commonsense reasoning compared to text-trained LLMs, and (ii) adapting newly released LLMs to vision input typically requires costly multimodal training. An alternative augments LLMs with test-time visual signals, improving visual commonsense without harmi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.13621","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T15:04:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IVs5G0JOD4UgvYGAiPxmewkfJh/6mtbS5UBiH4dyIi839ScrDc5j0+FuRK6o0H7OkNBB6mA3XU6fGuPyJX4bAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T03:05:26.510290Z"},"content_sha256":"2e2a2511ba0c779fa3f8e218ea1b5f70a2cbd18f579922ac8a5e595d3408f543","schema_version":"1.0","event_id":"sha256:2e2a2511ba0c779fa3f8e218ea1b5f70a2cbd18f579922ac8a5e595d3408f543"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/bundle.json","state_url":"https://pith.science/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T03:05:26Z","links":{"resolver":"https://pith.science/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA","bundle":"https://pith.science/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/bundle.json","state":"https://pith.science/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/S5L2URC5ZEDCPHWNOA6RLD3LUA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:S5L2URC5ZEDCPHWNOA6RLD3LUA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"04930fdf8e8782c55db1e2c1aa8993fb1767e9b84afe016ade8dc829e439e4bb","cross_cats_sorted":["cs.CV","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-19T15:17:10Z","title_canon_sha256":"3323804a7f6c9c26d4273267201917dfc865b4ec8dcdb409912136f4cfa59e39"},"schema_version":"1.0","source":{"id":"2406.13621","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2406.13621","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"arxiv_version","alias_value":"2406.13621v2","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2406.13621","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_12","alias_value":"S5L2URC5ZEDC","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_16","alias_value":"S5L2URC5ZEDCPHWN","created_at":"2026-05-18T15:04:06Z"},{"alias_kind":"pith_short_8","alias_value":"S5L2URC5","created_at":"2026-05-18T15:04:06Z"}],"graph_snapshots":[{"event_id":"sha256:2e2a2511ba0c779fa3f8e218ea1b5f70a2cbd18f579922ac8a5e595d3408f543","target":"graph","created_at":"2026-05-18T15:04:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Commonsense reasoning often requires both textual and visual knowledge, yet Large Language Models (LLMs) trained solely on text lack visual grounding (e.g., \"what color is an emperor penguin's belly?\"). Visual Language Models (VLMs) perform better on visually grounded tasks but face two limitations: (i) often reduced performance on text-only commonsense reasoning compared to text-trained LLMs, and (ii) adapting newly released LLMs to vision input typically requires costly multimodal training. An alternative augments LLMs with test-time visual signals, improving visual commonsense without harmi","authors_text":"Guy Yariv, Idan Schwartz, Sagie Benaim, Yossi Adi","cross_cats":["cs.CV","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-19T15:17:10Z","title":"LaMI: Augmenting Large Language Models via Late Multi-Image Fusion"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2406.13621","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:087306c5e5323065659291176aa535bbdcfb53c44ae8797a6180b2e50b233071","target":"record","created_at":"2026-05-18T15:04:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"04930fdf8e8782c55db1e2c1aa8993fb1767e9b84afe016ade8dc829e439e4bb","cross_cats_sorted":["cs.CV","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2024-06-19T15:17:10Z","title_canon_sha256":"3323804a7f6c9c26d4273267201917dfc865b4ec8dcdb409912136f4cfa59e39"},"schema_version":"1.0","source":{"id":"2406.13621","kind":"arxiv","version":2}},"canonical_sha256":"9757aa445dc906279ecd703d158f6ba01526152019d022966772f137a68c3dbd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9757aa445dc906279ecd703d158f6ba01526152019d022966772f137a68c3dbd","first_computed_at":"2026-05-18T15:04:06.548944Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T15:04:06.548944Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wLwdPeJW/jXG4KB4FbVVI4qIdbLa1Q+Y02YfqclEaeRzTJXS8vH5gKqo2pYgsxcmP7B3AsBKabIjG6MejXTLCw==","signature_status":"signed_v1","signed_at":"2026-05-18T15:04:06.551346Z","signed_message":"canonical_sha256_bytes"},"source_id":"2406.13621","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:087306c5e5323065659291176aa535bbdcfb53c44ae8797a6180b2e50b233071","sha256:2e2a2511ba0c779fa3f8e218ea1b5f70a2cbd18f579922ac8a5e595d3408f543"],"state_sha256":"5623d9144422e243bfb8936b89ba43eb7e74a31e26cc6793629eb32c57da80b1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JpE+t8oYOEtIVFkMiXevkk1WHI+sEfkZXlBgpvSuEXCy/77/KLmyRbaloR1GEgeeaoDFeFlh4vDlnuXknxiyAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T03:05:26.513843Z","bundle_sha256":"53928d0029cb86306c8ed9b464e15eff3e4b6f6fcd16ca8fdd9933266bc1e9c4"}}