{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:UMTFWY7FF7LPB4AG4LC3PPN4NN","short_pith_number":"pith:UMTFWY7F","canonical_record":{"source":{"id":"2311.10122","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44Z","cross_cats_sorted":[],"title_canon_sha256":"169b24f471d2208db1ce36173b5691902e0fd44518285d76760c7236864b0685","abstract_canon_sha256":"c80295a5762ecdeb6e65c5f49691842dea7b0fe27da82d7476d300d10c866324"},"schema_version":"1.0"},"canonical_sha256":"a3265b63e52fd6f0f006e2c5b7bdbc6b59ff9c3187e6dd8a99d4b5d12ca0d596","source":{"kind":"arxiv","id":"2311.10122","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2311.10122","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"arxiv_version","alias_value":"2311.10122v3","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2311.10122","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"pith_short_12","alias_value":"UMTFWY7FF7LP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"UMTFWY7FF7LPB4AG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"UMTFWY7F","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:UMTFWY7FF7LPB4AG4LC3PPN4NN","target":"record","payload":{"canonical_record":{"source":{"id":"2311.10122","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44Z","cross_cats_sorted":[],"title_canon_sha256":"169b24f471d2208db1ce36173b5691902e0fd44518285d76760c7236864b0685","abstract_canon_sha256":"c80295a5762ecdeb6e65c5f49691842dea7b0fe27da82d7476d300d10c866324"},"schema_version":"1.0"},"canonical_sha256":"a3265b63e52fd6f0f006e2c5b7bdbc6b59ff9c3187e6dd8a99d4b5d12ca0d596","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:22.232498Z","signature_b64":"Mv5IzXuboGLvQpANpwzEHBK9a0MKyollJFy2nrJXcRvXtzJS2UTaxRQIE3PObq5hhWM/S8YcNpdVIX3txITnDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a3265b63e52fd6f0f006e2c5b7bdbc6b59ff9c3187e6dd8a99d4b5d12ca0d596","last_reissued_at":"2026-05-17T23:39:22.231807Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:22.231807Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2311.10122","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UE0BaMD4pWrWcQzPqY5hpCcNgL5LLXqGpNke1sma7oRhigFOfVCps/ZzX6m5fF6aGeAUe/Dwq/6nU5pzZD2qAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T23:33:58.576806Z"},"content_sha256":"d0f59a9cd3597f4e3d3942e1ed1ed738f11cd57c39409bc493ec4d29e23811c0","schema_version":"1.0","event_id":"sha256:d0f59a9cd3597f4e3d3942e1ed1ed738f11cd57c39409bc493ec4d29e23811c0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:UMTFWY7FF7LPB4AG4LC3PPN4NN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Video-LLaVA: Learning United Visual Representation by Alignment Before Projection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bin Lin, Bin Zhu, Jiaxi Cui, Li Yuan, Munan Ning, Peng Jin, Yang Ye","submitted_at":"2023-11-16T10:59:44Z","abstract_excerpt":"The Large Vision-Language Model (LVLM) has enhanced the performance of various downstream tasks in visual-language understanding. Most existing approaches encode images and videos into separate feature spaces, which are then fed as inputs to large language models. However, due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers. In this work, we unify visual representation into the language feature space to advance the found"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"we unify visual representation into the language feature space to advance the foundational LLM towards a unified LVLM. As a result, we establish a simple but robust LVLM baseline, Video-LLaVA, which learns from a mixed dataset of images and videos, mutually enhancing each other.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Video-LLaVA creates a unified visual representation for images and videos via pre-projection alignment, enabling mutual enhancement from joint training and strong results on image and video benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"f2b3bac15ee6a45f3d1362e8edf3ba5003a44ec70f4824434b8a35147eb74b6c"},"source":{"id":"2311.10122","kind":"arxiv","version":3},"verdict":{"id":"5a5bd3af-90b1-4325-aaba-e920b4087d41","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T18:00:44.719539Z","strongest_claim":"we unify visual representation into the language feature space to advance the foundational LLM towards a unified LVLM. As a result, we establish a simple but robust LVLM baseline, Video-LLaVA, which learns from a mixed dataset of images and videos, mutually enhancing each other.","one_line_summary":"Video-LLaVA creates a unified visual representation for images and videos via pre-projection alignment, enabling mutual enhancement from joint training and strong results on image and video benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers.","pith_extraction_headline":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other."},"references":{"count":87,"sample":[{"doi":"","year":2022,"title":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model","work_id":"af714d03-fb34-46cc-9760-9ea257b01f78","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Max Bain, Arsha Nagrani, G \\\"u l Varol, and Andrew Zisserman. 2021. Frozen in time: A joint video and image encoder for end-to-end retrieval. In Proceedings of the IEEE/CVF International Conference on","work_id":"22f85f0f-8960-47a7-905b-bcc8a2bd58d4","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot lear","work_id":"50684699-ce18-4086-8bac-7cecd178fad0","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2011,"title":"David Chen and William B Dolan. 2011. Collecting highly parallel data for paraphrase evaluation. In Proceedings of the 49th annual meeting of the association for computational linguistics: human langu","work_id":"a7625cc6-f851-46ec-8b30-c2ba62f4d93a","ref_index":6,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2023,"title":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 wi","work_id":"32e12fcf-bb8e-4e6a-a249-13cd0e7d6e3f","ref_index":8,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":87,"snapshot_sha256":"dbcc5bd6d4f0f7258d0fe8ec6e440f6997594d8719a2176a8137e66cc8a4a412","internal_anchors":28},"formal_canon":{"evidence_count":1,"snapshot_sha256":"3c56d0259743582676dd476e1e36786cab235bdfc345a80f6bb28cfddd36bd3d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"5a5bd3af-90b1-4325-aaba-e920b4087d41"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6WRUf0uMOxzTowLzY9TdGWoJ0EuP4D6ktt6pAFjh9hCubkd/hmka+z3kuYN5pZiTSnLrh3QrYeNEL7fRa23hDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T23:33:58.577880Z"},"content_sha256":"3024a27500d4166116b3d9255536ef759b6273ecd6167862b5057b217375dacd","schema_version":"1.0","event_id":"sha256:3024a27500d4166116b3d9255536ef759b6273ecd6167862b5057b217375dacd"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/bundle.json","state_url":"https://pith.science/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T23:33:58Z","links":{"resolver":"https://pith.science/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN","bundle":"https://pith.science/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/bundle.json","state":"https://pith.science/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UMTFWY7FF7LPB4AG4LC3PPN4NN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:UMTFWY7FF7LPB4AG4LC3PPN4NN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c80295a5762ecdeb6e65c5f49691842dea7b0fe27da82d7476d300d10c866324","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44Z","title_canon_sha256":"169b24f471d2208db1ce36173b5691902e0fd44518285d76760c7236864b0685"},"schema_version":"1.0","source":{"id":"2311.10122","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2311.10122","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"arxiv_version","alias_value":"2311.10122v3","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2311.10122","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"pith_short_12","alias_value":"UMTFWY7FF7LP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"UMTFWY7FF7LPB4AG","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"UMTFWY7F","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:3024a27500d4166116b3d9255536ef759b6273ecd6167862b5057b217375dacd","target":"graph","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"we unify visual representation into the language feature space to advance the foundational LLM towards a unified LVLM. As a result, we establish a simple but robust LVLM baseline, Video-LLaVA, which learns from a mixed dataset of images and videos, mutually enhancing each other."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Video-LLaVA creates a unified visual representation for images and videos via pre-projection alignment, enabling mutual enhancement from joint training and strong results on image and video benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other."}],"snapshot_sha256":"f2b3bac15ee6a45f3d1362e8edf3ba5003a44ec70f4824434b8a35147eb74b6c"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"3c56d0259743582676dd476e1e36786cab235bdfc345a80f6bb28cfddd36bd3d"},"paper":{"abstract_excerpt":"The Large Vision-Language Model (LVLM) has enhanced the performance of various downstream tasks in visual-language understanding. Most existing approaches encode images and videos into separate feature spaces, which are then fed as inputs to large language models. However, due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers. In this work, we unify visual representation into the language feature space to advance the found","authors_text":"Bin Lin, Bin Zhu, Jiaxi Cui, Li Yuan, Munan Ning, Peng Jin, Yang Ye","cross_cats":[],"headline":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44Z","title":"Video-LLaVA: Learning United Visual Representation by Alignment Before Projection"},"references":{"count":87,"internal_anchors":28,"resolved_work":87,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al. 2022. Flamingo: a visual language model","work_id":"af714d03-fb34-46cc-9760-9ea257b01f78","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Max Bain, Arsha Nagrani, G \\\"u l Varol, and Andrew Zisserman. 2021. Frozen in time: A joint video and image encoder for end-to-end retrieval. In Proceedings of the IEEE/CVF International Conference on","work_id":"22f85f0f-8960-47a7-905b-bcc8a2bd58d4","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, et al. 2020. Language models are few-shot lear","work_id":"50684699-ce18-4086-8bac-7cecd178fad0","year":2020},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":6,"title":"David Chen and William B Dolan. 2011. Collecting highly parallel data for paraphrase evaluation. In Proceedings of the 49th annual meeting of the association for computational linguistics: human langu","work_id":"a7625cc6-f851-46ec-8b30-c2ba62f4d93a","year":2011},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":8,"title":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 wi","work_id":"32e12fcf-bb8e-4e6a-a249-13cd0e7d6e3f","year":2023}],"snapshot_sha256":"dbcc5bd6d4f0f7258d0fe8ec6e440f6997594d8719a2176a8137e66cc8a4a412"},"source":{"id":"2311.10122","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-14T18:00:44.719539Z","id":"5a5bd3af-90b1-4325-aaba-e920b4087d41","model_set":{"reader":"grok-4.3"},"one_line_summary":"Video-LLaVA creates a unified visual representation for images and videos via pre-projection alignment, enabling mutual enhancement from joint training and strong results on image and video benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"By aligning images and videos into the language feature space before projection, a single LLM processes both modalities and lets them improve each other.","strongest_claim":"we unify visual representation into the language feature space to advance the foundational LLM towards a unified LVLM. As a result, we establish a simple but robust LVLM baseline, Video-LLaVA, which learns from a mixed dataset of images and videos, mutually enhancing each other.","weakest_assumption":"due to the lack of unified tokenization for images and videos, namely misalignment before projection, it becomes challenging for a Large Language Model (LLM) to learn multi-modal interactions from several poor projection layers."}},"verdict_id":"5a5bd3af-90b1-4325-aaba-e920b4087d41"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d0f59a9cd3597f4e3d3942e1ed1ed738f11cd57c39409bc493ec4d29e23811c0","target":"record","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c80295a5762ecdeb6e65c5f49691842dea7b0fe27da82d7476d300d10c866324","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-11-16T10:59:44Z","title_canon_sha256":"169b24f471d2208db1ce36173b5691902e0fd44518285d76760c7236864b0685"},"schema_version":"1.0","source":{"id":"2311.10122","kind":"arxiv","version":3}},"canonical_sha256":"a3265b63e52fd6f0f006e2c5b7bdbc6b59ff9c3187e6dd8a99d4b5d12ca0d596","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a3265b63e52fd6f0f006e2c5b7bdbc6b59ff9c3187e6dd8a99d4b5d12ca0d596","first_computed_at":"2026-05-17T23:39:22.231807Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:22.231807Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Mv5IzXuboGLvQpANpwzEHBK9a0MKyollJFy2nrJXcRvXtzJS2UTaxRQIE3PObq5hhWM/S8YcNpdVIX3txITnDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:22.232498Z","signed_message":"canonical_sha256_bytes"},"source_id":"2311.10122","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d0f59a9cd3597f4e3d3942e1ed1ed738f11cd57c39409bc493ec4d29e23811c0","sha256:3024a27500d4166116b3d9255536ef759b6273ecd6167862b5057b217375dacd"],"state_sha256":"9cae2702f38af1ec1faa6ab55660e266b736728c5ef9e6c373d2892699ca4403"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kgoldqJD0/tBaK8xNiA+tOugcG8A+G2TA5xb5Gb2Lm9jsgQtwGG/5IH5fgoM8ZsjD6ulMZNYqj+B8WrjyMRLAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T23:33:58.582264Z","bundle_sha256":"373c87b8892835e752555c7eab0fe7c73b4ea2f5b2627b3b707ecba2feca106d"}}