{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:AVXRGRV62GYUNBMDUJ3TPMRJ4A","short_pith_number":"pith:AVXRGRV6","canonical_record":{"source":{"id":"2605.22344","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:30:29Z","cross_cats_sorted":["cs.AI","cs.MM"],"title_canon_sha256":"f64b978cbed7b5e1b39546a2b000f50920758893e0e748fccdd7b090b986c86f","abstract_canon_sha256":"ed3bc75f5db709753d937e0bbe65f4de99679ea1ad84dc71bdd21e9ba5b36b44"},"schema_version":"1.0"},"canonical_sha256":"056f1346bed1b1468583a27737b229e01940ecc5b1245a0468ff24d2480c3b0a","source":{"kind":"arxiv","id":"2605.22344","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22344","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22344v1","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22344","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_12","alias_value":"AVXRGRV62GYU","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_16","alias_value":"AVXRGRV62GYUNBMD","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_8","alias_value":"AVXRGRV6","created_at":"2026-05-22T01:04:38Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:AVXRGRV62GYUNBMDUJ3TPMRJ4A","target":"record","payload":{"canonical_record":{"source":{"id":"2605.22344","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:30:29Z","cross_cats_sorted":["cs.AI","cs.MM"],"title_canon_sha256":"f64b978cbed7b5e1b39546a2b000f50920758893e0e748fccdd7b090b986c86f","abstract_canon_sha256":"ed3bc75f5db709753d937e0bbe65f4de99679ea1ad84dc71bdd21e9ba5b36b44"},"schema_version":"1.0"},"canonical_sha256":"056f1346bed1b1468583a27737b229e01940ecc5b1245a0468ff24d2480c3b0a","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:38.839124Z","signature_b64":"R8bvnkf5v0G0CC7lSysWLBfFNP2f5ZP72ntIxS5QhybJdTaapZC3rGxKkjbfzrJTgQHoKcFRc/2hbmJbK6CbAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"056f1346bed1b1468583a27737b229e01940ecc5b1245a0468ff24d2480c3b0a","last_reissued_at":"2026-05-22T01:04:38.838406Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:38.838406Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.22344","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2BY2SBJ2Ad95TMgwPa5P74EDCSwgjAuonEbktrDqIh8AzQzOo8ZW6R5rVY3MxeppfeHShmhYy3F6JGGk0eyLBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:56:11.863119Z"},"content_sha256":"f3bb94fc2d9089ed38ee70342b14f0b4a5c8f469473fff2930952c7993bce1c8","schema_version":"1.0","event_id":"sha256:f3bb94fc2d9089ed38ee70342b14f0b4a5c8f469473fff2930952c7993bce1c8"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:AVXRGRV62GYUNBMDUJ3TPMRJ4A","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bernini: Latent Semantic Planning for Video Diffusion","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.MM"],"primary_cat":"cs.CV","authors_text":"Bernini Team: Chenchen Liu, Ge Bai, Junyi Chen, Lei Li, Lu Chi, Mingzhen Sun, Ruoyu Guo, Yi Fu, Yiheng Wu, Zehuan Yuan, Zhuoying Li","submitted_at":"2026-05-21T11:30:29Z","abstract_excerpt":"Multimodal large language models (MLLMs) and diffusion models have each reached remarkable maturity: MLLMs excel at reasoning over heterogeneous multimodal inputs with strong semantic grounding, while diffusion models synthesize images and videos with photorealistic fidelity. We argue that these two families can be unified through a simple division of labor: MLLMs perform semantic planning, while diffusion models render pixels from high-level semantic guidance and low-level visual features. Building on this idea, we propose Bernini, a unified framework for video generation and editing. An MLLM"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22344","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22344/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:04:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8JK006UT7uXfC7GSCj6T8g1Q8H/1TL1LmIrpJTrFeJEn5yP0krJqfDxiypFCzxXYYD/mRyl6hZnSuT/0jO5YAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:56:11.863670Z"},"content_sha256":"5520ed1917dd4e1f900e71b63e9288bcdda386d6a1aa1413149ebd20360a1d98","schema_version":"1.0","event_id":"sha256:5520ed1917dd4e1f900e71b63e9288bcdda386d6a1aa1413149ebd20360a1d98"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/bundle.json","state_url":"https://pith.science/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T23:56:11Z","links":{"resolver":"https://pith.science/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A","bundle":"https://pith.science/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/bundle.json","state":"https://pith.science/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AVXRGRV62GYUNBMDUJ3TPMRJ4A/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AVXRGRV62GYUNBMDUJ3TPMRJ4A","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ed3bc75f5db709753d937e0bbe65f4de99679ea1ad84dc71bdd21e9ba5b36b44","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:30:29Z","title_canon_sha256":"f64b978cbed7b5e1b39546a2b000f50920758893e0e748fccdd7b090b986c86f"},"schema_version":"1.0","source":{"id":"2605.22344","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.22344","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"arxiv_version","alias_value":"2605.22344v1","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22344","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_12","alias_value":"AVXRGRV62GYU","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_16","alias_value":"AVXRGRV62GYUNBMD","created_at":"2026-05-22T01:04:38Z"},{"alias_kind":"pith_short_8","alias_value":"AVXRGRV6","created_at":"2026-05-22T01:04:38Z"}],"graph_snapshots":[{"event_id":"sha256:5520ed1917dd4e1f900e71b63e9288bcdda386d6a1aa1413149ebd20360a1d98","target":"graph","created_at":"2026-05-22T01:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.22344/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Multimodal large language models (MLLMs) and diffusion models have each reached remarkable maturity: MLLMs excel at reasoning over heterogeneous multimodal inputs with strong semantic grounding, while diffusion models synthesize images and videos with photorealistic fidelity. We argue that these two families can be unified through a simple division of labor: MLLMs perform semantic planning, while diffusion models render pixels from high-level semantic guidance and low-level visual features. Building on this idea, we propose Bernini, a unified framework for video generation and editing. An MLLM","authors_text":"Bernini Team: Chenchen Liu, Ge Bai, Junyi Chen, Lei Li, Lu Chi, Mingzhen Sun, Ruoyu Guo, Yi Fu, Yiheng Wu, Zehuan Yuan, Zhuoying Li","cross_cats":["cs.AI","cs.MM"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:30:29Z","title":"Bernini: Latent Semantic Planning for Video Diffusion"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22344","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f3bb94fc2d9089ed38ee70342b14f0b4a5c8f469473fff2930952c7993bce1c8","target":"record","created_at":"2026-05-22T01:04:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ed3bc75f5db709753d937e0bbe65f4de99679ea1ad84dc71bdd21e9ba5b36b44","cross_cats_sorted":["cs.AI","cs.MM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T11:30:29Z","title_canon_sha256":"f64b978cbed7b5e1b39546a2b000f50920758893e0e748fccdd7b090b986c86f"},"schema_version":"1.0","source":{"id":"2605.22344","kind":"arxiv","version":1}},"canonical_sha256":"056f1346bed1b1468583a27737b229e01940ecc5b1245a0468ff24d2480c3b0a","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"056f1346bed1b1468583a27737b229e01940ecc5b1245a0468ff24d2480c3b0a","first_computed_at":"2026-05-22T01:04:38.838406Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:04:38.838406Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"R8bvnkf5v0G0CC7lSysWLBfFNP2f5ZP72ntIxS5QhybJdTaapZC3rGxKkjbfzrJTgQHoKcFRc/2hbmJbK6CbAw==","signature_status":"signed_v1","signed_at":"2026-05-22T01:04:38.839124Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.22344","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f3bb94fc2d9089ed38ee70342b14f0b4a5c8f469473fff2930952c7993bce1c8","sha256:5520ed1917dd4e1f900e71b63e9288bcdda386d6a1aa1413149ebd20360a1d98"],"state_sha256":"d404d6fffe45bf7766e580387491f5e966e65280ddeff8671a71550d4cdbf3c4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W9xKQCYaQ3PEl8ir8vgZjoj4/tDvtW7r8yIHwu1ASbJH54w/GRrEY5zs6DJE3Gn0J3ff6+j2Bz019rvLuwHODg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T23:56:11.867060Z","bundle_sha256":"61ef133103c62ae96eafbf2c260875dccf6a0013a233234bf67d60d96aee9168"}}