{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:BPM5DLXPZ3GPTD5JTHONLAM3NE","short_pith_number":"pith:BPM5DLXP","canonical_record":{"source":{"id":"2605.14988","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T15:50:25Z","cross_cats_sorted":[],"title_canon_sha256":"5afb4ca929f5de20e8461ed3b9288ecc10723187c8b03f4163ec65273a0f5fb1","abstract_canon_sha256":"e318ebcafe42ee76e3ab5a8d23809421c2976d2333f8a0bc194c77f63d6f722c"},"schema_version":"1.0"},"canonical_sha256":"0bd9d1aeefceccf98fa999dcd5819b693b2774cfcd434b8cb030096c38350566","source":{"kind":"arxiv","id":"2605.14988","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14988","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14988v1","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14988","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"pith_short_12","alias_value":"BPM5DLXPZ3GP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"BPM5DLXPZ3GPTD5J","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"BPM5DLXP","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:BPM5DLXPZ3GPTD5JTHONLAM3NE","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14988","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T15:50:25Z","cross_cats_sorted":[],"title_canon_sha256":"5afb4ca929f5de20e8461ed3b9288ecc10723187c8b03f4163ec65273a0f5fb1","abstract_canon_sha256":"e318ebcafe42ee76e3ab5a8d23809421c2976d2333f8a0bc194c77f63d6f722c"},"schema_version":"1.0"},"canonical_sha256":"0bd9d1aeefceccf98fa999dcd5819b693b2774cfcd434b8cb030096c38350566","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:55.044145Z","signature_b64":"iH8OEkAQRmOTXkRnCKLfnAejKH294jKa4L03l3dOXwVRDgpPxuneOYXiwEKi1QPWzJ19winw+Uqpe7ZuWJtoDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0bd9d1aeefceccf98fa999dcd5819b693b2774cfcd434b8cb030096c38350566","last_reissued_at":"2026-05-17T23:38:55.043644Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:55.043644Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14988","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kVTUxvb4y4k48XZ//x3LyturORlfk3rHY/EIAcI9Qqe3vvwpVKYEhpOyY988wBv7UCUcGfbG5E9Q0/2Au45xCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T15:03:31.759515Z"},"content_sha256":"5fa7596cf95ad4c993287095d96cacade7159e4c3ac2248b3c521b76c9a742d2","schema_version":"1.0","event_id":"sha256:5fa7596cf95ad4c993287095d96cacade7159e4c3ac2248b3c521b76c9a742d2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:BPM5DLXPZ3GPTD5JTHONLAM3NE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Compositional Video Generation via Inference-Time Guidance","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Amit Edenzon, Ariel Shaulov, Eitan Shaar, Gal Chechik, Lior Wolf","submitted_at":"2026-05-14T15:50:25Z","abstract_excerpt":"Text-to-video diffusion models generate realistic videos, but often fail on prompts requiring fine-grained compositional understanding, such as relations between entities, attributes, actions, and motion directions. We hypothesize that these failures need not be addressed by retraining the generator, but can instead be mitigated by steering the denoising process using the model's own internal grounding signals. We propose \\textbf{CVG}, an inference-time guidance method for improving compositional faithfulness in frozen text-to-video models. Our key observation is that cross-attention maps alre"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.14988","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cLzcNUhNhgTvTo2Jif7NZEPHYmYLa+rGoBGAmJmPgqLn9ufqjF3zaRwBESmYKyuZw9LlYciLFSZobbK3Lrn5DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-03T15:03:31.759875Z"},"content_sha256":"b342f5896fbccd83995f6a85e5a6eac96cff3efc1ad8f03b4dd5061e94f283d4","schema_version":"1.0","event_id":"sha256:b342f5896fbccd83995f6a85e5a6eac96cff3efc1ad8f03b4dd5061e94f283d4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/bundle.json","state_url":"https://pith.science/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-03T15:03:31Z","links":{"resolver":"https://pith.science/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE","bundle":"https://pith.science/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/bundle.json","state":"https://pith.science/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/BPM5DLXPZ3GPTD5JTHONLAM3NE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:BPM5DLXPZ3GPTD5JTHONLAM3NE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e318ebcafe42ee76e3ab5a8d23809421c2976d2333f8a0bc194c77f63d6f722c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T15:50:25Z","title_canon_sha256":"5afb4ca929f5de20e8461ed3b9288ecc10723187c8b03f4163ec65273a0f5fb1"},"schema_version":"1.0","source":{"id":"2605.14988","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14988","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14988v1","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14988","created_at":"2026-05-17T23:38:55Z"},{"alias_kind":"pith_short_12","alias_value":"BPM5DLXPZ3GP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"BPM5DLXPZ3GPTD5J","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"BPM5DLXP","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:b342f5896fbccd83995f6a85e5a6eac96cff3efc1ad8f03b4dd5061e94f283d4","target":"graph","created_at":"2026-05-17T23:38:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Text-to-video diffusion models generate realistic videos, but often fail on prompts requiring fine-grained compositional understanding, such as relations between entities, attributes, actions, and motion directions. We hypothesize that these failures need not be addressed by retraining the generator, but can instead be mitigated by steering the denoising process using the model's own internal grounding signals. We propose \\textbf{CVG}, an inference-time guidance method for improving compositional faithfulness in frozen text-to-video models. Our key observation is that cross-attention maps alre","authors_text":"Amit Edenzon, Ariel Shaulov, Eitan Shaar, Gal Chechik, Lior Wolf","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T15:50:25Z","title":"Compositional Video Generation via Inference-Time Guidance"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.14988","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5fa7596cf95ad4c993287095d96cacade7159e4c3ac2248b3c521b76c9a742d2","target":"record","created_at":"2026-05-17T23:38:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e318ebcafe42ee76e3ab5a8d23809421c2976d2333f8a0bc194c77f63d6f722c","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-14T15:50:25Z","title_canon_sha256":"5afb4ca929f5de20e8461ed3b9288ecc10723187c8b03f4163ec65273a0f5fb1"},"schema_version":"1.0","source":{"id":"2605.14988","kind":"arxiv","version":1}},"canonical_sha256":"0bd9d1aeefceccf98fa999dcd5819b693b2774cfcd434b8cb030096c38350566","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0bd9d1aeefceccf98fa999dcd5819b693b2774cfcd434b8cb030096c38350566","first_computed_at":"2026-05-17T23:38:55.043644Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:55.043644Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"iH8OEkAQRmOTXkRnCKLfnAejKH294jKa4L03l3dOXwVRDgpPxuneOYXiwEKi1QPWzJ19winw+Uqpe7ZuWJtoDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:55.044145Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14988","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5fa7596cf95ad4c993287095d96cacade7159e4c3ac2248b3c521b76c9a742d2","sha256:b342f5896fbccd83995f6a85e5a6eac96cff3efc1ad8f03b4dd5061e94f283d4"],"state_sha256":"272d0dedb56510ae1d827a7adb14f062c6fb81642010168deadbd5498e84fe32"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ELW7z9iMMAxvdLsioYbrT/463XzFyKe7iKS35W9tRPb8rG25+VqBFoO40JMgeJUCg4OswDJMY5mIQBA/75y6CQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-03T15:03:31.761807Z","bundle_sha256":"38f8223bd638cf879aa95a0338376843ac3624e0cd7dd8085099bfa1dfbc97ca"}}