{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:244N6LUAE5EOMS2MABMCSN73IR","short_pith_number":"pith:244N6LUA","canonical_record":{"source":{"id":"2606.07182","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T11:46:28Z","cross_cats_sorted":[],"title_canon_sha256":"bcdb31f7c7e24e7c2ee1067af2d13138f6be4f6e6462a534b171860de21a5a90","abstract_canon_sha256":"421a7db6beab604d53d31cb7061e0323bf18d23f903d898f2e736283b8a54279"},"schema_version":"1.0"},"canonical_sha256":"d738df2e802748e64b4c00582937fb444fd079bf7e6af2fec9f10c013b3cf833","source":{"kind":"arxiv","id":"2606.07182","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07182","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07182v1","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07182","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"244N6LUAE5EO","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"244N6LUAE5EOMS2M","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"244N6LUA","created_at":"2026-06-08T01:04:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:244N6LUAE5EOMS2MABMCSN73IR","target":"record","payload":{"canonical_record":{"source":{"id":"2606.07182","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T11:46:28Z","cross_cats_sorted":[],"title_canon_sha256":"bcdb31f7c7e24e7c2ee1067af2d13138f6be4f6e6462a534b171860de21a5a90","abstract_canon_sha256":"421a7db6beab604d53d31cb7061e0323bf18d23f903d898f2e736283b8a54279"},"schema_version":"1.0"},"canonical_sha256":"d738df2e802748e64b4c00582937fb444fd079bf7e6af2fec9f10c013b3cf833","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-08T01:04:51.283692Z","signature_b64":"gwzjKD+UG/yHHxwryXJ/3Vj0MN04KkHGZM+9NIdqHfFnU1J44X+4y2RiobwXe0R2lm/IKY8fxIsOu+QkPsfhAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d738df2e802748e64b4c00582937fb444fd079bf7e6af2fec9f10c013b3cf833","last_reissued_at":"2026-06-08T01:04:51.282864Z","signature_status":"signed_v1","first_computed_at":"2026-06-08T01:04:51.282864Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.07182","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"s0TdHWpJSsqbJH9h54EIns5Khq3EvzJKixfiq1tkiuZSXr1p4PkwT/SbifnnJafpz7bluBxlYi3/KzKxyy4+BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T06:19:40.284458Z"},"content_sha256":"a74b1236fb1bc44bb279a8de0d4656ba3b1dbfcd6b3de1c13352ba7fbca3599a","schema_version":"1.0","event_id":"sha256:a74b1236fb1bc44bb279a8de0d4656ba3b1dbfcd6b3de1c13352ba7fbca3599a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:244N6LUAE5EOMS2MABMCSN73IR","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Audio Imitator: Controlling Timbre and Tempo in Video2Audio Synthesis with Audio Reference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"eess.AS","authors_text":"Cheng Gong, Chunyu Qiang, Feng Deng, Jiahui Zhao, Longbiao Wang, Tianrui Wang, Xijuan Zeng","submitted_at":"2026-06-05T11:46:28Z","abstract_excerpt":"Video-to-audio generation has made significant progress in achieving semantic consistency and temporal alignment from silent videos. However, audio contains rich stylistic attributes such as timbre and tempo that are difficult to infer from visual and textual inputs alone. While reference audio can serve as additional conditioning, it is typically treated as a holistic signal, limiting fine-grained style control. We propose AudioIM, an attribute-aware framework that explicitly models timbre and tempo as separate control factors rather than relying on holistic prompt conditioning. Dual encoders"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07182","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.07182/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-08T01:04:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ra3tTHBjevT5zFfB1747TOU2ZiiKhfHJd/2/pAobdkIJ2S9j70ZX3ykvMjtIPzagqM1BxMUslBjcSUkIFS16Aw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-20T06:19:40.284834Z"},"content_sha256":"90bf23c46ce756241348931cb6a82ad8f3176f2b943ce8017112dc7f237b7cc7","schema_version":"1.0","event_id":"sha256:90bf23c46ce756241348931cb6a82ad8f3176f2b943ce8017112dc7f237b7cc7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/244N6LUAE5EOMS2MABMCSN73IR/bundle.json","state_url":"https://pith.science/pith/244N6LUAE5EOMS2MABMCSN73IR/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/244N6LUAE5EOMS2MABMCSN73IR/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-20T06:19:40Z","links":{"resolver":"https://pith.science/pith/244N6LUAE5EOMS2MABMCSN73IR","bundle":"https://pith.science/pith/244N6LUAE5EOMS2MABMCSN73IR/bundle.json","state":"https://pith.science/pith/244N6LUAE5EOMS2MABMCSN73IR/state.json","well_known_bundle":"https://pith.science/.well-known/pith/244N6LUAE5EOMS2MABMCSN73IR/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:244N6LUAE5EOMS2MABMCSN73IR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"421a7db6beab604d53d31cb7061e0323bf18d23f903d898f2e736283b8a54279","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T11:46:28Z","title_canon_sha256":"bcdb31f7c7e24e7c2ee1067af2d13138f6be4f6e6462a534b171860de21a5a90"},"schema_version":"1.0","source":{"id":"2606.07182","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.07182","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2606.07182v1","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.07182","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"244N6LUAE5EO","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"244N6LUAE5EOMS2M","created_at":"2026-06-08T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"244N6LUA","created_at":"2026-06-08T01:04:51Z"}],"graph_snapshots":[{"event_id":"sha256:90bf23c46ce756241348931cb6a82ad8f3176f2b943ce8017112dc7f237b7cc7","target":"graph","created_at":"2026-06-08T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.07182/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Video-to-audio generation has made significant progress in achieving semantic consistency and temporal alignment from silent videos. However, audio contains rich stylistic attributes such as timbre and tempo that are difficult to infer from visual and textual inputs alone. While reference audio can serve as additional conditioning, it is typically treated as a holistic signal, limiting fine-grained style control. We propose AudioIM, an attribute-aware framework that explicitly models timbre and tempo as separate control factors rather than relying on holistic prompt conditioning. Dual encoders","authors_text":"Cheng Gong, Chunyu Qiang, Feng Deng, Jiahui Zhao, Longbiao Wang, Tianrui Wang, Xijuan Zeng","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T11:46:28Z","title":"Audio Imitator: Controlling Timbre and Tempo in Video2Audio Synthesis with Audio Reference"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.07182","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a74b1236fb1bc44bb279a8de0d4656ba3b1dbfcd6b3de1c13352ba7fbca3599a","target":"record","created_at":"2026-06-08T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"421a7db6beab604d53d31cb7061e0323bf18d23f903d898f2e736283b8a54279","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"eess.AS","submitted_at":"2026-06-05T11:46:28Z","title_canon_sha256":"bcdb31f7c7e24e7c2ee1067af2d13138f6be4f6e6462a534b171860de21a5a90"},"schema_version":"1.0","source":{"id":"2606.07182","kind":"arxiv","version":1}},"canonical_sha256":"d738df2e802748e64b4c00582937fb444fd079bf7e6af2fec9f10c013b3cf833","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"d738df2e802748e64b4c00582937fb444fd079bf7e6af2fec9f10c013b3cf833","first_computed_at":"2026-06-08T01:04:51.282864Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:04:51.282864Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gwzjKD+UG/yHHxwryXJ/3Vj0MN04KkHGZM+9NIdqHfFnU1J44X+4y2RiobwXe0R2lm/IKY8fxIsOu+QkPsfhAw==","signature_status":"signed_v1","signed_at":"2026-06-08T01:04:51.283692Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.07182","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a74b1236fb1bc44bb279a8de0d4656ba3b1dbfcd6b3de1c13352ba7fbca3599a","sha256:90bf23c46ce756241348931cb6a82ad8f3176f2b943ce8017112dc7f237b7cc7"],"state_sha256":"bc517975a1f8dc42d15c90c3f30bccd16ef1fb158e46ef262bdf2c445b521ce4"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mj/eGRS2wTc4MDTt9mFnsmbYPWWxzcmjLfkXX12yAi0fFpSyfo/BfUL99dyNcp86+jQkioEIJ4woaeW9ZtTUCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-20T06:19:40.286867Z","bundle_sha256":"7ccdc1c54276e42e372ed9af06ee8715e142f1113629c0367f81a6ce86597128"}}