{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:RQBKPCVERAPPALYBQZY6HRHF5K","short_pith_number":"pith:RQBKPCVE","schema_version":"1.0","canonical_sha256":"8c02a78aa4881ef02f018671e3c4e5ea9c89ac509cf69e09d05a96ffe544cf6e","source":{"kind":"arxiv","id":"2310.15247","version":1},"attestation_state":"computed","paper":{"title":"SyncFusion: Multimodal Onset-synchronized Video-to-Audio Foley Synthesis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV","cs.LG","cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Danilo Comminiello, Emanuele Rodol\\`a, Emilian Postolache, Joshua D. Reiss, Marco Comunit\\`a, Riccardo F. Gramaccioni","submitted_at":"2023-10-23T18:01:36Z","abstract_excerpt":"Sound design involves creatively selecting, recording, and editing sound effects for various media like cinema, video games, and virtual/augmented reality. One of the most time-consuming steps when designing sound is synchronizing audio with video. In some cases, environmental recordings from video shoots are available, which can aid in the process. However, in video games and animations, no reference audio exists, requiring manual annotation of event timings from the video. We propose a system to extract repetitive actions onsets from a video, which are then used - in conjunction with audio o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2310.15247","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2023-10-23T18:01:36Z","cross_cats_sorted":["cs.CV","cs.LG","cs.MM","eess.AS"],"title_canon_sha256":"8acc98361e728934f6e55434c3841ef86ae47296595d457bb42d8117a0086f59","abstract_canon_sha256":"dbae0792a48e4c0bb693e6fe40dd8de38cfed09bc53cdfcc563f647cc577b241"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T07:04:23.334468Z","signature_b64":"Jsp8ibZshdRhBYMc9IbVnot7POaM1ZrnpjvTfGB/W52GMvBtgzUWJXtnHdh2Ig5toZvGCi3o/sCytYkG/2y0Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8c02a78aa4881ef02f018671e3c4e5ea9c89ac509cf69e09d05a96ffe544cf6e","last_reissued_at":"2026-07-05T07:04:23.334071Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T07:04:23.334071Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SyncFusion: Multimodal Onset-synchronized Video-to-Audio Foley Synthesis","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CV","cs.LG","cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Danilo Comminiello, Emanuele Rodol\\`a, Emilian Postolache, Joshua D. Reiss, Marco Comunit\\`a, Riccardo F. Gramaccioni","submitted_at":"2023-10-23T18:01:36Z","abstract_excerpt":"Sound design involves creatively selecting, recording, and editing sound effects for various media like cinema, video games, and virtual/augmented reality. One of the most time-consuming steps when designing sound is synchronizing audio with video. In some cases, environmental recordings from video shoots are available, which can aid in the process. However, in video games and animations, no reference audio exists, requiring manual annotation of event timings from the video. We propose a system to extract repetitive actions onsets from a video, which are then used - in conjunction with audio o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2310.15247","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2310.15247/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2310.15247","created_at":"2026-07-05T07:04:23.334126+00:00"},{"alias_kind":"arxiv_version","alias_value":"2310.15247v1","created_at":"2026-07-05T07:04:23.334126+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.15247","created_at":"2026-07-05T07:04:23.334126+00:00"},{"alias_kind":"pith_short_12","alias_value":"RQBKPCVERAPP","created_at":"2026-07-05T07:04:23.334126+00:00"},{"alias_kind":"pith_short_16","alias_value":"RQBKPCVERAPPALYB","created_at":"2026-07-05T07:04:23.334126+00:00"},{"alias_kind":"pith_short_8","alias_value":"RQBKPCVE","created_at":"2026-07-05T07:04:23.334126+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K","json":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K.json","graph_json":"https://pith.science/api/pith-number/RQBKPCVERAPPALYBQZY6HRHF5K/graph.json","events_json":"https://pith.science/api/pith-number/RQBKPCVERAPPALYBQZY6HRHF5K/events.json","paper":"https://pith.science/paper/RQBKPCVE"},"agent_actions":{"view_html":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K","download_json":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K.json","view_paper":"https://pith.science/paper/RQBKPCVE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2310.15247&json=true","fetch_graph":"https://pith.science/api/pith-number/RQBKPCVERAPPALYBQZY6HRHF5K/graph.json","fetch_events":"https://pith.science/api/pith-number/RQBKPCVERAPPALYBQZY6HRHF5K/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K/action/storage_attestation","attest_author":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K/action/author_attestation","sign_citation":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K/action/citation_signature","submit_replication":"https://pith.science/pith/RQBKPCVERAPPALYBQZY6HRHF5K/action/replication_record"}},"created_at":"2026-07-05T07:04:23.334126+00:00","updated_at":"2026-07-05T07:04:23.334126+00:00"}