{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:SBAEC6BK2JXDCDZCKDISLLMFIN","short_pith_number":"pith:SBAEC6BK","schema_version":"1.0","canonical_sha256":"904041782ad26e310f2250d125ad854343b6414b54954fc3c82699d45c6c06c2","source":{"kind":"arxiv","id":"1803.03849","version":1},"attestation_state":"computed","paper":{"title":"Learning to Localize Sound Source in Visual Scenes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MM"],"primary_cat":"cs.CV","authors_text":"Arda Senocak, In So Kweon, Junsik Kim, Ming-Hsuan Yang, Tae-Hyun Oh","submitted_at":"2018-03-10T18:19:02Z","abstract_excerpt":"Visual events are usually accompanied by sounds in our daily lives. We pose the question: Can the machine learn the correspondence between visual scene and the sound, and localize the sound source only by observing sound and visual scene pairs like human? In this paper, we propose a novel unsupervised algorithm to address the problem of localizing the sound source in visual scenes. A two-stream network structure which handles each modality, with attention mechanism is developed for sound source localization. Moreover, although our network is formulated within the unsupervised learning framewor"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1803.03849","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-03-10T18:19:02Z","cross_cats_sorted":["cs.AI","cs.MM"],"title_canon_sha256":"1ef374221d35fc09b9d6742e7bce58df9b2b4b8eab1400e30d30e5e041754aff","abstract_canon_sha256":"573b5d9a692c282a6678ec14e140814cd89cd8dcaba53b05eec5f6dca6141eaa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:53:57.864202Z","signature_b64":"pwShC5mDGs/UKFyzc8dCGtE1CleDmF28oy48qhIHnO8g9PYHhcMcVleqdi8V9K+NHSbKI4Syuy1rM0MWD+J0DA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"904041782ad26e310f2250d125ad854343b6414b54954fc3c82699d45c6c06c2","last_reissued_at":"2026-05-17T23:53:57.863479Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:53:57.863479Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning to Localize Sound Source in Visual Scenes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.MM"],"primary_cat":"cs.CV","authors_text":"Arda Senocak, In So Kweon, Junsik Kim, Ming-Hsuan Yang, Tae-Hyun Oh","submitted_at":"2018-03-10T18:19:02Z","abstract_excerpt":"Visual events are usually accompanied by sounds in our daily lives. We pose the question: Can the machine learn the correspondence between visual scene and the sound, and localize the sound source only by observing sound and visual scene pairs like human? In this paper, we propose a novel unsupervised algorithm to address the problem of localizing the sound source in visual scenes. A two-stream network structure which handles each modality, with attention mechanism is developed for sound source localization. Moreover, although our network is formulated within the unsupervised learning framewor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.03849","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1803.03849","created_at":"2026-05-17T23:53:57.863604+00:00"},{"alias_kind":"arxiv_version","alias_value":"1803.03849v1","created_at":"2026-05-17T23:53:57.863604+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.03849","created_at":"2026-05-17T23:53:57.863604+00:00"},{"alias_kind":"pith_short_12","alias_value":"SBAEC6BK2JXD","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_16","alias_value":"SBAEC6BK2JXDCDZC","created_at":"2026-05-18T12:32:50.500415+00:00"},{"alias_kind":"pith_short_8","alias_value":"SBAEC6BK","created_at":"2026-05-18T12:32:50.500415+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN","json":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN.json","graph_json":"https://pith.science/api/pith-number/SBAEC6BK2JXDCDZCKDISLLMFIN/graph.json","events_json":"https://pith.science/api/pith-number/SBAEC6BK2JXDCDZCKDISLLMFIN/events.json","paper":"https://pith.science/paper/SBAEC6BK"},"agent_actions":{"view_html":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN","download_json":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN.json","view_paper":"https://pith.science/paper/SBAEC6BK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1803.03849&json=true","fetch_graph":"https://pith.science/api/pith-number/SBAEC6BK2JXDCDZCKDISLLMFIN/graph.json","fetch_events":"https://pith.science/api/pith-number/SBAEC6BK2JXDCDZCKDISLLMFIN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN/action/storage_attestation","attest_author":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN/action/author_attestation","sign_citation":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN/action/citation_signature","submit_replication":"https://pith.science/pith/SBAEC6BK2JXDCDZCKDISLLMFIN/action/replication_record"}},"created_at":"2026-05-17T23:53:57.863604+00:00","updated_at":"2026-05-17T23:53:57.863604+00:00"}