{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:T6QEZPKAQ6HMK5RM74EADQE2OJ","short_pith_number":"pith:T6QEZPKA","schema_version":"1.0","canonical_sha256":"9fa04cbd40878ec5762cff0801c09a72564bdf5d1b9f79d0d91277856b59164f","source":{"kind":"arxiv","id":"2603.12837","version":2},"attestation_state":"computed","paper":{"title":"Mask2Flow-TSE: Two-Stage Target Speaker Extraction with Masking and Flow Matching","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SD","authors_text":"Hansol Park, Heeseung Kim, Hoseong Ahn, Hyunjin Choi, Junwon Moon, Kyuhong Shim, Seungbeom Kim","submitted_at":"2026-03-13T09:40:13Z","abstract_excerpt":"Target speaker extraction (TSE) extracts the target speaker's voice from overlapping speech given a reference utterance. Existing masking-based approaches are lightweight and effective but suffer from an inability to synthesize missing content, leading to degraded perceptual quality. On the other hand, recent generative TSE models typically synthesize high-quality speech with diffusion, but require numerous iterative steps resulting in high computational costs and latency. We propose Mask2Flow-TSE, a two-stage framework combining the strengths of both paradigms. We introduce the deletion/inser"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.12837","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.SD","submitted_at":"2026-03-13T09:40:13Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"c828d970f0ecc8a84fe383c1fca8b8ae0b8754c4f1d41573663f41977a65bab6","abstract_canon_sha256":"962e8e4ee76df7254be3f9b3bd8df37e17780ae65a309059900f630fe5ab4b09"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:13:56.199738Z","signature_b64":"qW12grO3nwxtxELpWHkBNiMHr7MkhmIZfg54HEcNl722HSWeiBmdaMjTFoSMnvqFzjv2edl5m9cMi8H28rSOBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9fa04cbd40878ec5762cff0801c09a72564bdf5d1b9f79d0d91277856b59164f","last_reissued_at":"2026-06-23T03:13:56.199243Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:13:56.199243Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mask2Flow-TSE: Two-Stage Target Speaker Extraction with Masking and Flow Matching","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.SD","authors_text":"Hansol Park, Heeseung Kim, Hoseong Ahn, Hyunjin Choi, Junwon Moon, Kyuhong Shim, Seungbeom Kim","submitted_at":"2026-03-13T09:40:13Z","abstract_excerpt":"Target speaker extraction (TSE) extracts the target speaker's voice from overlapping speech given a reference utterance. Existing masking-based approaches are lightweight and effective but suffer from an inability to synthesize missing content, leading to degraded perceptual quality. On the other hand, recent generative TSE models typically synthesize high-quality speech with diffusion, but require numerous iterative steps resulting in high computational costs and latency. We propose Mask2Flow-TSE, a two-stage framework combining the strengths of both paradigms. We introduce the deletion/inser"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.12837","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.12837/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.12837","created_at":"2026-06-23T03:13:56.199302+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.12837v2","created_at":"2026-06-23T03:13:56.199302+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.12837","created_at":"2026-06-23T03:13:56.199302+00:00"},{"alias_kind":"pith_short_12","alias_value":"T6QEZPKAQ6HM","created_at":"2026-06-23T03:13:56.199302+00:00"},{"alias_kind":"pith_short_16","alias_value":"T6QEZPKAQ6HMK5RM","created_at":"2026-06-23T03:13:56.199302+00:00"},{"alias_kind":"pith_short_8","alias_value":"T6QEZPKA","created_at":"2026-06-23T03:13:56.199302+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ","json":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ.json","graph_json":"https://pith.science/api/pith-number/T6QEZPKAQ6HMK5RM74EADQE2OJ/graph.json","events_json":"https://pith.science/api/pith-number/T6QEZPKAQ6HMK5RM74EADQE2OJ/events.json","paper":"https://pith.science/paper/T6QEZPKA"},"agent_actions":{"view_html":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ","download_json":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ.json","view_paper":"https://pith.science/paper/T6QEZPKA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.12837&json=true","fetch_graph":"https://pith.science/api/pith-number/T6QEZPKAQ6HMK5RM74EADQE2OJ/graph.json","fetch_events":"https://pith.science/api/pith-number/T6QEZPKAQ6HMK5RM74EADQE2OJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ/action/storage_attestation","attest_author":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ/action/author_attestation","sign_citation":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ/action/citation_signature","submit_replication":"https://pith.science/pith/T6QEZPKAQ6HMK5RM74EADQE2OJ/action/replication_record"}},"created_at":"2026-06-23T03:13:56.199302+00:00","updated_at":"2026-06-23T03:13:56.199302+00:00"}