{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:AUD4GHR2WLTXDECEDMBBWMFE5L","short_pith_number":"pith:AUD4GHR2","schema_version":"1.0","canonical_sha256":"0507c31e3ab2e77190441b021b30a4ead8b66f0097cfdb0b901ced4fadda4fbc","source":{"kind":"arxiv","id":"1805.05826","version":1},"attestation_state":"computed","paper":{"title":"A Purely End-to-end System for Multi-speaker Speech Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Hiroshi Seki, John R. Hershey, Jonathan Le Roux, Shinji Watanabe, Takaaki Hori","submitted_at":"2018-05-15T14:45:33Z","abstract_excerpt":"Recently, there has been growing interest in multi-speaker speech recognition, where the utterances of multiple speakers are recognized from their mixture. Promising techniques have been proposed for this task, but earlier works have required additional training data such as isolated source signals or senone alignments for effective learning. In this paper, we propose a new sequence-to-sequence framework to directly decode multiple label sequences from a single speech sequence by unifying source separation and speech recognition functions in an end-to-end manner. We further propose a new objec"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1805.05826","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2018-05-15T14:45:33Z","cross_cats_sorted":["cs.CL","eess.AS","stat.ML"],"title_canon_sha256":"321cec239315a43b8847b8c4f3959c04cc6915a4340d5a5ca0c49ccfdfc2cc7d","abstract_canon_sha256":"103df90f3c5aafff803f9668185a3f5de7bd53909dca425170edb4e2e22161ae"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:15:53.555254Z","signature_b64":"Hgzf/Srwit0fa0QERXrwgZTuszXKf+MWUL1MZcCuI0A8WebhK0sESxhMIOQRrP8EVx+L9nEAfzIwQOyUEpB7CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0507c31e3ab2e77190441b021b30a4ead8b66f0097cfdb0b901ced4fadda4fbc","last_reissued_at":"2026-05-18T00:15:53.554542Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:15:53.554542Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Purely End-to-end System for Multi-speaker Speech Recognition","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Hiroshi Seki, John R. Hershey, Jonathan Le Roux, Shinji Watanabe, Takaaki Hori","submitted_at":"2018-05-15T14:45:33Z","abstract_excerpt":"Recently, there has been growing interest in multi-speaker speech recognition, where the utterances of multiple speakers are recognized from their mixture. Promising techniques have been proposed for this task, but earlier works have required additional training data such as isolated source signals or senone alignments for effective learning. In this paper, we propose a new sequence-to-sequence framework to directly decode multiple label sequences from a single speech sequence by unifying source separation and speech recognition functions in an end-to-end manner. We further propose a new objec"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.05826","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1805.05826","created_at":"2026-05-18T00:15:53.554650+00:00"},{"alias_kind":"arxiv_version","alias_value":"1805.05826v1","created_at":"2026-05-18T00:15:53.554650+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.05826","created_at":"2026-05-18T00:15:53.554650+00:00"},{"alias_kind":"pith_short_12","alias_value":"AUD4GHR2WLTX","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_16","alias_value":"AUD4GHR2WLTXDECE","created_at":"2026-05-18T12:32:13.499390+00:00"},{"alias_kind":"pith_short_8","alias_value":"AUD4GHR2","created_at":"2026-05-18T12:32:13.499390+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L","json":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L.json","graph_json":"https://pith.science/api/pith-number/AUD4GHR2WLTXDECEDMBBWMFE5L/graph.json","events_json":"https://pith.science/api/pith-number/AUD4GHR2WLTXDECEDMBBWMFE5L/events.json","paper":"https://pith.science/paper/AUD4GHR2"},"agent_actions":{"view_html":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L","download_json":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L.json","view_paper":"https://pith.science/paper/AUD4GHR2","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1805.05826&json=true","fetch_graph":"https://pith.science/api/pith-number/AUD4GHR2WLTXDECEDMBBWMFE5L/graph.json","fetch_events":"https://pith.science/api/pith-number/AUD4GHR2WLTXDECEDMBBWMFE5L/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L/action/timestamp_anchor","attest_storage":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L/action/storage_attestation","attest_author":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L/action/author_attestation","sign_citation":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L/action/citation_signature","submit_replication":"https://pith.science/pith/AUD4GHR2WLTXDECEDMBBWMFE5L/action/replication_record"}},"created_at":"2026-05-18T00:15:53.554650+00:00","updated_at":"2026-05-18T00:15:53.554650+00:00"}