{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:DBG7USJ4IIW7B2HNXIXOIYFSQZ","short_pith_number":"pith:DBG7USJ4","schema_version":"1.0","canonical_sha256":"184dfa493c422df0e8edba2ee460b286718e9e285179514c33dae8196e1a5abd","source":{"kind":"arxiv","id":"1805.03647","version":1},"attestation_state":"computed","paper":{"title":"End-to-End Polyphonic Sound Event Detection Using Convolutional Recurrent Neural Networks with Learned Time-Frequency Representation Input","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Emre \\c{C}ak{\\i}r, Tuomas Virtanen","submitted_at":"2018-05-09T15:10:57Z","abstract_excerpt":"Sound event detection systems typically consist of two stages: extracting hand-crafted features from the raw audio waveform, and learning a mapping between these features and the target sound events using a classifier. Recently, the focus of sound event detection research has been mostly shifted to the latter stage using standard features such as mel spectrogram as the input for classifiers such as deep neural networks. In this work, we utilize end-to-end approach and propose to combine these two stages in a single deep neural network classifier. The feature extraction over the raw waveform is"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1805.03647","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2018-05-09T15:10:57Z","cross_cats_sorted":["cs.LG","eess.AS","stat.ML"],"title_canon_sha256":"b2ce2c96910fb1b7947896d41514584a200645c19b905bcb352080ac1907c9c4","abstract_canon_sha256":"b49e207a065b268eb0fea1d821c1cf53ce701cf74a9fb41634efe82a8746894d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:16:19.042011Z","signature_b64":"OVFjvUrTricgz+ofgPn3M1bcJd+EFzzIUqL1iF5Q3GnSlIGfuHzCUoTtVhc/wxNsrEdAQC8PHKSqLcEi/c5pAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"184dfa493c422df0e8edba2ee460b286718e9e285179514c33dae8196e1a5abd","last_reissued_at":"2026-05-18T00:16:19.041476Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:16:19.041476Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"End-to-End Polyphonic Sound Event Detection Using Convolutional Recurrent Neural Networks with Learned Time-Frequency Representation Input","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Emre \\c{C}ak{\\i}r, Tuomas Virtanen","submitted_at":"2018-05-09T15:10:57Z","abstract_excerpt":"Sound event detection systems typically consist of two stages: extracting hand-crafted features from the raw audio waveform, and learning a mapping between these features and the target sound events using a classifier. Recently, the focus of sound event detection research has been mostly shifted to the latter stage using standard features such as mel spectrogram as the input for classifiers such as deep neural networks. In this work, we utilize end-to-end approach and propose to combine these two stages in a single deep neural network classifier. The feature extraction over the raw waveform is"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1805.03647","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1805.03647","created_at":"2026-05-18T00:16:19.041554+00:00"},{"alias_kind":"arxiv_version","alias_value":"1805.03647v1","created_at":"2026-05-18T00:16:19.041554+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1805.03647","created_at":"2026-05-18T00:16:19.041554+00:00"},{"alias_kind":"pith_short_12","alias_value":"DBG7USJ4IIW7","created_at":"2026-05-18T12:32:19.392346+00:00"},{"alias_kind":"pith_short_16","alias_value":"DBG7USJ4IIW7B2HN","created_at":"2026-05-18T12:32:19.392346+00:00"},{"alias_kind":"pith_short_8","alias_value":"DBG7USJ4","created_at":"2026-05-18T12:32:19.392346+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ","json":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ.json","graph_json":"https://pith.science/api/pith-number/DBG7USJ4IIW7B2HNXIXOIYFSQZ/graph.json","events_json":"https://pith.science/api/pith-number/DBG7USJ4IIW7B2HNXIXOIYFSQZ/events.json","paper":"https://pith.science/paper/DBG7USJ4"},"agent_actions":{"view_html":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ","download_json":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ.json","view_paper":"https://pith.science/paper/DBG7USJ4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1805.03647&json=true","fetch_graph":"https://pith.science/api/pith-number/DBG7USJ4IIW7B2HNXIXOIYFSQZ/graph.json","fetch_events":"https://pith.science/api/pith-number/DBG7USJ4IIW7B2HNXIXOIYFSQZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ/action/storage_attestation","attest_author":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ/action/author_attestation","sign_citation":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ/action/citation_signature","submit_replication":"https://pith.science/pith/DBG7USJ4IIW7B2HNXIXOIYFSQZ/action/replication_record"}},"created_at":"2026-05-18T00:16:19.041554+00:00","updated_at":"2026-05-18T00:16:19.041554+00:00"}