{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:IILJQ37FZPDI3UQ6ISTOR6253Q","short_pith_number":"pith:IILJQ37F","schema_version":"1.0","canonical_sha256":"4216986fe5cbc68dd21e44a6e8fb5ddc0b533cb1cff624d5ad199d4d87998092","source":{"kind":"arxiv","id":"1706.07156","version":1},"attestation_state":"computed","paper":{"title":"Comparison of Time-Frequency Representations for Environmental Sound Classification using Convolutional Neural Networks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"M. Huzaifah","submitted_at":"2017-06-22T03:23:09Z","abstract_excerpt":"Recent successful applications of convolutional neural networks (CNNs) to audio classification and speech recognition have motivated the search for better input representations for more efficient training. Visual displays of an audio signal, through various time-frequency representations such as spectrograms offer a rich representation of the temporal and spectral structure of the original signal. In this letter, we compare various popular signal processing methods to obtain this representation, such as short-time Fourier transform (STFT) with linear and Mel scales, constant-Q transform (CQT) "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1706.07156","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2017-06-22T03:23:09Z","cross_cats_sorted":[],"title_canon_sha256":"0f597447fbd6ed874e6c296fc7150b518e801ef479ac2d0927e7990aa720c423","abstract_canon_sha256":"0c3dc65708dd9556c6a079ec52d72a7674b765e53cbba961a3d8133caa640ecd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:41:52.493389Z","signature_b64":"ggz/6cYSsEUJPAeiY8TDjkXB/uIafbFJm2mMHOEVDcvhwSWCblH4cD+pSWvp+B5wYadGaE06PqCitY9loUDjAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4216986fe5cbc68dd21e44a6e8fb5ddc0b533cb1cff624d5ad199d4d87998092","last_reissued_at":"2026-05-18T00:41:52.492867Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:41:52.492867Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Comparison of Time-Frequency Representations for Environmental Sound Classification using Convolutional Neural Networks","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"M. Huzaifah","submitted_at":"2017-06-22T03:23:09Z","abstract_excerpt":"Recent successful applications of convolutional neural networks (CNNs) to audio classification and speech recognition have motivated the search for better input representations for more efficient training. Visual displays of an audio signal, through various time-frequency representations such as spectrograms offer a rich representation of the temporal and spectral structure of the original signal. In this letter, we compare various popular signal processing methods to obtain this representation, such as short-time Fourier transform (STFT) with linear and Mel scales, constant-Q transform (CQT) "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1706.07156","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1706.07156","created_at":"2026-05-18T00:41:52.492944+00:00"},{"alias_kind":"arxiv_version","alias_value":"1706.07156v1","created_at":"2026-05-18T00:41:52.492944+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1706.07156","created_at":"2026-05-18T00:41:52.492944+00:00"},{"alias_kind":"pith_short_12","alias_value":"IILJQ37FZPDI","created_at":"2026-05-18T12:31:21.493067+00:00"},{"alias_kind":"pith_short_16","alias_value":"IILJQ37FZPDI3UQ6","created_at":"2026-05-18T12:31:21.493067+00:00"},{"alias_kind":"pith_short_8","alias_value":"IILJQ37F","created_at":"2026-05-18T12:31:21.493067+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2402.07619","citing_title":"Developing a Multi-variate Prediction Model For COVID-19 From Crowd-sourced Respiratory Voice Data","ref_index":34,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q","json":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q.json","graph_json":"https://pith.science/api/pith-number/IILJQ37FZPDI3UQ6ISTOR6253Q/graph.json","events_json":"https://pith.science/api/pith-number/IILJQ37FZPDI3UQ6ISTOR6253Q/events.json","paper":"https://pith.science/paper/IILJQ37F"},"agent_actions":{"view_html":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q","download_json":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q.json","view_paper":"https://pith.science/paper/IILJQ37F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1706.07156&json=true","fetch_graph":"https://pith.science/api/pith-number/IILJQ37FZPDI3UQ6ISTOR6253Q/graph.json","fetch_events":"https://pith.science/api/pith-number/IILJQ37FZPDI3UQ6ISTOR6253Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q/action/storage_attestation","attest_author":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q/action/author_attestation","sign_citation":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q/action/citation_signature","submit_replication":"https://pith.science/pith/IILJQ37FZPDI3UQ6ISTOR6253Q/action/replication_record"}},"created_at":"2026-05-18T00:41:52.492944+00:00","updated_at":"2026-05-18T00:41:52.492944+00:00"}