{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:H4NC22H5PNRUU2TW77HCGEP6C5","short_pith_number":"pith:H4NC22H5","schema_version":"1.0","canonical_sha256":"3f1a2d68fd7b634a6a76ffce2311fe1752befa222c28aaca0cf73d9e1baab502","source":{"kind":"arxiv","id":"1904.03543","version":2},"attestation_state":"computed","paper":{"title":"Spatio-Temporal Attention Pooling for Audio Scene Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Alfred Mertins, Huy Phan, Ian McLoughlin, Lam Pham, Maarten De Vos, Oliver Y. Ch\\'en, Philipp Koch","submitted_at":"2019-04-06T22:49:20Z","abstract_excerpt":"Acoustic scenes are rich and redundant in their content. In this work, we present a spatio-temporal attention pooling layer coupled with a convolutional recurrent neural network to learn from patterns that are discriminative while suppressing those that are irrelevant for acoustic scene classification. The convolutional layers in this network learn invariant features from time-frequency input. The bidirectional recurrent layers are then able to encode the temporal dynamics of the resulting convolutional features. Afterwards, a two-dimensional attention mask is formed via the outer product of t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1904.03543","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-04-06T22:49:20Z","cross_cats_sorted":["cs.LG","eess.AS","stat.ML"],"title_canon_sha256":"6790f29349a944b973d999c5d956280849a86cb78dfd93711b4f0bc601c5b703","abstract_canon_sha256":"27577d66d2cb073f00f6673db52460d913a0fb413310b480bbe7283e42b16bd2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:02.320817Z","signature_b64":"XlzGmIOan+yKS0pIrIRvCVA0DSE2YOblqjDjnjp7cwTaXcaysNByiE9bolfvgk4pNj987d2h+0uIRr7kf4szAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3f1a2d68fd7b634a6a76ffce2311fe1752befa222c28aaca0cf73d9e1baab502","last_reissued_at":"2026-05-17T23:42:02.320202Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:02.320202Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Spatio-Temporal Attention Pooling for Audio Scene Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","eess.AS","stat.ML"],"primary_cat":"cs.SD","authors_text":"Alfred Mertins, Huy Phan, Ian McLoughlin, Lam Pham, Maarten De Vos, Oliver Y. Ch\\'en, Philipp Koch","submitted_at":"2019-04-06T22:49:20Z","abstract_excerpt":"Acoustic scenes are rich and redundant in their content. In this work, we present a spatio-temporal attention pooling layer coupled with a convolutional recurrent neural network to learn from patterns that are discriminative while suppressing those that are irrelevant for acoustic scene classification. The convolutional layers in this network learn invariant features from time-frequency input. The bidirectional recurrent layers are then able to encode the temporal dynamics of the resulting convolutional features. Afterwards, a two-dimensional attention mask is formed via the outer product of t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1904.03543","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1904.03543","created_at":"2026-05-17T23:42:02.320295+00:00"},{"alias_kind":"arxiv_version","alias_value":"1904.03543v2","created_at":"2026-05-17T23:42:02.320295+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1904.03543","created_at":"2026-05-17T23:42:02.320295+00:00"},{"alias_kind":"pith_short_12","alias_value":"H4NC22H5PNRU","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_16","alias_value":"H4NC22H5PNRUU2TW","created_at":"2026-05-18T12:33:18.533446+00:00"},{"alias_kind":"pith_short_8","alias_value":"H4NC22H5","created_at":"2026-05-18T12:33:18.533446+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5","json":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5.json","graph_json":"https://pith.science/api/pith-number/H4NC22H5PNRUU2TW77HCGEP6C5/graph.json","events_json":"https://pith.science/api/pith-number/H4NC22H5PNRUU2TW77HCGEP6C5/events.json","paper":"https://pith.science/paper/H4NC22H5"},"agent_actions":{"view_html":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5","download_json":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5.json","view_paper":"https://pith.science/paper/H4NC22H5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1904.03543&json=true","fetch_graph":"https://pith.science/api/pith-number/H4NC22H5PNRUU2TW77HCGEP6C5/graph.json","fetch_events":"https://pith.science/api/pith-number/H4NC22H5PNRUU2TW77HCGEP6C5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5/action/storage_attestation","attest_author":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5/action/author_attestation","sign_citation":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5/action/citation_signature","submit_replication":"https://pith.science/pith/H4NC22H5PNRUU2TW77HCGEP6C5/action/replication_record"}},"created_at":"2026-05-17T23:42:02.320295+00:00","updated_at":"2026-05-17T23:42:02.320295+00:00"}