{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:XV2J7JQMYRQZ6ANMIMLA32ZO42","short_pith_number":"pith:XV2J7JQM","canonical_record":{"source":{"id":"1609.09430","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2016-09-29T17:04:50Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"2853a10db4c1fcaf74041e8b710ebb3b43b68bbd6bc8b916e25d04567d707403","abstract_canon_sha256":"2214dfd581fb6dd510609c20a83afec5bbcf301ca86d1e473e47c8c938e8993f"},"schema_version":"1.0"},"canonical_sha256":"bd749fa60cc4619f01ac43160deb2ee696e9d18003ec19a6569971fb4a208b63","source":{"kind":"arxiv","id":"1609.09430","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1609.09430","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"arxiv_version","alias_value":"1609.09430v2","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1609.09430","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"pith_short_12","alias_value":"XV2J7JQMYRQZ","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XV2J7JQMYRQZ6ANM","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XV2J7JQM","created_at":"2026-05-18T12:30:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:XV2J7JQMYRQZ6ANMIMLA32ZO42","target":"record","payload":{"canonical_record":{"source":{"id":"1609.09430","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2016-09-29T17:04:50Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"2853a10db4c1fcaf74041e8b710ebb3b43b68bbd6bc8b916e25d04567d707403","abstract_canon_sha256":"2214dfd581fb6dd510609c20a83afec5bbcf301ca86d1e473e47c8c938e8993f"},"schema_version":"1.0"},"canonical_sha256":"bd749fa60cc4619f01ac43160deb2ee696e9d18003ec19a6569971fb4a208b63","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:53:07.349026Z","signature_b64":"hPibe8AVY18eZXz8i4ci6+aqPUW1hzuKdwW8VU9oo1nO+GzXPDa+l8cohcJI9IBLG8hZ3V6nSVmtAkqLutORCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bd749fa60cc4619f01ac43160deb2ee696e9d18003ec19a6569971fb4a208b63","last_reissued_at":"2026-05-18T00:53:07.348612Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:53:07.348612Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1609.09430","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:53:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"H6938LejqsnNIoGw8z8B3/s5KXld3w9oqF6TKsaIyh15kj7pi3mNPhx+seO/2huW+fSBONoU+cP2T4jpEeW0DA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T19:57:26.883019Z"},"content_sha256":"828db03fe8a65049dce20f051d1bc9ec74fe693fd3a7e3773b0ffb6ed03cd4ad","schema_version":"1.0","event_id":"sha256:828db03fe8a65049dce20f051d1bc9ec74fe693fd3a7e3773b0ffb6ed03cd4ad"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:XV2J7JQMYRQZ6ANMIMLA32ZO42","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"CNN Architectures for Large-Scale Audio Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.SD","authors_text":"Aren Jansen, Bryan Seybold, Daniel P. W. Ellis, Devin Platt, Jort F. Gemmeke, Kevin Wilson, Malcolm Slaney, Manoj Plakal, R. Channing Moore, Rif A. Saurous, Ron J. Weiss, Shawn Hershey, Sourish Chaudhuri","submitted_at":"2016-09-29T17:04:50Z","abstract_excerpt":"Convolutional Neural Networks (CNNs) have proven very effective in image classification and show promise for audio. We use various CNN architectures to classify the soundtracks of a dataset of 70M training videos (5.24 million hours) with 30,871 video-level labels. We examine fully connected Deep Neural Networks (DNNs), AlexNet [1], VGG [2], Inception [3], and ResNet [4]. We investigate varying the size of both training set and label vocabulary, finding that analogs of the CNNs used in image classification do well on our audio classification task, and larger training and label sets help up to "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1609.09430","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:53:07Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JWmseknMe1m/BcrAD0mVugUvD5T5I/8ALPeBd4KLOrN5O2LDC0/H9LlVCHlgkEUsQtPHep45kt0FaPDXnj/yAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T19:57:26.883692Z"},"content_sha256":"2be65281662e90f1dd99c459cb648cf5bd96722a90fddead594b67eabe849285","schema_version":"1.0","event_id":"sha256:2be65281662e90f1dd99c459cb648cf5bd96722a90fddead594b67eabe849285"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/bundle.json","state_url":"https://pith.science/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T19:57:26Z","links":{"resolver":"https://pith.science/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42","bundle":"https://pith.science/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/bundle.json","state":"https://pith.science/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XV2J7JQMYRQZ6ANMIMLA32ZO42/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:XV2J7JQMYRQZ6ANMIMLA32ZO42","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2214dfd581fb6dd510609c20a83afec5bbcf301ca86d1e473e47c8c938e8993f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2016-09-29T17:04:50Z","title_canon_sha256":"2853a10db4c1fcaf74041e8b710ebb3b43b68bbd6bc8b916e25d04567d707403"},"schema_version":"1.0","source":{"id":"1609.09430","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1609.09430","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"arxiv_version","alias_value":"1609.09430v2","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1609.09430","created_at":"2026-05-18T00:53:07Z"},{"alias_kind":"pith_short_12","alias_value":"XV2J7JQMYRQZ","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_16","alias_value":"XV2J7JQMYRQZ6ANM","created_at":"2026-05-18T12:30:51Z"},{"alias_kind":"pith_short_8","alias_value":"XV2J7JQM","created_at":"2026-05-18T12:30:51Z"}],"graph_snapshots":[{"event_id":"sha256:2be65281662e90f1dd99c459cb648cf5bd96722a90fddead594b67eabe849285","target":"graph","created_at":"2026-05-18T00:53:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Convolutional Neural Networks (CNNs) have proven very effective in image classification and show promise for audio. We use various CNN architectures to classify the soundtracks of a dataset of 70M training videos (5.24 million hours) with 30,871 video-level labels. We examine fully connected Deep Neural Networks (DNNs), AlexNet [1], VGG [2], Inception [3], and ResNet [4]. We investigate varying the size of both training set and label vocabulary, finding that analogs of the CNNs used in image classification do well on our audio classification task, and larger training and label sets help up to ","authors_text":"Aren Jansen, Bryan Seybold, Daniel P. W. Ellis, Devin Platt, Jort F. Gemmeke, Kevin Wilson, Malcolm Slaney, Manoj Plakal, R. Channing Moore, Rif A. Saurous, Ron J. Weiss, Shawn Hershey, Sourish Chaudhuri","cross_cats":["cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2016-09-29T17:04:50Z","title":"CNN Architectures for Large-Scale Audio Classification"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1609.09430","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:828db03fe8a65049dce20f051d1bc9ec74fe693fd3a7e3773b0ffb6ed03cd4ad","target":"record","created_at":"2026-05-18T00:53:07Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2214dfd581fb6dd510609c20a83afec5bbcf301ca86d1e473e47c8c938e8993f","cross_cats_sorted":["cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2016-09-29T17:04:50Z","title_canon_sha256":"2853a10db4c1fcaf74041e8b710ebb3b43b68bbd6bc8b916e25d04567d707403"},"schema_version":"1.0","source":{"id":"1609.09430","kind":"arxiv","version":2}},"canonical_sha256":"bd749fa60cc4619f01ac43160deb2ee696e9d18003ec19a6569971fb4a208b63","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bd749fa60cc4619f01ac43160deb2ee696e9d18003ec19a6569971fb4a208b63","first_computed_at":"2026-05-18T00:53:07.348612Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:53:07.348612Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"hPibe8AVY18eZXz8i4ci6+aqPUW1hzuKdwW8VU9oo1nO+GzXPDa+l8cohcJI9IBLG8hZ3V6nSVmtAkqLutORCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:53:07.349026Z","signed_message":"canonical_sha256_bytes"},"source_id":"1609.09430","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:828db03fe8a65049dce20f051d1bc9ec74fe693fd3a7e3773b0ffb6ed03cd4ad","sha256:2be65281662e90f1dd99c459cb648cf5bd96722a90fddead594b67eabe849285"],"state_sha256":"4527f7278048e862bab220d88fd05f61c264ef60d2ef6f5094154a92597cd1bb"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8VyeN/MSNIcISQLPMJl6hA5MOutPyK/Z7lvBafSSDJLFhLWLDLZTDjgQDLoCfwHrgg7ZJ65mVbDemedWwxL3Cg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T19:57:26.887410Z","bundle_sha256":"063713014754be6f228dd27edbb0aab38b49658dae2c33b847de753c56b7fcb3"}}