{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:GYH7CDH2MYVQDOBCTPNBUIJNCI","short_pith_number":"pith:GYH7CDH2","canonical_record":{"source":{"id":"1610.09001","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-27T20:23:39Z","cross_cats_sorted":["cs.LG","cs.SD"],"title_canon_sha256":"6e9e9fc61154ff2c229adf94f2c792e9bc10f1e6e910901bd2c2b9622615cc8c","abstract_canon_sha256":"bb8002760f14eb7ab5d01df07e279cbdc89e61f194f5aea18d086dfa97a6661c"},"schema_version":"1.0"},"canonical_sha256":"360ff10cfa662b01b8229bda1a212d122987e9076931cf2c14eb8c4252b52c93","source":{"kind":"arxiv","id":"1610.09001","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1610.09001","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"arxiv_version","alias_value":"1610.09001v1","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1610.09001","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"pith_short_12","alias_value":"GYH7CDH2MYVQ","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"GYH7CDH2MYVQDOBC","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"GYH7CDH2","created_at":"2026-05-18T12:30:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:GYH7CDH2MYVQDOBCTPNBUIJNCI","target":"record","payload":{"canonical_record":{"source":{"id":"1610.09001","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-27T20:23:39Z","cross_cats_sorted":["cs.LG","cs.SD"],"title_canon_sha256":"6e9e9fc61154ff2c229adf94f2c792e9bc10f1e6e910901bd2c2b9622615cc8c","abstract_canon_sha256":"bb8002760f14eb7ab5d01df07e279cbdc89e61f194f5aea18d086dfa97a6661c"},"schema_version":"1.0"},"canonical_sha256":"360ff10cfa662b01b8229bda1a212d122987e9076931cf2c14eb8c4252b52c93","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:00:59.467359Z","signature_b64":"UW1fMMod3VOn4bHYyTkiyWY6m/mDuKz1GK5Jt+/e8/vDzlckJnsgoSPjqLo1tT1H9qLgelHaE2aKbZ/a14w6CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"360ff10cfa662b01b8229bda1a212d122987e9076931cf2c14eb8c4252b52c93","last_reissued_at":"2026-05-18T01:00:59.466815Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:00:59.466815Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1610.09001","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:00:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NUsTfH6rd/X7vl2kJ1wC9Q40ynyM7ZX7cUMR7BP3HfhqSRtQ3z6VDxeFLxuF5FiWx5oY1uMagzZEq9kF5ijkBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:38:34.086125Z"},"content_sha256":"ff6724abc48066aee197564152ede6e524d132d6f39b058d3dbd93fc8ffa4189","schema_version":"1.0","event_id":"sha256:ff6724abc48066aee197564152ede6e524d132d6f39b058d3dbd93fc8ffa4189"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:GYH7CDH2MYVQDOBCTPNBUIJNCI","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"SoundNet: Learning Sound Representations from Unlabeled Video","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.SD"],"primary_cat":"cs.CV","authors_text":"Antonio Torralba, Carl Vondrick, Yusuf Aytar","submitted_at":"2016-10-27T20:23:39Z","abstract_excerpt":"We learn rich natural sound representations by capitalizing on large amounts of unlabeled sound data collected in the wild. We leverage the natural synchronization between vision and sound to learn an acoustic representation using two-million unlabeled videos. Unlabeled video has the advantage that it can be economically acquired at massive scales, yet contains useful signals about natural sound. We propose a student-teacher training procedure which transfers discriminative visual knowledge from well established visual recognition models into the sound modality using unlabeled video as a bridg"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1610.09001","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:00:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"485VVyvEdniT9NV0Wd5wPoVWwKAAMhEEBKIev22FoCA/2cbEDsRnp3sNHHpquCIzrMCyvubht4KUNWUi2tRcBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:38:34.086502Z"},"content_sha256":"aa9be432db93dc5bf727f0fdc1ce2ea125894f23238d738db88a4a28d64910d4","schema_version":"1.0","event_id":"sha256:aa9be432db93dc5bf727f0fdc1ce2ea125894f23238d738db88a4a28d64910d4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/bundle.json","state_url":"https://pith.science/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T08:38:34Z","links":{"resolver":"https://pith.science/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI","bundle":"https://pith.science/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/bundle.json","state":"https://pith.science/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GYH7CDH2MYVQDOBCTPNBUIJNCI/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:GYH7CDH2MYVQDOBCTPNBUIJNCI","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bb8002760f14eb7ab5d01df07e279cbdc89e61f194f5aea18d086dfa97a6661c","cross_cats_sorted":["cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-27T20:23:39Z","title_canon_sha256":"6e9e9fc61154ff2c229adf94f2c792e9bc10f1e6e910901bd2c2b9622615cc8c"},"schema_version":"1.0","source":{"id":"1610.09001","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1610.09001","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"arxiv_version","alias_value":"1610.09001v1","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1610.09001","created_at":"2026-05-18T01:00:59Z"},{"alias_kind":"pith_short_12","alias_value":"GYH7CDH2MYVQ","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_16","alias_value":"GYH7CDH2MYVQDOBC","created_at":"2026-05-18T12:30:19Z"},{"alias_kind":"pith_short_8","alias_value":"GYH7CDH2","created_at":"2026-05-18T12:30:19Z"}],"graph_snapshots":[{"event_id":"sha256:aa9be432db93dc5bf727f0fdc1ce2ea125894f23238d738db88a4a28d64910d4","target":"graph","created_at":"2026-05-18T01:00:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We learn rich natural sound representations by capitalizing on large amounts of unlabeled sound data collected in the wild. We leverage the natural synchronization between vision and sound to learn an acoustic representation using two-million unlabeled videos. Unlabeled video has the advantage that it can be economically acquired at massive scales, yet contains useful signals about natural sound. We propose a student-teacher training procedure which transfers discriminative visual knowledge from well established visual recognition models into the sound modality using unlabeled video as a bridg","authors_text":"Antonio Torralba, Carl Vondrick, Yusuf Aytar","cross_cats":["cs.LG","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-27T20:23:39Z","title":"SoundNet: Learning Sound Representations from Unlabeled Video"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1610.09001","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ff6724abc48066aee197564152ede6e524d132d6f39b058d3dbd93fc8ffa4189","target":"record","created_at":"2026-05-18T01:00:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bb8002760f14eb7ab5d01df07e279cbdc89e61f194f5aea18d086dfa97a6661c","cross_cats_sorted":["cs.LG","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-10-27T20:23:39Z","title_canon_sha256":"6e9e9fc61154ff2c229adf94f2c792e9bc10f1e6e910901bd2c2b9622615cc8c"},"schema_version":"1.0","source":{"id":"1610.09001","kind":"arxiv","version":1}},"canonical_sha256":"360ff10cfa662b01b8229bda1a212d122987e9076931cf2c14eb8c4252b52c93","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"360ff10cfa662b01b8229bda1a212d122987e9076931cf2c14eb8c4252b52c93","first_computed_at":"2026-05-18T01:00:59.466815Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:00:59.466815Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"UW1fMMod3VOn4bHYyTkiyWY6m/mDuKz1GK5Jt+/e8/vDzlckJnsgoSPjqLo1tT1H9qLgelHaE2aKbZ/a14w6CA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:00:59.467359Z","signed_message":"canonical_sha256_bytes"},"source_id":"1610.09001","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ff6724abc48066aee197564152ede6e524d132d6f39b058d3dbd93fc8ffa4189","sha256:aa9be432db93dc5bf727f0fdc1ce2ea125894f23238d738db88a4a28d64910d4"],"state_sha256":"7fb5bf0d654c2f7672277953387e64b4eb8d51a3633e7446160a6c0c812aad1c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4mjKHiu4AxM/2UD2nl+X+wAk7FhjEnBCSNFmeWshzb3StCZi2NzwEm7XGprB3fsS0UHn4fNbLE8vi7UzAWYwCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T08:38:34.088775Z","bundle_sha256":"c867333612e2916b417838827a6b751b4e099f4d5d38e0190f89c207a2cef061"}}