{"paper":{"title":"Video Representation Learning and Latent Concept Mining for Large-scale Multi-label Video Classification","license":"http://creativecommons.org/publicdomain/zero/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alexander G. Hauptmann, Lu Jiang, Po-Yao Huang, Ye Yuan, Zhenzhong Lan","submitted_at":"2017-07-05T14:15:06Z","abstract_excerpt":"We report on CMU Informedia Lab's system used in Google's YouTube 8 Million Video Understanding Challenge. In this multi-label video classification task, our pipeline achieved 84.675% and 84.662% GAP on our evaluation split and the official test set. We attribute the good performance to three components: 1) Refined video representation learning with residual links and hypercolumns 2) Latent concept mining which captures interactions among concepts. 3) Learning with temporal segments and weighted multi-model ensemble. We conduct experiments to validate and analyze the contribution of our models"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.01408","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"}