{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:IKYBZ75ATQ2GKEGMZHOAY5AISS","short_pith_number":"pith:IKYBZ75A","canonical_record":{"source":{"id":"1507.04831","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-07-17T04:13:12Z","cross_cats_sorted":["cs.LG","cs.MM","cs.SD"],"title_canon_sha256":"83d0d47ac6cc447b13bb8f6bde3f23637fec04155c70eceb7631ba2894f89daf","abstract_canon_sha256":"0bb5beb192f6e9f4e74ff885150f39847c632166034fec7695ee3090f57e5278"},"schema_version":"1.0"},"canonical_sha256":"42b01cffa09c346510ccc9dc0c740894b431a48d47ce0d1d1a9eba9f97cffc73","source":{"kind":"arxiv","id":"1507.04831","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.04831","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"arxiv_version","alias_value":"1507.04831v1","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.04831","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"pith_short_12","alias_value":"IKYBZ75ATQ2G","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"IKYBZ75ATQ2GKEGM","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"IKYBZ75A","created_at":"2026-05-18T12:29:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:IKYBZ75ATQ2GKEGMZHOAY5AISS","target":"record","payload":{"canonical_record":{"source":{"id":"1507.04831","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-07-17T04:13:12Z","cross_cats_sorted":["cs.LG","cs.MM","cs.SD"],"title_canon_sha256":"83d0d47ac6cc447b13bb8f6bde3f23637fec04155c70eceb7631ba2894f89daf","abstract_canon_sha256":"0bb5beb192f6e9f4e74ff885150f39847c632166034fec7695ee3090f57e5278"},"schema_version":"1.0"},"canonical_sha256":"42b01cffa09c346510ccc9dc0c740894b431a48d47ce0d1d1a9eba9f97cffc73","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:36:42.673788Z","signature_b64":"+r1YMX0vHlR7vJ9Kql52OlElen22pn+dVEMA38PXcI40WLTCSOvieCYAppmKHdk1n2nL/Rq5XHGZFNMGdFw+Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"42b01cffa09c346510ccc9dc0c740894b431a48d47ce0d1d1a9eba9f97cffc73","last_reissued_at":"2026-05-18T01:36:42.672664Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:36:42.672664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1507.04831","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W8unq2nC0d+3L41zhimGZ8ZGKpejVbSwIGz0cWtjaTl70lUPDmZdPyS0LGs1y7KyDq/VQ8ZBZAzj6NalhCkJDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:16:01.430942Z"},"content_sha256":"6d5a1a7f12826bcf5ea63fdb6b77ded90baae56031aa1ced68967d080407ebfd","schema_version":"1.0","event_id":"sha256:6d5a1a7f12826bcf5ea63fdb6b77ded90baae56031aa1ced68967d080407ebfd"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:IKYBZ75ATQ2GKEGMZHOAY5AISS","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Deep Multimodal Speaker Naming","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","cs.MM","cs.SD"],"primary_cat":"cs.CV","authors_text":"Chang Yuan, Jimmy Ren, Jingwen Dai, Li Xu, Wenping Wang, Yongtao Hu","submitted_at":"2015-07-17T04:13:12Z","abstract_excerpt":"Automatic speaker naming is the problem of localizing as well as identifying each speaking character in a TV/movie/live show video. This is a challenging problem mainly attributes to its multimodal nature, namely face cue alone is insufficient to achieve good performance. Previous multimodal approaches to this problem usually process the data of different modalities individually and merge them using handcrafted heuristics. Such approaches work well for simple scenes, but fail to achieve high performance for speakers with large appearance variations. In this paper, we propose a novel convolutio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.04831","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lyvBq4AMlro6XS17bM/Mkf75K11ld0K61QV/wmb8v2vFgaU4bqNmgdXxPffOXVERLb3R5DA+t33qtdZB1eSWAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T20:16:01.431626Z"},"content_sha256":"4ce4c1819c9db100c4c90434ed51bdad6331dd4de4e081737e09185f1d8bd907","schema_version":"1.0","event_id":"sha256:4ce4c1819c9db100c4c90434ed51bdad6331dd4de4e081737e09185f1d8bd907"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/bundle.json","state_url":"https://pith.science/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T20:16:01Z","links":{"resolver":"https://pith.science/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS","bundle":"https://pith.science/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/bundle.json","state":"https://pith.science/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IKYBZ75ATQ2GKEGMZHOAY5AISS/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:IKYBZ75ATQ2GKEGMZHOAY5AISS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0bb5beb192f6e9f4e74ff885150f39847c632166034fec7695ee3090f57e5278","cross_cats_sorted":["cs.LG","cs.MM","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-07-17T04:13:12Z","title_canon_sha256":"83d0d47ac6cc447b13bb8f6bde3f23637fec04155c70eceb7631ba2894f89daf"},"schema_version":"1.0","source":{"id":"1507.04831","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.04831","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"arxiv_version","alias_value":"1507.04831v1","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.04831","created_at":"2026-05-18T01:36:42Z"},{"alias_kind":"pith_short_12","alias_value":"IKYBZ75ATQ2G","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_16","alias_value":"IKYBZ75ATQ2GKEGM","created_at":"2026-05-18T12:29:25Z"},{"alias_kind":"pith_short_8","alias_value":"IKYBZ75A","created_at":"2026-05-18T12:29:25Z"}],"graph_snapshots":[{"event_id":"sha256:4ce4c1819c9db100c4c90434ed51bdad6331dd4de4e081737e09185f1d8bd907","target":"graph","created_at":"2026-05-18T01:36:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Automatic speaker naming is the problem of localizing as well as identifying each speaking character in a TV/movie/live show video. This is a challenging problem mainly attributes to its multimodal nature, namely face cue alone is insufficient to achieve good performance. Previous multimodal approaches to this problem usually process the data of different modalities individually and merge them using handcrafted heuristics. Such approaches work well for simple scenes, but fail to achieve high performance for speakers with large appearance variations. In this paper, we propose a novel convolutio","authors_text":"Chang Yuan, Jimmy Ren, Jingwen Dai, Li Xu, Wenping Wang, Yongtao Hu","cross_cats":["cs.LG","cs.MM","cs.SD"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-07-17T04:13:12Z","title":"Deep Multimodal Speaker Naming"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.04831","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6d5a1a7f12826bcf5ea63fdb6b77ded90baae56031aa1ced68967d080407ebfd","target":"record","created_at":"2026-05-18T01:36:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0bb5beb192f6e9f4e74ff885150f39847c632166034fec7695ee3090f57e5278","cross_cats_sorted":["cs.LG","cs.MM","cs.SD"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-07-17T04:13:12Z","title_canon_sha256":"83d0d47ac6cc447b13bb8f6bde3f23637fec04155c70eceb7631ba2894f89daf"},"schema_version":"1.0","source":{"id":"1507.04831","kind":"arxiv","version":1}},"canonical_sha256":"42b01cffa09c346510ccc9dc0c740894b431a48d47ce0d1d1a9eba9f97cffc73","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"42b01cffa09c346510ccc9dc0c740894b431a48d47ce0d1d1a9eba9f97cffc73","first_computed_at":"2026-05-18T01:36:42.672664Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:36:42.672664Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+r1YMX0vHlR7vJ9Kql52OlElen22pn+dVEMA38PXcI40WLTCSOvieCYAppmKHdk1n2nL/Rq5XHGZFNMGdFw+Bg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:36:42.673788Z","signed_message":"canonical_sha256_bytes"},"source_id":"1507.04831","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6d5a1a7f12826bcf5ea63fdb6b77ded90baae56031aa1ced68967d080407ebfd","sha256:4ce4c1819c9db100c4c90434ed51bdad6331dd4de4e081737e09185f1d8bd907"],"state_sha256":"22a96a34b7ea9e83c6bbca6083c99f5f507870a98e3317631a11db20ee9a12f8"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yfm1BqJ1e4p0taa1GCcpchPpT8CX0iQOhE6uyA8WTm/a2EuyCB9jYvFkN/HGd9kKWZlnCeOp1k4bviY54mm9Dg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T20:16:01.435217Z","bundle_sha256":"36a7407d70d4077602bc56a2238a3776e4f5cd4c92db05c3de59e04cbf702cee"}}