{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2016:UDTYG4NJBVFS4JLNNQO2UQDHPK","short_pith_number":"pith:UDTYG4NJ","canonical_record":{"source":{"id":"1608.02367","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-08T09:54:15Z","cross_cats_sorted":[],"title_canon_sha256":"887dd5e5900f9ac72d898ecdaea6ed7cbf4b076804c56c9631ec8c97f04eb237","abstract_canon_sha256":"7c55d1664f92f3a2413ca01113a6a9f848b4e17a57030f4554be7fb1ed98d11a"},"schema_version":"1.0"},"canonical_sha256":"a0e78371a90d4b2e256d6c1daa40677a84ed9ba4e727ba2836a48d5eca1078dc","source":{"kind":"arxiv","id":"1608.02367","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1608.02367","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"arxiv_version","alias_value":"1608.02367v1","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.02367","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"pith_short_12","alias_value":"UDTYG4NJBVFS","created_at":"2026-05-18T12:30:46Z"},{"alias_kind":"pith_short_16","alias_value":"UDTYG4NJBVFS4JLN","created_at":"2026-05-18T12:30:46Z"},{"alias_kind":"pith_short_8","alias_value":"UDTYG4NJ","created_at":"2026-05-18T12:30:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2016:UDTYG4NJBVFS4JLNNQO2UQDHPK","target":"record","payload":{"canonical_record":{"source":{"id":"1608.02367","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-08T09:54:15Z","cross_cats_sorted":[],"title_canon_sha256":"887dd5e5900f9ac72d898ecdaea6ed7cbf4b076804c56c9631ec8c97f04eb237","abstract_canon_sha256":"7c55d1664f92f3a2413ca01113a6a9f848b4e17a57030f4554be7fb1ed98d11a"},"schema_version":"1.0"},"canonical_sha256":"a0e78371a90d4b2e256d6c1daa40677a84ed9ba4e727ba2836a48d5eca1078dc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:09:40.389238Z","signature_b64":"laV9sKBRd91iGOaIff5mkZFceJzPKJvNomTl2UdI/bqv5BDHucR3+/2QKnKFZlgTsIUfef0lEouonXHFbfURDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a0e78371a90d4b2e256d6c1daa40677a84ed9ba4e727ba2836a48d5eca1078dc","last_reissued_at":"2026-05-18T01:09:40.388803Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:09:40.388803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1608.02367","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:09:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DgHXwq3t5tan8YhWaC2mA0DSV8h+RvPkmEKqQdiIcZIOY5PRPdQL/YWgQwk5x7EqNdkq4qIaTgzFPpgjPJSdBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T11:24:34.525107Z"},"content_sha256":"2179bdbc449248ab20350295a2d2da0de33b4945943532ea05ff78b260485be9","schema_version":"1.0","event_id":"sha256:2179bdbc449248ab20350295a2d2da0de33b4945943532ea05ff78b260485be9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2016:UDTYG4NJBVFS4JLNNQO2UQDHPK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning Joint Representations of Videos and Sentences with Web Image Search","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Esa Rahtu, Janne Heikkil\\\"a, Mayu Otani, Naokazu Yokoya, Yuta Nakashima","submitted_at":"2016-08-08T09:54:15Z","abstract_excerpt":"Our objective is video retrieval based on natural language queries. In addition, we consider the analogous problem of retrieving sentences or generating descriptions given an input video. Recent work has addressed the problem by embedding visual and textual inputs into a common space where semantic similarities correlate to distances. We also adopt the embedding approach, and make the following contributions: First, we utilize web image search in sentence embedding process to disambiguate fine-grained visual concepts. Second, we propose embedding models for sentence, image, and video inputs wh"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.02367","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:09:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"uE+JomGWPbiSxoa194zPNzw7dza08nKFvziKFa8Ubw/dZ9YwVRkoG49gEsQB+mfmFtWoHfls1jBDmOsJOhh1CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T11:24:34.525802Z"},"content_sha256":"f48348cfaedb5508ccb1d3ac531cea91fb50b8c9b4d964a38023eb0d22149cb0","schema_version":"1.0","event_id":"sha256:f48348cfaedb5508ccb1d3ac531cea91fb50b8c9b4d964a38023eb0d22149cb0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/bundle.json","state_url":"https://pith.science/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T11:24:34Z","links":{"resolver":"https://pith.science/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK","bundle":"https://pith.science/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/bundle.json","state":"https://pith.science/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UDTYG4NJBVFS4JLNNQO2UQDHPK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2016:UDTYG4NJBVFS4JLNNQO2UQDHPK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7c55d1664f92f3a2413ca01113a6a9f848b4e17a57030f4554be7fb1ed98d11a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-08T09:54:15Z","title_canon_sha256":"887dd5e5900f9ac72d898ecdaea6ed7cbf4b076804c56c9631ec8c97f04eb237"},"schema_version":"1.0","source":{"id":"1608.02367","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1608.02367","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"arxiv_version","alias_value":"1608.02367v1","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.02367","created_at":"2026-05-18T01:09:40Z"},{"alias_kind":"pith_short_12","alias_value":"UDTYG4NJBVFS","created_at":"2026-05-18T12:30:46Z"},{"alias_kind":"pith_short_16","alias_value":"UDTYG4NJBVFS4JLN","created_at":"2026-05-18T12:30:46Z"},{"alias_kind":"pith_short_8","alias_value":"UDTYG4NJ","created_at":"2026-05-18T12:30:46Z"}],"graph_snapshots":[{"event_id":"sha256:f48348cfaedb5508ccb1d3ac531cea91fb50b8c9b4d964a38023eb0d22149cb0","target":"graph","created_at":"2026-05-18T01:09:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Our objective is video retrieval based on natural language queries. In addition, we consider the analogous problem of retrieving sentences or generating descriptions given an input video. Recent work has addressed the problem by embedding visual and textual inputs into a common space where semantic similarities correlate to distances. We also adopt the embedding approach, and make the following contributions: First, we utilize web image search in sentence embedding process to disambiguate fine-grained visual concepts. Second, we propose embedding models for sentence, image, and video inputs wh","authors_text":"Esa Rahtu, Janne Heikkil\\\"a, Mayu Otani, Naokazu Yokoya, Yuta Nakashima","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-08T09:54:15Z","title":"Learning Joint Representations of Videos and Sentences with Web Image Search"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.02367","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2179bdbc449248ab20350295a2d2da0de33b4945943532ea05ff78b260485be9","target":"record","created_at":"2026-05-18T01:09:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7c55d1664f92f3a2413ca01113a6a9f848b4e17a57030f4554be7fb1ed98d11a","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-08-08T09:54:15Z","title_canon_sha256":"887dd5e5900f9ac72d898ecdaea6ed7cbf4b076804c56c9631ec8c97f04eb237"},"schema_version":"1.0","source":{"id":"1608.02367","kind":"arxiv","version":1}},"canonical_sha256":"a0e78371a90d4b2e256d6c1daa40677a84ed9ba4e727ba2836a48d5eca1078dc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a0e78371a90d4b2e256d6c1daa40677a84ed9ba4e727ba2836a48d5eca1078dc","first_computed_at":"2026-05-18T01:09:40.388803Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:09:40.388803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"laV9sKBRd91iGOaIff5mkZFceJzPKJvNomTl2UdI/bqv5BDHucR3+/2QKnKFZlgTsIUfef0lEouonXHFbfURDA==","signature_status":"signed_v1","signed_at":"2026-05-18T01:09:40.389238Z","signed_message":"canonical_sha256_bytes"},"source_id":"1608.02367","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2179bdbc449248ab20350295a2d2da0de33b4945943532ea05ff78b260485be9","sha256:f48348cfaedb5508ccb1d3ac531cea91fb50b8c9b4d964a38023eb0d22149cb0"],"state_sha256":"5b886cf501f3e902dbfc8b50342223b0edc462d7834e561822e0a5c97354fcfc"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"C4MP0NfFF78soMf0ermdoRJW53dUaf0a9RGpxhiWfFNGYVwcL0S7aV3OFlGQUYNaKO2fi//2K9hh8/LX7p2LCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T11:24:34.529743Z","bundle_sha256":"ebc9e2b3bb1a682101b457d12f932cf6c68b6237d933ce47b15e3def4765ab5b"}}