{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:CICUUY4DEIQAXB4MVEOW6FH4WL","short_pith_number":"pith:CICUUY4D","canonical_record":{"source":{"id":"1811.11683","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-11-28T17:05:27Z","cross_cats_sorted":["cs.CL","cs.LG","eess.IV"],"title_canon_sha256":"e138a4a0b76e3600ae1e2299b53451e85dc308dbbf0ba3b05e631d693cee6c51","abstract_canon_sha256":"d2280ec7d09b81bd946b930d01f5280badc9340fd29fd6912df373bb150a21ad"},"schema_version":"1.0"},"canonical_sha256":"12054a638322200b878ca91d6f14fcb2e20d29e93c470d097e39fe24d6331771","source":{"kind":"arxiv","id":"1811.11683","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.11683","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.11683v2","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.11683","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"pith_short_12","alias_value":"CICUUY4DEIQA","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"CICUUY4DEIQAXB4M","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"CICUUY4D","created_at":"2026-05-18T12:32:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:CICUUY4DEIQAXB4MVEOW6FH4WL","target":"record","payload":{"canonical_record":{"source":{"id":"1811.11683","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-11-28T17:05:27Z","cross_cats_sorted":["cs.CL","cs.LG","eess.IV"],"title_canon_sha256":"e138a4a0b76e3600ae1e2299b53451e85dc308dbbf0ba3b05e631d693cee6c51","abstract_canon_sha256":"d2280ec7d09b81bd946b930d01f5280badc9340fd29fd6912df373bb150a21ad"},"schema_version":"1.0"},"canonical_sha256":"12054a638322200b878ca91d6f14fcb2e20d29e93c470d097e39fe24d6331771","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:44:42.811512Z","signature_b64":"7mHgIWYnDYs14AtFYVkYDyfQCvAl6LAIBMtWDEDr+JOkdaa4kV/Z6aE/UT62jNltcDG7fRtWAV+n/3tjSOZ5DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"12054a638322200b878ca91d6f14fcb2e20d29e93c470d097e39fe24d6331771","last_reissued_at":"2026-05-17T23:44:42.810993Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:44:42.810993Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1811.11683","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9nCUeRxAMxC+Zhi7SCOKiRl/BOrkAOuhPpqxXcCPJZlaDyeaBlVicQ4TVwM6C9bhBmo0ZQTHoYcNMH6XtaFCCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:31:39.190723Z"},"content_sha256":"e29db3d73bad90a9bf199e86b65945057040725e050e820e40f23f8e23416aee","schema_version":"1.0","event_id":"sha256:e29db3d73bad90a9bf199e86b65945057040725e050e820e40f23f8e23416aee"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:CICUUY4DEIQAXB4MVEOW6FH4WL","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Multi-level Multimodal Common Semantic Space for Image-Phrase Grounding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.LG","eess.IV"],"primary_cat":"cs.CV","authors_text":"Brian Chen, Carl Vondrick, Hassan Akbari, Shih-Fu Chang, Surabhi Bhargava, Svebor Karaman","submitted_at":"2018-11-28T17:05:27Z","abstract_excerpt":"We address the problem of phrase grounding by lear ing a multi-level common semantic space shared by the textual and visual modalities. We exploit multiple levels of feature maps of a Deep Convolutional Neural Network, as well as contextualized word and sentence embeddings extracted from a character-based language model. Following dedicated non-linear mappings for visual features at each level, word, and sentence embeddings, we obtain multiple instantiations of our common semantic space in which comparisons between any target text and the visual content is performed with cosine similarity. We "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.11683","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:44:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kbjNYm2oqubb7RYc7/eOHv1M3N1Qg9Z1MEG2lZYRdix2PslyZYpHou8OYy2sRsTYNfrRRn83O18dOOvxt9/aAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:31:39.191075Z"},"content_sha256":"0ce78e15f7df7ab55f9d057a25f67f413ce55f0ceb9d9e2639a695ab7a3a1459","schema_version":"1.0","event_id":"sha256:0ce78e15f7df7ab55f9d057a25f67f413ce55f0ceb9d9e2639a695ab7a3a1459"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/bundle.json","state_url":"https://pith.science/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T03:31:39Z","links":{"resolver":"https://pith.science/pith/CICUUY4DEIQAXB4MVEOW6FH4WL","bundle":"https://pith.science/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/bundle.json","state":"https://pith.science/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CICUUY4DEIQAXB4MVEOW6FH4WL/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:CICUUY4DEIQAXB4MVEOW6FH4WL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d2280ec7d09b81bd946b930d01f5280badc9340fd29fd6912df373bb150a21ad","cross_cats_sorted":["cs.CL","cs.LG","eess.IV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-11-28T17:05:27Z","title_canon_sha256":"e138a4a0b76e3600ae1e2299b53451e85dc308dbbf0ba3b05e631d693cee6c51"},"schema_version":"1.0","source":{"id":"1811.11683","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1811.11683","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"arxiv_version","alias_value":"1811.11683v2","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.11683","created_at":"2026-05-17T23:44:42Z"},{"alias_kind":"pith_short_12","alias_value":"CICUUY4DEIQA","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_16","alias_value":"CICUUY4DEIQAXB4M","created_at":"2026-05-18T12:32:16Z"},{"alias_kind":"pith_short_8","alias_value":"CICUUY4D","created_at":"2026-05-18T12:32:16Z"}],"graph_snapshots":[{"event_id":"sha256:0ce78e15f7df7ab55f9d057a25f67f413ce55f0ceb9d9e2639a695ab7a3a1459","target":"graph","created_at":"2026-05-17T23:44:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We address the problem of phrase grounding by lear ing a multi-level common semantic space shared by the textual and visual modalities. We exploit multiple levels of feature maps of a Deep Convolutional Neural Network, as well as contextualized word and sentence embeddings extracted from a character-based language model. Following dedicated non-linear mappings for visual features at each level, word, and sentence embeddings, we obtain multiple instantiations of our common semantic space in which comparisons between any target text and the visual content is performed with cosine similarity. We ","authors_text":"Brian Chen, Carl Vondrick, Hassan Akbari, Shih-Fu Chang, Surabhi Bhargava, Svebor Karaman","cross_cats":["cs.CL","cs.LG","eess.IV"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-11-28T17:05:27Z","title":"Multi-level Multimodal Common Semantic Space for Image-Phrase Grounding"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.11683","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e29db3d73bad90a9bf199e86b65945057040725e050e820e40f23f8e23416aee","target":"record","created_at":"2026-05-17T23:44:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d2280ec7d09b81bd946b930d01f5280badc9340fd29fd6912df373bb150a21ad","cross_cats_sorted":["cs.CL","cs.LG","eess.IV"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2018-11-28T17:05:27Z","title_canon_sha256":"e138a4a0b76e3600ae1e2299b53451e85dc308dbbf0ba3b05e631d693cee6c51"},"schema_version":"1.0","source":{"id":"1811.11683","kind":"arxiv","version":2}},"canonical_sha256":"12054a638322200b878ca91d6f14fcb2e20d29e93c470d097e39fe24d6331771","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"12054a638322200b878ca91d6f14fcb2e20d29e93c470d097e39fe24d6331771","first_computed_at":"2026-05-17T23:44:42.810993Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:44:42.810993Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7mHgIWYnDYs14AtFYVkYDyfQCvAl6LAIBMtWDEDr+JOkdaa4kV/Z6aE/UT62jNltcDG7fRtWAV+n/3tjSOZ5DQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:44:42.811512Z","signed_message":"canonical_sha256_bytes"},"source_id":"1811.11683","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e29db3d73bad90a9bf199e86b65945057040725e050e820e40f23f8e23416aee","sha256:0ce78e15f7df7ab55f9d057a25f67f413ce55f0ceb9d9e2639a695ab7a3a1459"],"state_sha256":"2f3c57840445a0e3ba7937a5e81f6d607c88ecf1b1511379aa74b084dff7720f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qqQExxaHNDhBrx+uS5T6/nkm/rvk7zwY8ko1Qtt+kCgx9+SHu/SSCUmYa50NrbgWxmNW7d412NqNOu3XuWkKDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T03:31:39.193295Z","bundle_sha256":"1c14fc5e37336a66fbe7d12b9f64a82201aee96f6674ea95b6f2a47f18f1510a"}}