{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:O5E5ZGNDT6RT2I3N563XLXHUBG","short_pith_number":"pith:O5E5ZGND","schema_version":"1.0","canonical_sha256":"7749dc99a39fa33d236defb775dcf4099af8635d54832e7b083a9ecc642b7fc2","source":{"kind":"arxiv","id":"1511.07067","version":2},"attestation_state":"computed","paper":{"title":"Visual Word2Vec (vis-w2v): Learning Visually Grounded Word Embeddings Using Abstract Scenes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Devi Parikh, Jos\\'e M. F. Moura, Ramakrishna Vedantam, Satwik Kottur","submitted_at":"2015-11-22T20:46:42Z","abstract_excerpt":"We propose a model to learn visually grounded word embeddings (vis-w2v) to capture visual notions of semantic relatedness. While word embeddings trained using text have been extremely successful, they cannot uncover notions of semantic relatedness implicit in our visual world. For instance, although \"eats\" and \"stares at\" seem unrelated in text, they share semantics visually. When people are eating something, they also tend to stare at the food. Grounding diverse relations like \"eats\" and \"stares at\" into vision remains challenging, despite recent progress in vision. We note that the visual gr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1511.07067","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-11-22T20:46:42Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"6f6a7619f5528cd94851505d1291608d06fdee3c5131aac61222df6860af883a","abstract_canon_sha256":"724f735a8767f768e6612e3d9390d64f0a6b164d95116822e037b6daa0b6bca3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:11:43.724759Z","signature_b64":"pO6SlexBAhebwNnJVy+/JE23cEeEqtuTbsETvRWeCBfp4aCU0kkOJQxHSxQYkMzNkWwJymGmulDBgP849kliDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7749dc99a39fa33d236defb775dcf4099af8635d54832e7b083a9ecc642b7fc2","last_reissued_at":"2026-05-18T01:11:43.724388Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:11:43.724388Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Visual Word2Vec (vis-w2v): Learning Visually Grounded Word Embeddings Using Abstract Scenes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Devi Parikh, Jos\\'e M. F. Moura, Ramakrishna Vedantam, Satwik Kottur","submitted_at":"2015-11-22T20:46:42Z","abstract_excerpt":"We propose a model to learn visually grounded word embeddings (vis-w2v) to capture visual notions of semantic relatedness. While word embeddings trained using text have been extremely successful, they cannot uncover notions of semantic relatedness implicit in our visual world. For instance, although \"eats\" and \"stares at\" seem unrelated in text, they share semantics visually. When people are eating something, they also tend to stare at the food. Grounding diverse relations like \"eats\" and \"stares at\" into vision remains challenging, despite recent progress in vision. We note that the visual gr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1511.07067","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1511.07067","created_at":"2026-05-18T01:11:43.724450+00:00"},{"alias_kind":"arxiv_version","alias_value":"1511.07067v2","created_at":"2026-05-18T01:11:43.724450+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1511.07067","created_at":"2026-05-18T01:11:43.724450+00:00"},{"alias_kind":"pith_short_12","alias_value":"O5E5ZGNDT6RT","created_at":"2026-05-18T12:29:34.919912+00:00"},{"alias_kind":"pith_short_16","alias_value":"O5E5ZGNDT6RT2I3N","created_at":"2026-05-18T12:29:34.919912+00:00"},{"alias_kind":"pith_short_8","alias_value":"O5E5ZGND","created_at":"2026-05-18T12:29:34.919912+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG","json":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG.json","graph_json":"https://pith.science/api/pith-number/O5E5ZGNDT6RT2I3N563XLXHUBG/graph.json","events_json":"https://pith.science/api/pith-number/O5E5ZGNDT6RT2I3N563XLXHUBG/events.json","paper":"https://pith.science/paper/O5E5ZGND"},"agent_actions":{"view_html":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG","download_json":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG.json","view_paper":"https://pith.science/paper/O5E5ZGND","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1511.07067&json=true","fetch_graph":"https://pith.science/api/pith-number/O5E5ZGNDT6RT2I3N563XLXHUBG/graph.json","fetch_events":"https://pith.science/api/pith-number/O5E5ZGNDT6RT2I3N563XLXHUBG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG/action/storage_attestation","attest_author":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG/action/author_attestation","sign_citation":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG/action/citation_signature","submit_replication":"https://pith.science/pith/O5E5ZGNDT6RT2I3N563XLXHUBG/action/replication_record"}},"created_at":"2026-05-18T01:11:43.724450+00:00","updated_at":"2026-05-18T01:11:43.724450+00:00"}