{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:A2CIO3CNIMAE37R6BP6XRELEQK","short_pith_number":"pith:A2CIO3CN","schema_version":"1.0","canonical_sha256":"0684876c4d43004dfe3e0bfd78916482aa6893867fc0a7a1ac1cff33d9f64eca","source":{"kind":"arxiv","id":"1902.00378","version":1},"attestation_state":"computed","paper":{"title":"Self-Supervised Visual Representations for Cross-Modal Retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"C.V. Jawahar, Dimosthenis Karatzas, Lluis Gomez, Mar\\c{c}al Rusi\\~nol, Yash Patel","submitted_at":"2019-01-31T09:17:07Z","abstract_excerpt":"Cross-modal retrieval methods have been significantly improved in last years with the use of deep neural networks and large-scale annotated datasets such as ImageNet and Places. However, collecting and annotating such datasets requires a tremendous amount of human effort and, besides, their annotations are usually limited to discrete sets of popular visual classes that may not be representative of the richer semantics found on large-scale cross-modal retrieval datasets. In this paper, we present a self-supervised cross-modal retrieval framework that leverages as training data the correlations "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1902.00378","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-31T09:17:07Z","cross_cats_sorted":[],"title_canon_sha256":"937c49da7db7c6d4adc01ae12e0f202b511af0c7efdfe6d4142cc46823814ac5","abstract_canon_sha256":"801f872455e6c065d22ac9dc3ee6b946de397a45840a1482cbdbd924b124aadd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:54:57.606018Z","signature_b64":"BoatO38eTkjnXBgSgXwrSWwWAxFgzkPXsSt7Sd2QJ3WmBXjHBDz9yeuHQOuSghiYg3pH2kpZ31lhm6av1wEoBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0684876c4d43004dfe3e0bfd78916482aa6893867fc0a7a1ac1cff33d9f64eca","last_reissued_at":"2026-05-17T23:54:57.605349Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:54:57.605349Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Self-Supervised Visual Representations for Cross-Modal Retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"C.V. Jawahar, Dimosthenis Karatzas, Lluis Gomez, Mar\\c{c}al Rusi\\~nol, Yash Patel","submitted_at":"2019-01-31T09:17:07Z","abstract_excerpt":"Cross-modal retrieval methods have been significantly improved in last years with the use of deep neural networks and large-scale annotated datasets such as ImageNet and Places. However, collecting and annotating such datasets requires a tremendous amount of human effort and, besides, their annotations are usually limited to discrete sets of popular visual classes that may not be representative of the richer semantics found on large-scale cross-modal retrieval datasets. In this paper, we present a self-supervised cross-modal retrieval framework that leverages as training data the correlations "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.00378","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1902.00378","created_at":"2026-05-17T23:54:57.605466+00:00"},{"alias_kind":"arxiv_version","alias_value":"1902.00378v1","created_at":"2026-05-17T23:54:57.605466+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.00378","created_at":"2026-05-17T23:54:57.605466+00:00"},{"alias_kind":"pith_short_12","alias_value":"A2CIO3CNIMAE","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_16","alias_value":"A2CIO3CNIMAE37R6","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_8","alias_value":"A2CIO3CN","created_at":"2026-05-18T12:33:12.712433+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK","json":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK.json","graph_json":"https://pith.science/api/pith-number/A2CIO3CNIMAE37R6BP6XRELEQK/graph.json","events_json":"https://pith.science/api/pith-number/A2CIO3CNIMAE37R6BP6XRELEQK/events.json","paper":"https://pith.science/paper/A2CIO3CN"},"agent_actions":{"view_html":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK","download_json":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK.json","view_paper":"https://pith.science/paper/A2CIO3CN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1902.00378&json=true","fetch_graph":"https://pith.science/api/pith-number/A2CIO3CNIMAE37R6BP6XRELEQK/graph.json","fetch_events":"https://pith.science/api/pith-number/A2CIO3CNIMAE37R6BP6XRELEQK/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK/action/timestamp_anchor","attest_storage":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK/action/storage_attestation","attest_author":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK/action/author_attestation","sign_citation":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK/action/citation_signature","submit_replication":"https://pith.science/pith/A2CIO3CNIMAE37R6BP6XRELEQK/action/replication_record"}},"created_at":"2026-05-17T23:54:57.605466+00:00","updated_at":"2026-05-17T23:54:57.605466+00:00"}