{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2Y7Z4U6BOBEPIBGVCL7STPXCTX","short_pith_number":"pith:2Y7Z4U6B","schema_version":"1.0","canonical_sha256":"d63f9e53c17048f404d512ff29bee29dd39c46599d486fa1b4b485fe738575a4","source":{"kind":"arxiv","id":"2605.15868","version":1},"attestation_state":"computed","paper":{"title":"SOLAR: Self-supervised Joint Learning for Symmetric Multimodal Retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hang Yu, Peng Di, Wenjie Yang, Yuyu Guo","submitted_at":"2026-05-15T11:36:01Z","abstract_excerpt":"In this work, we address the critical yet underexplored challenge of symmetric multimodal-to-multimodal (MM2MM) retrieval, where queries and contexts are interchangeable. Existing universal multimodal retrieval works struggle with this task, as they are constrained by the labeled asymmetric datasets used. We produce SOLAR (Self-supervised jOint LeArning for symmetric multimodal Retrieval), a novel two-stage self-supervised framework that leverages readily available unlabeled web-scale image-text pairs. Based on the observation that both semantic alignment and discrepancies exist between two mo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.15868","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-15T11:36:01Z","cross_cats_sorted":[],"title_canon_sha256":"64f7ba788fa8f607807245da56a13cbe3deef9aecf915fcc712ef8a62f932aa4","abstract_canon_sha256":"baa7bcb3e393ef59073810dd256b8287e3b7819efa0c4832395ac1783fdda289"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:22.826399Z","signature_b64":"8zD2zNmTzy0RzuAXNulhge9S00dD2frUshX+1X+7+g01pUI4RxDZe+COrDQ/V2u/oEk6F2aju+JZ3ZrLXsf2Ag==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d63f9e53c17048f404d512ff29bee29dd39c46599d486fa1b4b485fe738575a4","last_reissued_at":"2026-05-20T00:01:22.825280Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:22.825280Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SOLAR: Self-supervised Joint Learning for Symmetric Multimodal Retrieval","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hang Yu, Peng Di, Wenjie Yang, Yuyu Guo","submitted_at":"2026-05-15T11:36:01Z","abstract_excerpt":"In this work, we address the critical yet underexplored challenge of symmetric multimodal-to-multimodal (MM2MM) retrieval, where queries and contexts are interchangeable. Existing universal multimodal retrieval works struggle with this task, as they are constrained by the labeled asymmetric datasets used. We produce SOLAR (Self-supervised jOint LeArning for symmetric multimodal Retrieval), a novel two-stage self-supervised framework that leverages readily available unlabeled web-scale image-text pairs. Based on the observation that both semantic alignment and discrepancies exist between two mo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.15868","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15868/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"ai_meta_artifact","ran_at":"2026-05-19T17:33:48.695232Z","status":"skipped","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T17:01:55.808543Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"892412e6fade64dc3de013a6a260703db8768a34fc4b8de8e9afebf37d34f12a"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15868","created_at":"2026-05-20T00:01:22.825420+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15868v1","created_at":"2026-05-20T00:01:22.825420+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15868","created_at":"2026-05-20T00:01:22.825420+00:00"},{"alias_kind":"pith_short_12","alias_value":"2Y7Z4U6BOBEP","created_at":"2026-05-20T00:01:22.825420+00:00"},{"alias_kind":"pith_short_16","alias_value":"2Y7Z4U6BOBEPIBGV","created_at":"2026-05-20T00:01:22.825420+00:00"},{"alias_kind":"pith_short_8","alias_value":"2Y7Z4U6B","created_at":"2026-05-20T00:01:22.825420+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX","json":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX.json","graph_json":"https://pith.science/api/pith-number/2Y7Z4U6BOBEPIBGVCL7STPXCTX/graph.json","events_json":"https://pith.science/api/pith-number/2Y7Z4U6BOBEPIBGVCL7STPXCTX/events.json","paper":"https://pith.science/paper/2Y7Z4U6B"},"agent_actions":{"view_html":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX","download_json":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX.json","view_paper":"https://pith.science/paper/2Y7Z4U6B","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15868&json=true","fetch_graph":"https://pith.science/api/pith-number/2Y7Z4U6BOBEPIBGVCL7STPXCTX/graph.json","fetch_events":"https://pith.science/api/pith-number/2Y7Z4U6BOBEPIBGVCL7STPXCTX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX/action/storage_attestation","attest_author":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX/action/author_attestation","sign_citation":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX/action/citation_signature","submit_replication":"https://pith.science/pith/2Y7Z4U6BOBEPIBGVCL7STPXCTX/action/replication_record"}},"created_at":"2026-05-20T00:01:22.825420+00:00","updated_at":"2026-05-20T00:01:22.825420+00:00"}