{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:4ZE5Q6363C7QJSGRO3TRM6NE4U","short_pith_number":"pith:4ZE5Q636","schema_version":"1.0","canonical_sha256":"e649d87b7ed8bf04c8d176e71679a4e53bb8499ee2264dba6f5a89e455cca69c","source":{"kind":"arxiv","id":"2605.13544","version":1},"attestation_state":"computed","paper":{"title":"CA-GCL: Cross-Anatomy Global-Local Contrastive Learning for Robust 3D Medical Image Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A global contrastive objective separates anatomical categories to stop text embedding collapse in 3D medical vision-language models.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Die Dai, Hanwen Zhang, Jiaye Yang, Peng Wang, Qiao Liu, Yao Liu, Yutong Xie","submitted_at":"2026-05-13T13:54:27Z","abstract_excerpt":"Fine-grained Vision-Language Pre-training (FVLP) demonstrates significant potential in 3D medical image understanding by aligning anatomy-level visual representations with corresponding textual descriptions. However, existing FVLP paradigms often suffer from severe representation collapse in the textual embedding space, where text embeddings of distinct anatomical structures become highly clustered and indistinguishable. This distributional degeneracy renders the model hypersensitive to prompt variations, hindering reliable clinical deployment. To address these challenges, we propose a novel C"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.13544","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T13:54:27Z","cross_cats_sorted":[],"title_canon_sha256":"e8c6ad4bf420f1db3ce99e20e0df7d98162996275d8ae588a0028d263f3eed7a","abstract_canon_sha256":"7465aeb75dc44eaf69b6ed5af7df4dbe8244ca6fa06afc461193238e902fa86a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:44:23.939760Z","signature_b64":"csUJzZpI9az2+SUSxw4lzJ4iOOhUyi9751rWBtXP2ZNwA5pO5/AEH3/IyZHZhto4leMMSiLHGk0tsgQh3oXqBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e649d87b7ed8bf04c8d176e71679a4e53bb8499ee2264dba6f5a89e455cca69c","last_reissued_at":"2026-05-18T02:44:23.939242Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:44:23.939242Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CA-GCL: Cross-Anatomy Global-Local Contrastive Learning for Robust 3D Medical Image Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A global contrastive objective separates anatomical categories to stop text embedding collapse in 3D medical vision-language models.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Die Dai, Hanwen Zhang, Jiaye Yang, Peng Wang, Qiao Liu, Yao Liu, Yutong Xie","submitted_at":"2026-05-13T13:54:27Z","abstract_excerpt":"Fine-grained Vision-Language Pre-training (FVLP) demonstrates significant potential in 3D medical image understanding by aligning anatomy-level visual representations with corresponding textual descriptions. However, existing FVLP paradigms often suffer from severe representation collapse in the textual embedding space, where text embeddings of distinct anatomical structures become highly clustered and indistinguishable. This distributional degeneracy renders the model hypersensitive to prompt variations, hindering reliable clinical deployment. To address these challenges, we propose a novel C"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"CA-GCL consistently outperforms existing VLP paradigms in zero-shot abnormality detection, achieving superior performance while exhibiting strong cross-dataset generalization. Crucially, CA-GCL reduces performance variance across diverse prompt templates, transforming the collapsed textual similarity distribution into a bell-shaped distribution.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That enforcing global separation between anatomical categories via contrastive objectives will counteract local alignment collapse without degrading fine-grained visual-textual correspondences or introducing new instabilities in the latent space.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"CA-GCL adds global contrastive separation and clinical text augmentation to fine-grained vision-language pretraining, reducing textual embedding collapse and prompt variance in 3D medical image tasks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A global contrastive objective separates anatomical categories to stop text embedding collapse in 3D medical vision-language models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d0f830468eedd94cb8ce8b4038982a87a70b9c1a026039ce04d27e6343ffb1a8"},"source":{"id":"2605.13544","kind":"arxiv","version":1},"verdict":{"id":"68079bdf-1cad-4e6c-99d7-c7478705b059","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:54:17.227053Z","strongest_claim":"CA-GCL consistently outperforms existing VLP paradigms in zero-shot abnormality detection, achieving superior performance while exhibiting strong cross-dataset generalization. Crucially, CA-GCL reduces performance variance across diverse prompt templates, transforming the collapsed textual similarity distribution into a bell-shaped distribution.","one_line_summary":"CA-GCL adds global contrastive separation and clinical text augmentation to fine-grained vision-language pretraining, reducing textual embedding collapse and prompt variance in 3D medical image tasks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That enforcing global separation between anatomical categories via contrastive objectives will counteract local alignment collapse without degrading fine-grained visual-textual correspondences or introducing new instabilities in the latent space.","pith_extraction_headline":"A global contrastive objective separates anatomical categories to stop text embedding collapse in 3D medical vision-language models."},"references":{"count":26,"sample":[{"doi":"","year":2024,"title":"arXiv preprint arXiv:2404.00578 (2024)","work_id":"7bc29714-95ad-4249-a8ca-eabb142c8078","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Research Square pp","work_id":"e5973c5e-1a26-462d-a35d-c787e277c808","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2022,"title":"In: European conference on computer vision","work_id":"ca26f33f-f4b6-4b94-b8da-5c2b1d263de2","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2025,"title":"In: Proceedings of the IEEE/CVF International Conference on Computer Vision","work_id":"473639fc-af1c-4ff5-9887-22dacb201ae0","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"In: Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition","work_id":"3b96e836-d0c6-4774-8cb5-58a014d65c5f","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":26,"snapshot_sha256":"c7985de29124b0478d43bd63f9d6fd991016d088089ea2421a232caf9dabe5bc","internal_anchors":3},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.13544","created_at":"2026-05-18T02:44:23.939330+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.13544v1","created_at":"2026-05-18T02:44:23.939330+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.13544","created_at":"2026-05-18T02:44:23.939330+00:00"},{"alias_kind":"pith_short_12","alias_value":"4ZE5Q6363C7Q","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"4ZE5Q6363C7QJSGR","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"4ZE5Q636","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U","json":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U.json","graph_json":"https://pith.science/api/pith-number/4ZE5Q6363C7QJSGRO3TRM6NE4U/graph.json","events_json":"https://pith.science/api/pith-number/4ZE5Q6363C7QJSGRO3TRM6NE4U/events.json","paper":"https://pith.science/paper/4ZE5Q636"},"agent_actions":{"view_html":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U","download_json":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U.json","view_paper":"https://pith.science/paper/4ZE5Q636","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.13544&json=true","fetch_graph":"https://pith.science/api/pith-number/4ZE5Q6363C7QJSGRO3TRM6NE4U/graph.json","fetch_events":"https://pith.science/api/pith-number/4ZE5Q6363C7QJSGRO3TRM6NE4U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U/action/storage_attestation","attest_author":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U/action/author_attestation","sign_citation":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U/action/citation_signature","submit_replication":"https://pith.science/pith/4ZE5Q6363C7QJSGRO3TRM6NE4U/action/replication_record"}},"created_at":"2026-05-18T02:44:23.939330+00:00","updated_at":"2026-05-18T02:44:23.939330+00:00"}