{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:HJZXXW5RYSZL5DFNN3EHFSC3MF","short_pith_number":"pith:HJZXXW5R","schema_version":"1.0","canonical_sha256":"3a737bdbb1c4b2be8cad6ec872c85b61457161e5281c84a5a5ab48d536f1f6e7","source":{"kind":"arxiv","id":"2607.01908","version":1},"attestation_state":"computed","paper":{"title":"Towards Real-World Ultrasound Understanding: Large Vision-Language Models from Multi-Image Examinations with Long-Form Reports","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bingcong Yan, Chunlei Li, Jingliang Hu, Lichao Mou, Xiao Xiang Zhu, Yilei Shi","submitted_at":"2026-07-02T09:08:46Z","abstract_excerpt":"Large vision-language models (LVLMs) have achieved strong performance across many medical imaging tasks, yet their application to ultrasound remains limited due to its inherent complexity and variability. In this work, we revisit what is truly needed to enable real-world ultrasound understanding. Instead of introducing complex architectures or elaborate training strategies, we show that data scale and clinically faithful data alignment are the key factors. We construct a large-scale dataset of 1.5M real-world ultrasound examinations, containing 17.7M images, multi-organ coverage, and paired un"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.01908","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-07-02T09:08:46Z","cross_cats_sorted":[],"title_canon_sha256":"b4637b6b529501ec4d8f18ce56186ae70c4c93e30826503dcea31e9c5c5e939d","abstract_canon_sha256":"db6c095dcd0c6bf26098eb151e39f67947af22ef6fde8552d9fde54b1f4912d4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:17:33.096092Z","signature_b64":"3K+MlhpS2qaza/cxkNxwahIDHsv24yMW2xBDCcd2rwMp700Uyc2zHg4yg0/PzMm+bZ1Sx6tlh8jXZQfd8L4EAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3a737bdbb1c4b2be8cad6ec872c85b61457161e5281c84a5a5ab48d536f1f6e7","last_reissued_at":"2026-07-03T01:17:33.095703Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:17:33.095703Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Towards Real-World Ultrasound Understanding: Large Vision-Language Models from Multi-Image Examinations with Long-Form Reports","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bingcong Yan, Chunlei Li, Jingliang Hu, Lichao Mou, Xiao Xiang Zhu, Yilei Shi","submitted_at":"2026-07-02T09:08:46Z","abstract_excerpt":"Large vision-language models (LVLMs) have achieved strong performance across many medical imaging tasks, yet their application to ultrasound remains limited due to its inherent complexity and variability. In this work, we revisit what is truly needed to enable real-world ultrasound understanding. Instead of introducing complex architectures or elaborate training strategies, we show that data scale and clinically faithful data alignment are the key factors. We construct a large-scale dataset of 1.5M real-world ultrasound examinations, containing 17.7M images, multi-organ coverage, and paired un"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.01908","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.01908/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.01908","created_at":"2026-07-03T01:17:33.095762+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.01908v1","created_at":"2026-07-03T01:17:33.095762+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.01908","created_at":"2026-07-03T01:17:33.095762+00:00"},{"alias_kind":"pith_short_12","alias_value":"HJZXXW5RYSZL","created_at":"2026-07-03T01:17:33.095762+00:00"},{"alias_kind":"pith_short_16","alias_value":"HJZXXW5RYSZL5DFN","created_at":"2026-07-03T01:17:33.095762+00:00"},{"alias_kind":"pith_short_8","alias_value":"HJZXXW5R","created_at":"2026-07-03T01:17:33.095762+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF","json":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF.json","graph_json":"https://pith.science/api/pith-number/HJZXXW5RYSZL5DFNN3EHFSC3MF/graph.json","events_json":"https://pith.science/api/pith-number/HJZXXW5RYSZL5DFNN3EHFSC3MF/events.json","paper":"https://pith.science/paper/HJZXXW5R"},"agent_actions":{"view_html":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF","download_json":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF.json","view_paper":"https://pith.science/paper/HJZXXW5R","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.01908&json=true","fetch_graph":"https://pith.science/api/pith-number/HJZXXW5RYSZL5DFNN3EHFSC3MF/graph.json","fetch_events":"https://pith.science/api/pith-number/HJZXXW5RYSZL5DFNN3EHFSC3MF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF/action/storage_attestation","attest_author":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF/action/author_attestation","sign_citation":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF/action/citation_signature","submit_replication":"https://pith.science/pith/HJZXXW5RYSZL5DFNN3EHFSC3MF/action/replication_record"}},"created_at":"2026-07-03T01:17:33.095762+00:00","updated_at":"2026-07-03T01:17:33.095762+00:00"}