{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:YAZRXLIOVTW7Y4ULJ4U44RQMNM","short_pith_number":"pith:YAZRXLIO","schema_version":"1.0","canonical_sha256":"c0331bad0eacedfc728b4f29ce460c6b38ab2b70a8643d5e9381d4e97e6acda7","source":{"kind":"arxiv","id":"2509.19671","version":3},"attestation_state":"computed","paper":{"title":"Revisiting Performance Claims for Chest X-Ray Models Using Clinical Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Andrew Wang, Jiashuo Zhang, Michael Oberst","submitted_at":"2025-09-24T01:10:35Z","abstract_excerpt":"Public datasets of Chest X-Rays (CXRs) have long been a popular benchmark for developing machine learning (ML) computer vision models in healthcare. However, the reported strong average-case performance of these models do not necessarily reflect their actual utility when used in heterogeneous clinical settings, potentially masking weaker performance in medically significant scenarios. In this work we use clinical context to provide a more holistic evaluation of models for CXR diagnosis. In particular, we use discharge summaries, recorded prior to each CXR, to derive a ``pre-CXR'' probability o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.19671","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-09-24T01:10:35Z","cross_cats_sorted":[],"title_canon_sha256":"65014989ae272a392456a48fcac98593a86a271ad683480279554d899f8b3bf2","abstract_canon_sha256":"7627022c26048ecbd54a3d92050fc894924232683fca6ccb88b274d194d157bd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T00:14:00.372273Z","signature_b64":"uK303PN39o1ZVP/nOP7pra4BGfNaqCpNujwl33s1U1K3vUuD8AoQ4kBFFRGEtnwHmU0dktWwY+3GtCOeAgkxAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c0331bad0eacedfc728b4f29ce460c6b38ab2b70a8643d5e9381d4e97e6acda7","last_reissued_at":"2026-06-29T00:14:00.371765Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T00:14:00.371765Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Revisiting Performance Claims for Chest X-Ray Models Using Clinical Context","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Andrew Wang, Jiashuo Zhang, Michael Oberst","submitted_at":"2025-09-24T01:10:35Z","abstract_excerpt":"Public datasets of Chest X-Rays (CXRs) have long been a popular benchmark for developing machine learning (ML) computer vision models in healthcare. However, the reported strong average-case performance of these models do not necessarily reflect their actual utility when used in heterogeneous clinical settings, potentially masking weaker performance in medically significant scenarios. In this work we use clinical context to provide a more holistic evaluation of models for CXR diagnosis. In particular, we use discharge summaries, recorded prior to each CXR, to derive a ``pre-CXR'' probability o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.19671","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.19671/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.19671","created_at":"2026-06-29T00:14:00.371824+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.19671v3","created_at":"2026-06-29T00:14:00.371824+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.19671","created_at":"2026-06-29T00:14:00.371824+00:00"},{"alias_kind":"pith_short_12","alias_value":"YAZRXLIOVTW7","created_at":"2026-06-29T00:14:00.371824+00:00"},{"alias_kind":"pith_short_16","alias_value":"YAZRXLIOVTW7Y4UL","created_at":"2026-06-29T00:14:00.371824+00:00"},{"alias_kind":"pith_short_8","alias_value":"YAZRXLIO","created_at":"2026-06-29T00:14:00.371824+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.18753","citing_title":"Handling and Interpreting Missing Modalities in Patient Clinical Trajectories via Autoregressive Sequence Modeling","ref_index":13,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM","json":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM.json","graph_json":"https://pith.science/api/pith-number/YAZRXLIOVTW7Y4ULJ4U44RQMNM/graph.json","events_json":"https://pith.science/api/pith-number/YAZRXLIOVTW7Y4ULJ4U44RQMNM/events.json","paper":"https://pith.science/paper/YAZRXLIO"},"agent_actions":{"view_html":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM","download_json":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM.json","view_paper":"https://pith.science/paper/YAZRXLIO","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.19671&json=true","fetch_graph":"https://pith.science/api/pith-number/YAZRXLIOVTW7Y4ULJ4U44RQMNM/graph.json","fetch_events":"https://pith.science/api/pith-number/YAZRXLIOVTW7Y4ULJ4U44RQMNM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM/action/storage_attestation","attest_author":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM/action/author_attestation","sign_citation":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM/action/citation_signature","submit_replication":"https://pith.science/pith/YAZRXLIOVTW7Y4ULJ4U44RQMNM/action/replication_record"}},"created_at":"2026-06-29T00:14:00.371824+00:00","updated_at":"2026-06-29T00:14:00.371824+00:00"}