{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:27VZKU34OVNW73436WUSTFXXCD","short_pith_number":"pith:27VZKU34","schema_version":"1.0","canonical_sha256":"d7eb95537c755b6fef9bf5a92996f710f74d777ac296a0b8b300363a1b3f5af9","source":{"kind":"arxiv","id":"1901.07042","version":5},"attestation_state":"computed","paper":{"title":"MIMIC-CXR-JPG, a large publicly available database of labeled chest radiographs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A large dataset of 377,110 labeled chest x-rays is now publicly available for medical computer vision research.","cross_cats":["cs.LG","eess.IV"],"primary_cat":"cs.CV","authors_text":"Alistair E. W. Johnson, Chih-ying Deng, Matthew P. Lungren, Nathaniel R. Greenbaum, Roger G. Mark, Seth J. Berkowitz, Steven Horng, Tom J. Pollard, Yifan Peng, Zhiyong Lu","submitted_at":"2019-01-21T19:01:00Z","abstract_excerpt":"Chest radiography is an extremely powerful imaging modality, allowing for a detailed inspection of a patient's thorax, but requiring specialized training for proper interpretation. With the advent of high performance general purpose computer vision algorithms, the accurate automated analysis of chest radiographs is becoming increasingly of interest to researchers. However, a key challenge in the development of these techniques is the lack of sufficient data. Here we describe MIMIC-CXR-JPG v2.0.0, a large dataset of 377,110 chest x-rays associated with 227,827 imaging studies sourced from the B"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"1901.07042","kind":"arxiv","version":5},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-21T19:01:00Z","cross_cats_sorted":["cs.LG","eess.IV"],"title_canon_sha256":"24cfb386d9a7373fa8fa9d036b1aa0ee88e836ad71c982b120129e6ecd275082","abstract_canon_sha256":"639dce0f9864968c4a8b6512ba0a98f110bd85dfc65d509a73d3b861224262a9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:15.154807Z","signature_b64":"L3d9gnf2/DMfu6JEDaQdkPnyWLeyLUzHXoIs9ltxLbkmZYYmuM8XI6wC1IaWVtzRNRHJ1ipH9urbfsMTJ+J7DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d7eb95537c755b6fef9bf5a92996f710f74d777ac296a0b8b300363a1b3f5af9","last_reissued_at":"2026-05-17T23:38:15.154275Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:15.154275Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MIMIC-CXR-JPG, a large publicly available database of labeled chest radiographs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"A large dataset of 377,110 labeled chest x-rays is now publicly available for medical computer vision research.","cross_cats":["cs.LG","eess.IV"],"primary_cat":"cs.CV","authors_text":"Alistair E. W. Johnson, Chih-ying Deng, Matthew P. Lungren, Nathaniel R. Greenbaum, Roger G. Mark, Seth J. Berkowitz, Steven Horng, Tom J. Pollard, Yifan Peng, Zhiyong Lu","submitted_at":"2019-01-21T19:01:00Z","abstract_excerpt":"Chest radiography is an extremely powerful imaging modality, allowing for a detailed inspection of a patient's thorax, but requiring specialized training for proper interpretation. With the advent of high performance general purpose computer vision algorithms, the accurate automated analysis of chest radiographs is becoming increasingly of interest to researchers. However, a key challenge in the development of these techniques is the lack of sufficient data. Here we describe MIMIC-CXR-JPG v2.0.0, a large dataset of 377,110 chest x-rays associated with 227,827 imaging studies sourced from the B"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"MIMIC-CXR-JPG v2.0.0 is a large dataset of 377,110 chest x-rays associated with 227,827 imaging studies... Images are provided with 14 labels derived from two natural language processing tools applied to the corresponding free-text radiology reports.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The 14 labels produced by the two NLP tools accurately capture the clinical content of the radiology reports and correspond to verifiable findings in the images.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"MIMIC-CXR-JPG provides 377,110 labeled chest X-rays derived from MIMIC-CXR with NLP-generated labels and standard splits for medical imaging AI development.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A large dataset of 377,110 labeled chest x-rays is now publicly available for medical computer vision research.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"bea0fbe2f0ce4ffe3f8cf2d3acdf66e289dc94a2c25b0924c537285485575904"},"source":{"id":"1901.07042","kind":"arxiv","version":5},"verdict":{"id":"486278c7-eda1-46a8-b596-84564d1a4cfd","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T04:11:25.530810Z","strongest_claim":"MIMIC-CXR-JPG v2.0.0 is a large dataset of 377,110 chest x-rays associated with 227,827 imaging studies... Images are provided with 14 labels derived from two natural language processing tools applied to the corresponding free-text radiology reports.","one_line_summary":"MIMIC-CXR-JPG provides 377,110 labeled chest X-rays derived from MIMIC-CXR with NLP-generated labels and standard splits for medical imaging AI development.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The 14 labels produced by the two NLP tools accurately capture the clinical content of the radiology reports and correspond to verifiable findings in the images.","pith_extraction_headline":"A large dataset of 377,110 labeled chest x-rays is now publicly available for medical computer vision research."},"references":{"count":20,"sample":[{"doi":"","year":2015,"title":"The US radiologist workforce: an analysis of temporal and geographic variation by using large national datasets","work_id":"9f15271a-ed7e-4b0e-9a0e-c8130e722f6c","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"A county-level analysis of the US radiologist workforce: physician supply and subspecialty characteristics","work_id":"5fcad9a6-2e73-4864-bf18-81caaac5e278","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Radiologist shortage leaves patient care at risk, warns royal college","work_id":"ed0980c5-0a89-4536-b2fe-7b32c1c74c5a","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2017,"title":"Improving Patient Safety: Avoiding Unread Imaging Exams in the National V A Enterprise Electronic Health Record","work_id":"325e2855-6e68-40d5-8b56-d28b655378d1","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2015,"title":"Imaging in the land of 1000 hills: Rwanda radiology country report","work_id":"5ac6745c-c94e-4d67-8d08-03d418a86220","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":20,"snapshot_sha256":"80b67c410054b016bbbe27a921a31daac29e7aee35aeb9db462b662cae487411","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"c5e2ea1e29b793a94a9578329f2dd94966b37161a5edfd0f3ef16506b40ee1f3"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.07042","created_at":"2026-05-17T23:38:15.154366+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.07042v5","created_at":"2026-05-17T23:38:15.154366+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.07042","created_at":"2026-05-17T23:38:15.154366+00:00"},{"alias_kind":"pith_short_12","alias_value":"27VZKU34OVNW","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"27VZKU34OVNW7343","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"27VZKU34","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":29,"internal_anchor_count":29,"sample":[{"citing_arxiv_id":"1906.09354","citing_title":"Boosting the rule-out accuracy of deep disease detection using class weight modifiers","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2408.16213","citing_title":"M4CXR: Exploring Multi-task Potentials of Multi-modal Large Language Models for Chest X-ray Interpretation","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2503.18297","citing_title":"Image-to-Text for Medical Reports Using Adaptive Co-Attention and Triple-LSTM Module","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"2504.07415","citing_title":"RA-RRG: Multimodal Retrieval-Augmented Radiology Report Generation with Key Phrase Extraction","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15574","citing_title":"MI-CXR: A Benchmark for Longitudinal Reasoning over Multi-Interval Chest X-rays","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16639","citing_title":"MedMIX: Modality-Internal Expert Fusion for Multimodal Medical Diagnosis","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17436","citing_title":"Medical Context Distorts Decisions in Clinical Vision Language Models","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19374","citing_title":"Concept-Guided Noisy Negative Suppression for Zero-Shot Classification and Grounding of Chest X-Ray Findings","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20158","citing_title":"Rethinking Visual Attribution for Chest X-ray Reasoning in Large Vision Language Models","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2506.11989","citing_title":"Thought Graph Traversal for Test-time Scaling in Chest X-ray VLLMs","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2509.20490","citing_title":"RadAgents: Multimodal Agentic Reasoning for Chest X-ray Interpretation with Radiologist-like Workflows","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2511.11030","citing_title":"Algorithms Trained on Normal Chest X-rays Can Predict Health Insurance Types","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2512.09315","citing_title":"Benchmarking Real-World Medical Image Classification with Noisy Labels: Challenges, Practice, and Outlook","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2602.12705","citing_title":"MedXIAOHE: A Comprehensive Recipe for Building Medical MLLMs","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2603.01756","citing_title":"NeuroSymb-MRG: Differentiable Abductive Reasoning with Active Uncertainty Minimization for Radiology Report Generation","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2404.18416","citing_title":"Capabilities of Gemini Models in Medicine","ref_index":60,"is_internal_anchor":true},{"citing_arxiv_id":"2603.15525","citing_title":"Clinically Aware Synthetic Image Generation for Concept Coverage in Chest X-ray Models","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11304","citing_title":"CheXTemporal: A Dataset for Temporally-Grounded Reasoning in Chest Radiography","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.27559","citing_title":"RIHA: Report-Image Hierarchical Alignment for Radiology Report Generation","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10761","citing_title":"RadThinking: A Dataset for Longitudinal Clinical Reasoning in Radiology","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2604.22989","citing_title":"CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00887","citing_title":"SparseContrast: Dynamic Sparse Attention for Efficient and Accurate Contrastive Learning in Medical Imaging","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2604.12152","citing_title":"Domain-Specific Latent Representations Improve the Fidelity of Diffusion-Based Medical Image Super-Resolution","ref_index":54,"is_internal_anchor":true},{"citing_arxiv_id":"2604.11835","citing_title":"Schema-Adaptive Tabular Representation Learning with LLMs for Generalizable Multimodal Clinical Reasoning","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"2604.10233","citing_title":"Adapting 2D Multi-Modal Large Language Model for 3D CT Image Analysis","ref_index":32,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":1,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD","json":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD.json","graph_json":"https://pith.science/api/pith-number/27VZKU34OVNW73436WUSTFXXCD/graph.json","events_json":"https://pith.science/api/pith-number/27VZKU34OVNW73436WUSTFXXCD/events.json","paper":"https://pith.science/paper/27VZKU34"},"agent_actions":{"view_html":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD","download_json":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD.json","view_paper":"https://pith.science/paper/27VZKU34","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.07042&json=true","fetch_graph":"https://pith.science/api/pith-number/27VZKU34OVNW73436WUSTFXXCD/graph.json","fetch_events":"https://pith.science/api/pith-number/27VZKU34OVNW73436WUSTFXXCD/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD/action/timestamp_anchor","attest_storage":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD/action/storage_attestation","attest_author":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD/action/author_attestation","sign_citation":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD/action/citation_signature","submit_replication":"https://pith.science/pith/27VZKU34OVNW73436WUSTFXXCD/action/replication_record"}},"created_at":"2026-05-17T23:38:15.154366+00:00","updated_at":"2026-05-17T23:38:15.154366+00:00"}