{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:3DTOXHQJSVX2GEGN756TIV66R5","short_pith_number":"pith:3DTOXHQJ","schema_version":"1.0","canonical_sha256":"d8e6eb9e09956fa310cdff7d3457de8f486c85987a5752c9ceb3e28b3ddc8b02","source":{"kind":"arxiv","id":"2606.23301","version":1},"attestation_state":"computed","paper":{"title":"EHR-Complex: Benchmarking Medical Agents for Complex Clinical Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Jian Wang, Jinjie Gu, Kui Ren, Lei Liu, Yitong Qiao, Yue Shen, Zhixuan Chu","submitted_at":"2026-06-22T13:14:21Z","abstract_excerpt":"Clinical agents promise to democratize access to electronic health records (EHRs), yet existing benchmarks fail to reflect the complexity of practical EHR analysis, e.g., often operating on idealized, clean EHRs via static SQL generation rather than interactive execution. In this work, we introduce EHR-Complex, a large-scale benchmark designed for interactive clinical database reasoning. Built on the large MIMIC-IV substrate (365K patients, 31 tables, 500M+ records), EHR-Complex comprises about 52K tasks spanning six clinical intents, supporting both patient-level and population-level queries,"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.23301","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-06-22T13:14:21Z","cross_cats_sorted":[],"title_canon_sha256":"1ac9ff5ab080c55c6a047436ba7fb60206c8076fc74a5174209e7ba851d041b5","abstract_canon_sha256":"b3289ca3f454574199339361c261cd95a181a015061c2954c5540020600abcf0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-23T03:14:16.297037Z","signature_b64":"+wwPy0nVlvmQUjEc4XWnwcxaJCQT96QUUbLnQbU7ownodnpMA5CVtI2Mma0gC+OqWhuJ9qtBVT6vOvw/ib/kAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d8e6eb9e09956fa310cdff7d3457de8f486c85987a5752c9ceb3e28b3ddc8b02","last_reissued_at":"2026-06-23T03:14:16.296644Z","signature_status":"signed_v1","first_computed_at":"2026-06-23T03:14:16.296644Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"EHR-Complex: Benchmarking Medical Agents for Complex Clinical Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Jian Wang, Jinjie Gu, Kui Ren, Lei Liu, Yitong Qiao, Yue Shen, Zhixuan Chu","submitted_at":"2026-06-22T13:14:21Z","abstract_excerpt":"Clinical agents promise to democratize access to electronic health records (EHRs), yet existing benchmarks fail to reflect the complexity of practical EHR analysis, e.g., often operating on idealized, clean EHRs via static SQL generation rather than interactive execution. In this work, we introduce EHR-Complex, a large-scale benchmark designed for interactive clinical database reasoning. Built on the large MIMIC-IV substrate (365K patients, 31 tables, 500M+ records), EHR-Complex comprises about 52K tasks spanning six clinical intents, supporting both patient-level and population-level queries,"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.23301","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.23301/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.23301","created_at":"2026-06-23T03:14:16.296702+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.23301v1","created_at":"2026-06-23T03:14:16.296702+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.23301","created_at":"2026-06-23T03:14:16.296702+00:00"},{"alias_kind":"pith_short_12","alias_value":"3DTOXHQJSVX2","created_at":"2026-06-23T03:14:16.296702+00:00"},{"alias_kind":"pith_short_16","alias_value":"3DTOXHQJSVX2GEGN","created_at":"2026-06-23T03:14:16.296702+00:00"},{"alias_kind":"pith_short_8","alias_value":"3DTOXHQJ","created_at":"2026-06-23T03:14:16.296702+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5","json":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5.json","graph_json":"https://pith.science/api/pith-number/3DTOXHQJSVX2GEGN756TIV66R5/graph.json","events_json":"https://pith.science/api/pith-number/3DTOXHQJSVX2GEGN756TIV66R5/events.json","paper":"https://pith.science/paper/3DTOXHQJ"},"agent_actions":{"view_html":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5","download_json":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5.json","view_paper":"https://pith.science/paper/3DTOXHQJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.23301&json=true","fetch_graph":"https://pith.science/api/pith-number/3DTOXHQJSVX2GEGN756TIV66R5/graph.json","fetch_events":"https://pith.science/api/pith-number/3DTOXHQJSVX2GEGN756TIV66R5/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5/action/storage_attestation","attest_author":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5/action/author_attestation","sign_citation":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5/action/citation_signature","submit_replication":"https://pith.science/pith/3DTOXHQJSVX2GEGN756TIV66R5/action/replication_record"}},"created_at":"2026-06-23T03:14:16.296702+00:00","updated_at":"2026-06-23T03:14:16.296702+00:00"}