{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:5BEDTMAES2QPL6Y5LLBUTM35TQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"02658ded90aea1684435a497bb5613e076343530cb802cc7fe3367e81e8e4e4b","cross_cats_sorted":["cs.CL","cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-30T06:09:01Z","title_canon_sha256":"f032af75bf53e21764d0af15e4d693b3623c5f390a4612ba4716611c91706082"},"schema_version":"1.0","source":{"id":"2606.31179","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.31179","created_at":"2026-07-01T01:17:31Z"},{"alias_kind":"arxiv_version","alias_value":"2606.31179v1","created_at":"2026-07-01T01:17:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.31179","created_at":"2026-07-01T01:17:31Z"},{"alias_kind":"pith_short_12","alias_value":"5BEDTMAES2QP","created_at":"2026-07-01T01:17:31Z"},{"alias_kind":"pith_short_16","alias_value":"5BEDTMAES2QPL6Y5","created_at":"2026-07-01T01:17:31Z"},{"alias_kind":"pith_short_8","alias_value":"5BEDTMAE","created_at":"2026-07-01T01:17:31Z"}],"graph_snapshots":[{"event_id":"sha256:db8380aeb2062c6d70858260fed155623f52eaf20da6760022b348865a62ca2a","target":"graph","created_at":"2026-07-01T01:17:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.31179/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As AI agents become increasingly capable of complex, long-horizon reasoning, rigorous and holistic evaluation is essential for measuring progress toward real-world healthcare applications. We introduce HealthAgentBench, a suite of 54 agentic healthcare tasks across 7 categories each with its unique environment. The benchmark suite spans diverse workflows throughout the patient journey and a broad range of modalities. Each task is designed to replicate an end-to-end clinical workflow: given minimal instructions, an agent must explore raw healthcare data, operate within a complex environment, an","authors_text":"Cliff Wong, Guanghui Qin, Hoifung Poon, Jason Entenmann, Jeya Maria Jose Valanarasu, Juan Manuel Zambrano Chaves, Maximilian Rokuss, Mingyu Lu, Mu Wei, Naoto Usuyama, Peniel Argaw, Qianchu Liu, Qin Liu, Sheng Zhang, Timothy Ossowski, Tristan Naumann, Wen-wai Yim, Yashna Hasija, Zilin Jing","cross_cats":["cs.CL","cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-30T06:09:01Z","title":"HealthAgentBench: A Unified Benchmark Suite of Realistic Agentic Healthcare Environments for Challenging Frontier AI Agents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.31179","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:697cb873660fa44e728f163a49e99c64a2ca4a0f285c5a96c07f579f66f54ad8","target":"record","created_at":"2026-07-01T01:17:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"02658ded90aea1684435a497bb5613e076343530cb802cc7fe3367e81e8e4e4b","cross_cats_sorted":["cs.CL","cs.CV"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-30T06:09:01Z","title_canon_sha256":"f032af75bf53e21764d0af15e4d693b3623c5f390a4612ba4716611c91706082"},"schema_version":"1.0","source":{"id":"2606.31179","kind":"arxiv","version":1}},"canonical_sha256":"e84839b00496a0f5fb1d5ac349b37d9c38e196fd9063697cfb26a58e145d2402","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e84839b00496a0f5fb1d5ac349b37d9c38e196fd9063697cfb26a58e145d2402","first_computed_at":"2026-07-01T01:17:31.470503Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-01T01:17:31.470503Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"D+M1lbVaTdrBdpOpVikdk7n1SCaFUfC/2WAGxb/tOCJfD5CvC7pXs7z04qn0AhT6zZ0pfj3+wrLm/3lAx/dhCQ==","signature_status":"signed_v1","signed_at":"2026-07-01T01:17:31.470920Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.31179","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:697cb873660fa44e728f163a49e99c64a2ca4a0f285c5a96c07f579f66f54ad8","sha256:db8380aeb2062c6d70858260fed155623f52eaf20da6760022b348865a62ca2a"],"state_sha256":"c496a37884bc9611dca3cf5f4e0439414bbf78769489af0bf3bb6c2d926ae05a"}