{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:YLCNGINIAXAOKFCDVSOMDF3LB6","short_pith_number":"pith:YLCNGINI","schema_version":"1.0","canonical_sha256":"c2c4d321a805c0e51443ac9cc1976b0fa1ef2c09358c360a5099042273c755f3","source":{"kind":"arxiv","id":"2604.10783","version":2},"attestation_state":"computed","paper":{"title":"Learning Preference-Based Objectives from Clinical Narratives for Dynamic Sepsis Treatment","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Clinical narratives supply preference signals that train rewards yielding better recovery in sequential treatment policies.","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Arturo Yong Yao Neo, Daniel J. Tan, Jayne Hui Zhen Chan, Kai Wen Hwang, Kay Choong See, Mengling Feng","submitted_at":"2026-04-12T19:18:02Z","abstract_excerpt":"Designing reward functions for reinforcement learning (RL) in healthcare remains challenging because clinically meaningful outcomes are sparse, delayed, and difficult to explicitly specify. Although structured clinical data capture physiologic states, they often fail to reflect broader aspects of patient trajectories such as treatment response, recovery dynamics, and intervention burden. Clinical narratives, by contrast, encode longitudinal clinician assessments of disease progression, treatment effectiveness, and recovery, providing a potential source of trajectory-level supervision beyond pr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.10783","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-04-12T19:18:02Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"9783c1112bd70d73fbe1e612a52a75ce9b5e47e274b99ecd93bc73b9581ce887","abstract_canon_sha256":"000828a8122a2c5b0e0f458508872216a5f59f3907c99e9daa8457c173ecbd7b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:05:09.221022Z","signature_b64":"E8ummO8J0o7kvKe+u/PYyHR+cWKpAT3r5XPNwO72Y8eS569csHF3BwhB2904j4KV/rZjpT7+TqoKWIVTHgSnBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c2c4d321a805c0e51443ac9cc1976b0fa1ef2c09358c360a5099042273c755f3","last_reissued_at":"2026-05-26T02:05:09.220173Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:05:09.220173Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Preference-Based Objectives from Clinical Narratives for Dynamic Sepsis Treatment","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Clinical narratives supply preference signals that train rewards yielding better recovery in sequential treatment policies.","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Arturo Yong Yao Neo, Daniel J. Tan, Jayne Hui Zhen Chan, Kai Wen Hwang, Kay Choong See, Mengling Feng","submitted_at":"2026-04-12T19:18:02Z","abstract_excerpt":"Designing reward functions for reinforcement learning (RL) in healthcare remains challenging because clinically meaningful outcomes are sparse, delayed, and difficult to explicitly specify. Although structured clinical data capture physiologic states, they often fail to reflect broader aspects of patient trajectories such as treatment response, recovery dynamics, and intervention burden. Clinical narratives, by contrast, encode longitudinal clinician assessments of disease progression, treatment effectiveness, and recovery, providing a potential source of trajectory-level supervision beyond pr"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"The learned reward aligns strongly with trajectory quality (Spearman rho = 0.63) and enables policies that are consistently associated with improved recovery-related outcomes, including increased organ support-free days and faster shock resolution, while maintaining comparable performance on mortality. These effects persist under external validation.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That LLM-derived trajectory quality scores and pairwise preferences from discharge summaries accurately and unbiasedly reflect true clinical trajectory quality, patient preferences, and treatment effectiveness without significant influence from narrative variability, LLM biases, or selection effects in the data.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"CN-PR learns reward functions from LLM-derived preferences over clinical trajectories to improve RL policies for sequential treatment decisions, showing correlation with quality scores and better recovery outcomes.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Clinical narratives supply preference signals that train rewards yielding better recovery in sequential treatment policies.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"c001aff38cc89309091fb76ef60268c5074cf1f9d594ff77ef1482587708e23d"},"source":{"id":"2604.10783","kind":"arxiv","version":2},"verdict":{"id":"f6c26b22-4e47-4d5d-a20a-4f19e938d53b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T15:33:37.916432Z","strongest_claim":"The learned reward aligns strongly with trajectory quality (Spearman rho = 0.63) and enables policies that are consistently associated with improved recovery-related outcomes, including increased organ support-free days and faster shock resolution, while maintaining comparable performance on mortality. These effects persist under external validation.","one_line_summary":"CN-PR learns reward functions from LLM-derived preferences over clinical trajectories to improve RL policies for sequential treatment decisions, showing correlation with quality scores and better recovery outcomes.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That LLM-derived trajectory quality scores and pairwise preferences from discharge summaries accurately and unbiasedly reflect true clinical trajectory quality, patient preferences, and treatment effectiveness without significant influence from narrative variability, LLM biases, or selection effects in the data.","pith_extraction_headline":"Clinical narratives supply preference signals that train rewards yielding better recovery in sequential treatment policies."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.10783/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.10783","created_at":"2026-05-26T02:05:09.220329+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.10783v2","created_at":"2026-05-26T02:05:09.220329+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.10783","created_at":"2026-05-26T02:05:09.220329+00:00"},{"alias_kind":"pith_short_12","alias_value":"YLCNGINIAXAO","created_at":"2026-05-26T02:05:09.220329+00:00"},{"alias_kind":"pith_short_16","alias_value":"YLCNGINIAXAOKFCD","created_at":"2026-05-26T02:05:09.220329+00:00"},{"alias_kind":"pith_short_8","alias_value":"YLCNGINI","created_at":"2026-05-26T02:05:09.220329+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6","json":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6.json","graph_json":"https://pith.science/api/pith-number/YLCNGINIAXAOKFCDVSOMDF3LB6/graph.json","events_json":"https://pith.science/api/pith-number/YLCNGINIAXAOKFCDVSOMDF3LB6/events.json","paper":"https://pith.science/paper/YLCNGINI"},"agent_actions":{"view_html":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6","download_json":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6.json","view_paper":"https://pith.science/paper/YLCNGINI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.10783&json=true","fetch_graph":"https://pith.science/api/pith-number/YLCNGINIAXAOKFCDVSOMDF3LB6/graph.json","fetch_events":"https://pith.science/api/pith-number/YLCNGINIAXAOKFCDVSOMDF3LB6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6/action/storage_attestation","attest_author":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6/action/author_attestation","sign_citation":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6/action/citation_signature","submit_replication":"https://pith.science/pith/YLCNGINIAXAOKFCDVSOMDF3LB6/action/replication_record"}},"created_at":"2026-05-26T02:05:09.220329+00:00","updated_at":"2026-05-26T02:05:09.220329+00:00"}