{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2018:XKGCES2LAMO7KGKG7AJWWEE46Y","short_pith_number":"pith:XKGCES2L","schema_version":"1.0","canonical_sha256":"ba8c224b4b031df51946f8136b109cf610a2a58f404cd97a7f161b5fcb981419","source":{"kind":"arxiv","id":"1811.12583","version":1},"attestation_state":"computed","paper":{"title":"Rethinking clinical prediction: Why machine learning must consider year of care and feature aggregation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Anna Goldenberg, Bret Nestor, Geeticka Chauhan, Marzyeh Ghassemi, Matthew B. A. McDermott, Michael C. Hughes, Tristan Naumann","submitted_at":"2018-11-30T02:30:10Z","abstract_excerpt":"Machine learning for healthcare often trains models on de-identified datasets with randomly-shifted calendar dates, ignoring the fact that data were generated under hospital operation practices that change over time. These changing practices induce definitive changes in observed data which confound evaluations which do not account for dates and limit the generalisability of date-agnostic models. In this work, we establish the magnitude of this problem on MIMIC, a public hospital dataset, and showcase a simple solution. We augment MIMIC with the year in which care was provided and show that a m"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1811.12583","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-11-30T02:30:10Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"d0da84b6e8d5ea2207ca8055b0d4cc0717d287a084f32659b43dc2355d22b626","abstract_canon_sha256":"22f9b1eb87c93f044a97115c4006e698af9050a63f9b60f96d0dffd7e1561c61"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:59:29.195581Z","signature_b64":"hmmZTomUaWZLA1lk9KT1wjbl1Y6M34YJuwX2MWC6IgCJSk9J8oyQjhYc4+1O3Z6QUZpNtvSkCl2K1F85ZOlWAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ba8c224b4b031df51946f8136b109cf610a2a58f404cd97a7f161b5fcb981419","last_reissued_at":"2026-05-17T23:59:29.195004Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:59:29.195004Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Rethinking clinical prediction: Why machine learning must consider year of care and feature aggregation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Anna Goldenberg, Bret Nestor, Geeticka Chauhan, Marzyeh Ghassemi, Matthew B. A. McDermott, Michael C. Hughes, Tristan Naumann","submitted_at":"2018-11-30T02:30:10Z","abstract_excerpt":"Machine learning for healthcare often trains models on de-identified datasets with randomly-shifted calendar dates, ignoring the fact that data were generated under hospital operation practices that change over time. These changing practices induce definitive changes in observed data which confound evaluations which do not account for dates and limit the generalisability of date-agnostic models. In this work, we establish the magnitude of this problem on MIMIC, a public hospital dataset, and showcase a simple solution. We augment MIMIC with the year in which care was provided and show that a m"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1811.12583","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1811.12583","created_at":"2026-05-17T23:59:29.195086+00:00"},{"alias_kind":"arxiv_version","alias_value":"1811.12583v1","created_at":"2026-05-17T23:59:29.195086+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1811.12583","created_at":"2026-05-17T23:59:29.195086+00:00"},{"alias_kind":"pith_short_12","alias_value":"XKGCES2LAMO7","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_16","alias_value":"XKGCES2LAMO7KGKG","created_at":"2026-05-18T12:33:01.666342+00:00"},{"alias_kind":"pith_short_8","alias_value":"XKGCES2L","created_at":"2026-05-18T12:33:01.666342+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1907.01463","citing_title":"Reproducibility in Machine Learning for Health","ref_index":35,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y","json":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y.json","graph_json":"https://pith.science/api/pith-number/XKGCES2LAMO7KGKG7AJWWEE46Y/graph.json","events_json":"https://pith.science/api/pith-number/XKGCES2LAMO7KGKG7AJWWEE46Y/events.json","paper":"https://pith.science/paper/XKGCES2L"},"agent_actions":{"view_html":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y","download_json":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y.json","view_paper":"https://pith.science/paper/XKGCES2L","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1811.12583&json=true","fetch_graph":"https://pith.science/api/pith-number/XKGCES2LAMO7KGKG7AJWWEE46Y/graph.json","fetch_events":"https://pith.science/api/pith-number/XKGCES2LAMO7KGKG7AJWWEE46Y/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y/action/timestamp_anchor","attest_storage":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y/action/storage_attestation","attest_author":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y/action/author_attestation","sign_citation":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y/action/citation_signature","submit_replication":"https://pith.science/pith/XKGCES2LAMO7KGKG7AJWWEE46Y/action/replication_record"}},"created_at":"2026-05-17T23:59:29.195086+00:00","updated_at":"2026-05-17T23:59:29.195086+00:00"}