{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:TFTEWVSYWF7O5DHNT6WZX5JHGR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"17feb0459b0875e04814cf344cc8739af9865d3befae50bd6cc6209589f3442a","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-12-17T19:38:29Z","title_canon_sha256":"936d2e4d57636448eb6cd503ac77137b6f4294275ef3b4765027deb8814370ac"},"schema_version":"1.0","source":{"id":"2412.16209","kind":"arxiv","version":5}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2412.16209","created_at":"2026-06-02T02:04:04Z"},{"alias_kind":"arxiv_version","alias_value":"2412.16209v5","created_at":"2026-06-02T02:04:04Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2412.16209","created_at":"2026-06-02T02:04:04Z"},{"alias_kind":"pith_short_12","alias_value":"TFTEWVSYWF7O","created_at":"2026-06-02T02:04:04Z"},{"alias_kind":"pith_short_16","alias_value":"TFTEWVSYWF7O5DHN","created_at":"2026-06-02T02:04:04Z"},{"alias_kind":"pith_short_8","alias_value":"TFTEWVSY","created_at":"2026-06-02T02:04:04Z"}],"graph_snapshots":[{"event_id":"sha256:09e5d536fff82e17dc809b32738ff3058454b4bb0448779f3f5436b093d1b613","target":"graph","created_at":"2026-06-02T02:04:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2412.16209/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"When using machine learning for imbalanced binary classification problems, it is common to subsample the majority class to create a (more) balanced training dataset. This biases the model's predictions because the model learns from data that is not fully representative of the underlying population of interest. One way of accounting for this bias is analytically mapping the resulting predictions to new values based on the sampling rate for the majority class. We show that calibrating a random forest this way has negative consequences, including prevalence estimates that depend on both the numbe","authors_text":"Daniel J. Lizotte, Douglas G. Woolford, Nathan Phelps","cross_cats":["stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-12-17T19:38:29Z","title":"Challenges in the calibration of tree-based models for imbalanced classification"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2412.16209","kind":"arxiv","version":5},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b0b0304c769055aeb8b5c8593d9ef8739dfcf797ab8a3aec0d7e7abaf9aa7eff","target":"record","created_at":"2026-06-02T02:04:04Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"17feb0459b0875e04814cf344cc8739af9865d3befae50bd6cc6209589f3442a","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2024-12-17T19:38:29Z","title_canon_sha256":"936d2e4d57636448eb6cd503ac77137b6f4294275ef3b4765027deb8814370ac"},"schema_version":"1.0","source":{"id":"2412.16209","kind":"arxiv","version":5}},"canonical_sha256":"99664b5658b17eee8ced9fad9bf5273465c0ce158850ad9393439fa765cdfb46","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"99664b5658b17eee8ced9fad9bf5273465c0ce158850ad9393439fa765cdfb46","first_computed_at":"2026-06-02T02:04:04.758119Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:04:04.758119Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"W3t3u0UNCbHBMWtnirZY7YHhqHrjFibB1lbVdyrRbuFtfhUbeplYq5hmoMNP9oVZycK9ngDE9vcscpEOcmx5Bw==","signature_status":"signed_v1","signed_at":"2026-06-02T02:04:04.758586Z","signed_message":"canonical_sha256_bytes"},"source_id":"2412.16209","source_kind":"arxiv","source_version":5}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b0b0304c769055aeb8b5c8593d9ef8739dfcf797ab8a3aec0d7e7abaf9aa7eff","sha256:09e5d536fff82e17dc809b32738ff3058454b4bb0448779f3f5436b093d1b613"],"state_sha256":"d0d67c1ff6f506178b976194a46f9f388f1e7894a1ff2ce73e42976a80d169b8"}