{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ZLZTKUJAYXPUKWIPQV26DXQGTK","short_pith_number":"pith:ZLZTKUJA","canonical_record":{"source":{"id":"2605.14147","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T21:57:38Z","cross_cats_sorted":[],"title_canon_sha256":"a44340b3470611b140eb2bbba369beab5a7aae9958673643771f9884001ed26d","abstract_canon_sha256":"eaeacbea86cb77724acb8d2b0c3ae14d722b91944782a9fa53bd7189b76c53f6"},"schema_version":"1.0"},"canonical_sha256":"caf3355120c5df45590f8575e1de069a84adc372ca6e45c6f0b9408aa61771b3","source":{"kind":"arxiv","id":"2605.14147","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14147","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14147v1","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14147","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"pith_short_12","alias_value":"ZLZTKUJAYXPU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"ZLZTKUJAYXPUKWIP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"ZLZTKUJA","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ZLZTKUJAYXPUKWIPQV26DXQGTK","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14147","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T21:57:38Z","cross_cats_sorted":[],"title_canon_sha256":"a44340b3470611b140eb2bbba369beab5a7aae9958673643771f9884001ed26d","abstract_canon_sha256":"eaeacbea86cb77724acb8d2b0c3ae14d722b91944782a9fa53bd7189b76c53f6"},"schema_version":"1.0"},"canonical_sha256":"caf3355120c5df45590f8575e1de069a84adc372ca6e45c6f0b9408aa61771b3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:11.618042Z","signature_b64":"8xMCOs9co6dK8Mf5pObCcwdbKbcPO9fQ4jYskFNyf77+dhjIuLR2u/x+vHXQ30eiF1xcHKWv0NG37P6pAbXMDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"caf3355120c5df45590f8575e1de069a84adc372ca6e45c6f0b9408aa61771b3","last_reissued_at":"2026-05-17T23:39:11.617459Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:11.617459Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14147","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xx3mskmVf/277L6LQxz73EH8KrggHr3U9fzRJD2Qc4R1p21dtyNSNuFWjF92icMSPx+uJgFYE+8KOtM6udoHBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:13:16.402766Z"},"content_sha256":"336e1bc3b1c54d9a7a17efb73e538dea55542b498b44d953424c4fbbfb20dacb","schema_version":"1.0","event_id":"sha256:336e1bc3b1c54d9a7a17efb73e538dea55542b498b44d953424c4fbbfb20dacb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ZLZTKUJAYXPUKWIPQV26DXQGTK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A Systematic Evaluation of Imbalance Handling Methods in Biomedical Binary Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jiandong Chen, Ju Sun, Le Peng, Lingjie Su, Rui Zhang, Yash Travadi","submitted_at":"2026-05-13T21:57:38Z","abstract_excerpt":"Objective: The primary goal of this study was to systematically examine the impact of commonly used imbalance handling methods (IHMs) on predictive performance in biomedical binary classification, considering the interplay between model complexity and diverse data modalities.\n  Material and Methods: We evaluated five representative IHMs: random undersampling (RUS), random oversampling (ROS), SMOTE, re-weighting (RW), and direct F1-score optimization (DMO), against a raw training (RAW) baseline. The evaluation encompassed three public biomedical datasets: MIMIC-III (tabular), ADE-Corpus-V2 (tex"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"clear benefits were observed for more complex models and unstructured data: (a) ROS and RW consistently enhanced the performance of powerful models; (b) direct F1-score optimization demonstrated utility primarily for unstructured text and image data; and (c) RUS and SMOTE consistently degraded performance and are therefore not recommended.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the three chosen public datasets and the selected model architectures sufficiently represent the broader space of biomedical binary classification problems so that the observed patterns generalize.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Random oversampling and re-weighting boost complex models on unstructured biomedical data, but undersampling and SMOTE degrade results and simple models on tabular data see no benefit.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"42e85600647f37551903d3ad9e44bec665127eaab8b1ca52583bb47d41f3eecd"},"source":{"id":"2605.14147","kind":"arxiv","version":1},"verdict":{"id":"aa377807-8686-4c7d-a8bc-dd561791156b","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T04:47:03.226655Z","strongest_claim":"clear benefits were observed for more complex models and unstructured data: (a) ROS and RW consistently enhanced the performance of powerful models; (b) direct F1-score optimization demonstrated utility primarily for unstructured text and image data; and (c) RUS and SMOTE consistently degraded performance and are therefore not recommended.","one_line_summary":"Random oversampling and re-weighting boost complex models on unstructured biomedical data, but undersampling and SMOTE degrade results and simple models on tabular data see no benefit.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the three chosen public datasets and the selected model architectures sufficiently represent the broader space of biomedical binary classification problems so that the observed patterns generalize.","pith_extraction_headline":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones."},"references":{"count":3,"sample":[{"doi":"10.1145/1273496.1273614","year":2025,"title":"Aftab, J. et al. Artificial intelligence based classification and prediction of medical imaging using a novel framework of inverted and self-attention deep neural network architecture. Sci. Rep. 15, 8","work_id":"67ed078d-f365-40c6-932e-0643d36a60e1","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.48550/arxiv.2507.15240","year":2025,"title":"Peng, L., Travadi, Y., He, C., Cui, Y. & Sun, J. Exact Reformulation and Optimization for Direct Metric Optimization in Binary Imbalanced Classification. Preprint at https://doi.org/10.48550/arXiv.250","work_id":"ef33fd71-6226-4686-82ed-f473754dba93","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/2020.findings-emnlp.187","year":2015,"title":"MiME: Multilevel Medical Embedding of Electronic Health Records for Predictive Healthcare","work_id":"9a5efd45-573e-4d18-b020-23a564030b16","ref_index":3,"cited_arxiv_id":"1810.09593","is_internal_anchor":true}],"resolved_work":3,"snapshot_sha256":"c510f9a87caf4a0b78aee039730f428e0cb01feddf4cc2e93eb925c36f0cb9a3","internal_anchors":1},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"aa377807-8686-4c7d-a8bc-dd561791156b"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ZysfOIJfyQgXYbIQUu30sZ64wzfj/1yPnLdbARIwpcEuVtkswomAbYN19PViB10CimlPY4u/IlmZNru89gU5AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T05:13:16.403746Z"},"content_sha256":"c40b06b519ba7a3b7448f830889a183d01633b658c797a429c6d5937be78f389","schema_version":"1.0","event_id":"sha256:c40b06b519ba7a3b7448f830889a183d01633b658c797a429c6d5937be78f389"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/bundle.json","state_url":"https://pith.science/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T05:13:16Z","links":{"resolver":"https://pith.science/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK","bundle":"https://pith.science/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/bundle.json","state":"https://pith.science/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZLZTKUJAYXPUKWIPQV26DXQGTK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZLZTKUJAYXPUKWIPQV26DXQGTK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"eaeacbea86cb77724acb8d2b0c3ae14d722b91944782a9fa53bd7189b76c53f6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T21:57:38Z","title_canon_sha256":"a44340b3470611b140eb2bbba369beab5a7aae9958673643771f9884001ed26d"},"schema_version":"1.0","source":{"id":"2605.14147","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14147","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14147v1","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14147","created_at":"2026-05-17T23:39:11Z"},{"alias_kind":"pith_short_12","alias_value":"ZLZTKUJAYXPU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"ZLZTKUJAYXPUKWIP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"ZLZTKUJA","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:c40b06b519ba7a3b7448f830889a183d01633b658c797a429c6d5937be78f389","target":"graph","created_at":"2026-05-17T23:39:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"clear benefits were observed for more complex models and unstructured data: (a) ROS and RW consistently enhanced the performance of powerful models; (b) direct F1-score optimization demonstrated utility primarily for unstructured text and image data; and (c) RUS and SMOTE consistently degraded performance and are therefore not recommended."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the three chosen public datasets and the selected model architectures sufficiently represent the broader space of biomedical binary classification problems so that the observed patterns generalize."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Random oversampling and re-weighting boost complex models on unstructured biomedical data, but undersampling and SMOTE degrade results and simple models on tabular data see no benefit."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones."}],"snapshot_sha256":"42e85600647f37551903d3ad9e44bec665127eaab8b1ca52583bb47d41f3eecd"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Objective: The primary goal of this study was to systematically examine the impact of commonly used imbalance handling methods (IHMs) on predictive performance in biomedical binary classification, considering the interplay between model complexity and diverse data modalities.\n  Material and Methods: We evaluated five representative IHMs: random undersampling (RUS), random oversampling (ROS), SMOTE, re-weighting (RW), and direct F1-score optimization (DMO), against a raw training (RAW) baseline. The evaluation encompassed three public biomedical datasets: MIMIC-III (tabular), ADE-Corpus-V2 (tex","authors_text":"Jiandong Chen, Ju Sun, Le Peng, Lingjie Su, Rui Zhang, Yash Travadi","cross_cats":[],"headline":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T21:57:38Z","title":"A Systematic Evaluation of Imbalance Handling Methods in Biomedical Binary Classification"},"references":{"count":3,"internal_anchors":1,"resolved_work":3,"sample":[{"cited_arxiv_id":"","doi":"10.1145/1273496.1273614","is_internal_anchor":false,"ref_index":1,"title":"Aftab, J. et al. Artificial intelligence based classification and prediction of medical imaging using a novel framework of inverted and self-attention deep neural network architecture. Sci. Rep. 15, 8","work_id":"67ed078d-f365-40c6-932e-0643d36a60e1","year":2025},{"cited_arxiv_id":"","doi":"10.48550/arxiv.2507.15240","is_internal_anchor":false,"ref_index":2,"title":"Peng, L., Travadi, Y., He, C., Cui, Y. & Sun, J. Exact Reformulation and Optimization for Direct Metric Optimization in Binary Imbalanced Classification. Preprint at https://doi.org/10.48550/arXiv.250","work_id":"ef33fd71-6226-4686-82ed-f473754dba93","year":2025},{"cited_arxiv_id":"1810.09593","doi":"10.18653/v1/2020.findings-emnlp.187","is_internal_anchor":true,"ref_index":3,"title":"MiME: Multilevel Medical Embedding of Electronic Health Records for Predictive Healthcare","work_id":"9a5efd45-573e-4d18-b020-23a564030b16","year":2015}],"snapshot_sha256":"c510f9a87caf4a0b78aee039730f428e0cb01feddf4cc2e93eb925c36f0cb9a3"},"source":{"id":"2605.14147","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T04:47:03.226655Z","id":"aa377807-8686-4c7d-a8bc-dd561791156b","model_set":{"reader":"grok-4.3"},"one_line_summary":"Random oversampling and re-weighting boost complex models on unstructured biomedical data, but undersampling and SMOTE degrade results and simple models on tabular data see no benefit.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Imbalance handling boosts complex models on unstructured biomedical data but harms simple ones.","strongest_claim":"clear benefits were observed for more complex models and unstructured data: (a) ROS and RW consistently enhanced the performance of powerful models; (b) direct F1-score optimization demonstrated utility primarily for unstructured text and image data; and (c) RUS and SMOTE consistently degraded performance and are therefore not recommended.","weakest_assumption":"That the three chosen public datasets and the selected model architectures sufficiently represent the broader space of biomedical binary classification problems so that the observed patterns generalize."}},"verdict_id":"aa377807-8686-4c7d-a8bc-dd561791156b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:336e1bc3b1c54d9a7a17efb73e538dea55542b498b44d953424c4fbbfb20dacb","target":"record","created_at":"2026-05-17T23:39:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"eaeacbea86cb77724acb8d2b0c3ae14d722b91944782a9fa53bd7189b76c53f6","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-13T21:57:38Z","title_canon_sha256":"a44340b3470611b140eb2bbba369beab5a7aae9958673643771f9884001ed26d"},"schema_version":"1.0","source":{"id":"2605.14147","kind":"arxiv","version":1}},"canonical_sha256":"caf3355120c5df45590f8575e1de069a84adc372ca6e45c6f0b9408aa61771b3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"caf3355120c5df45590f8575e1de069a84adc372ca6e45c6f0b9408aa61771b3","first_computed_at":"2026-05-17T23:39:11.617459Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:11.617459Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8xMCOs9co6dK8Mf5pObCcwdbKbcPO9fQ4jYskFNyf77+dhjIuLR2u/x+vHXQ30eiF1xcHKWv0NG37P6pAbXMDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:11.618042Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14147","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:336e1bc3b1c54d9a7a17efb73e538dea55542b498b44d953424c4fbbfb20dacb","sha256:c40b06b519ba7a3b7448f830889a183d01633b658c797a429c6d5937be78f389"],"state_sha256":"9dde5db6d02ed2b2b145771e245d98ee10afe5090bef84dd12ce6310ae5ac9fa"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iE+ZxH1hbFFhAqeuMNGWL8fGQIaIIq9sy4TqliccfJuuSt7lQ9DLvSwGXSdggK3YwovfUaTJ+GRtDtjU0GcVDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T05:13:16.408543Z","bundle_sha256":"143bfb7f495823d70111fc904a135f62164564b5fbfbaaca9a012c9424ecf0f6"}}