{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:WVCV7IYIOWGU22NLOZ2CFPF27O","short_pith_number":"pith:WVCV7IYI","canonical_record":{"source":{"id":"2605.14467","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T07:01:57Z","cross_cats_sorted":[],"title_canon_sha256":"639ef3276a18192cbfa2f7de31c276bfe7124f90f774ca84bf411a6f06a62ddf","abstract_canon_sha256":"29acc579c06db15fa930371e27e25b8992f3534f54978d5113fdb0831df8f5e7"},"schema_version":"1.0"},"canonical_sha256":"b5455fa308758d4d69ab767422bcbafb9886ddeaeef242cab8ee3b6bdd0babf3","source":{"kind":"arxiv","id":"2605.14467","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14467","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14467v1","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14467","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"pith_short_12","alias_value":"WVCV7IYIOWGU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"WVCV7IYIOWGU22NL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"WVCV7IYI","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:WVCV7IYIOWGU22NLOZ2CFPF27O","target":"record","payload":{"canonical_record":{"source":{"id":"2605.14467","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T07:01:57Z","cross_cats_sorted":[],"title_canon_sha256":"639ef3276a18192cbfa2f7de31c276bfe7124f90f774ca84bf411a6f06a62ddf","abstract_canon_sha256":"29acc579c06db15fa930371e27e25b8992f3534f54978d5113fdb0831df8f5e7"},"schema_version":"1.0"},"canonical_sha256":"b5455fa308758d4d69ab767422bcbafb9886ddeaeef242cab8ee3b6bdd0babf3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:06.707043Z","signature_b64":"I8bW9hrgmEuDXHkmYjAS2LnrkAif6RUoVYPqZkMn3qJ7EcKUhffRmzujJM2ymVc5U0o/T6wpeT92kbwWM2OlBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b5455fa308758d4d69ab767422bcbafb9886ddeaeef242cab8ee3b6bdd0babf3","last_reissued_at":"2026-05-17T23:39:06.706304Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:06.706304Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.14467","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OLGVhX5jeMqLUQErcYdZdnm2BGlxy4MynglucOW61pr99eIIQI3jaxsVtY4fXVanelRA+kIOVxJ/6XloiWupCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T15:25:31.139537Z"},"content_sha256":"11036ea2ba3964b3d1a864222c939621b9e59f89c91010cf3de38d89e593fcb9","schema_version":"1.0","event_id":"sha256:11036ea2ba3964b3d1a864222c939621b9e59f89c91010cf3de38d89e593fcb9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:WVCV7IYIOWGU22NLOZ2CFPF27O","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Focused PU learning from imbalanced data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data.","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Elias Zavitsanos, Georgios Paliouras","submitted_at":"2026-05-14T07:01:57Z","abstract_excerpt":"We propose a new method of learning from positive and unlabeled (PU) examples in highly imbalanced datasets. Many real-world problems, such as disease gene identification, targeted marketing, fraud detection, and recommender systems, are hard to address with machine learning methods, due to limited labeled data. Often, training data comprises positive and unlabeled instances, the latter typically being dominated by negative, but including also several positive instances. While PU learning is well-studied, few methods address imbalanced settings or hard-to-detect positive examples that resemble"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Our approach uses a focused empirical risk estimator, incorporating both positive and unlabeled examples to train binary classifiers. Empirical evaluations demonstrate state-of-the-art performance on imbalanced datasets under two labeling mechanisms - selecting positives completely at random (SCAR) and selecting at random (SAR).","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The focused estimator is assumed to remain effective when positive examples closely resemble negatives and when the data is highly imbalanced, with the SCAR and SAR mechanisms adequately representing real labeling processes.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A focused empirical risk estimator for PU learning achieves state-of-the-art results on imbalanced datasets under SCAR and SAR labeling mechanisms.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"68ad78c95994becd10487282da286f840db43d18309022b6d6deda3f59a32527"},"source":{"id":"2605.14467","kind":"arxiv","version":1},"verdict":{"id":"1e1ef6fe-1dd6-4108-8c0e-fe429caf7f4c","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T01:44:57.609669Z","strongest_claim":"Our approach uses a focused empirical risk estimator, incorporating both positive and unlabeled examples to train binary classifiers. Empirical evaluations demonstrate state-of-the-art performance on imbalanced datasets under two labeling mechanisms - selecting positives completely at random (SCAR) and selecting at random (SAR).","one_line_summary":"A focused empirical risk estimator for PU learning achieves state-of-the-art results on imbalanced datasets under SCAR and SAR labeling mechanisms.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The focused estimator is assumed to remain effective when positive examples closely resemble negatives and when the data is highly imbalanced, with the SCAR and SAR mechanisms adequately representing real labeling processes.","pith_extraction_headline":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data."},"references":{"count":63,"sample":[{"doi":"10.1109/tnnls.2013.2292894","year":2014,"title":"Classification in the Presence of Label Noise: A Survey","work_id":"8e4a8e8c-506d-4000-b294-2cb5743d3f14","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1093/nsr/nwx106","year":2017,"title":"A brief introduction to weakly supervised learning","work_id":"8faa2566-7a25-49ac-b848-2c521ff40c06","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2020,"title":"Machine Learning109(4), 719–760 (2020) https://doi.org/10.1007/ s10994-020-05877-5","work_id":"7c4e5cc2-3ef3-47d3-8f13-1bb74ce1da4f","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1016/j.neucom.2014.10.081","year":2015,"title":"Neurocomput- ing160, 73–84 (2015) https://doi.org/10.1016/j.neucom.2014.10.081 20","work_id":"d001bbb7-a441-47fd-bf08-a888cd0b3281","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2015,"title":"Building Classifiers to Predict the Start of Glucose-Lowering Pharmacotherapy Using Belgian Health Expenditure Data","work_id":"1aaea50a-a99e-4236-8c12-077ef874911a","ref_index":5,"cited_arxiv_id":"1504.07389","is_internal_anchor":true}],"resolved_work":63,"snapshot_sha256":"35bf4e01093c75e39934f97359e273be0a020047f92850921b873f61b0f13a8f","internal_anchors":2},"formal_canon":{"evidence_count":2,"snapshot_sha256":"51fc76b21d2d5ff5ea2ccdace1532ee4b52a52782f8fba039fd3268a5c3d0550"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"1e1ef6fe-1dd6-4108-8c0e-fe429caf7f4c"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KWT2+w+sMAv9DHGwlGqaYlvTBtvYe09Hjcakw9LvshGnTVRyHvvz5TTSI5Wh3Qy62DbXLHJF3APyKudmG99aBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T15:25:31.140218Z"},"content_sha256":"abaf612d106c9573a0635b0a65a71b7bb64e7cf86554e762dfe94748ca04bd95","schema_version":"1.0","event_id":"sha256:abaf612d106c9573a0635b0a65a71b7bb64e7cf86554e762dfe94748ca04bd95"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/bundle.json","state_url":"https://pith.science/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T15:25:31Z","links":{"resolver":"https://pith.science/pith/WVCV7IYIOWGU22NLOZ2CFPF27O","bundle":"https://pith.science/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/bundle.json","state":"https://pith.science/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WVCV7IYIOWGU22NLOZ2CFPF27O/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WVCV7IYIOWGU22NLOZ2CFPF27O","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"29acc579c06db15fa930371e27e25b8992f3534f54978d5113fdb0831df8f5e7","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T07:01:57Z","title_canon_sha256":"639ef3276a18192cbfa2f7de31c276bfe7124f90f774ca84bf411a6f06a62ddf"},"schema_version":"1.0","source":{"id":"2605.14467","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.14467","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"arxiv_version","alias_value":"2605.14467v1","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.14467","created_at":"2026-05-17T23:39:06Z"},{"alias_kind":"pith_short_12","alias_value":"WVCV7IYIOWGU","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"WVCV7IYIOWGU22NL","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"WVCV7IYI","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:abaf612d106c9573a0635b0a65a71b7bb64e7cf86554e762dfe94748ca04bd95","target":"graph","created_at":"2026-05-17T23:39:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Our approach uses a focused empirical risk estimator, incorporating both positive and unlabeled examples to train binary classifiers. Empirical evaluations demonstrate state-of-the-art performance on imbalanced datasets under two labeling mechanisms - selecting positives completely at random (SCAR) and selecting at random (SAR)."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The focused estimator is assumed to remain effective when positive examples closely resemble negatives and when the data is highly imbalanced, with the SCAR and SAR mechanisms adequately representing real labeling processes."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A focused empirical risk estimator for PU learning achieves state-of-the-art results on imbalanced datasets under SCAR and SAR labeling mechanisms."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data."}],"snapshot_sha256":"68ad78c95994becd10487282da286f840db43d18309022b6d6deda3f59a32527"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"51fc76b21d2d5ff5ea2ccdace1532ee4b52a52782f8fba039fd3268a5c3d0550"},"paper":{"abstract_excerpt":"We propose a new method of learning from positive and unlabeled (PU) examples in highly imbalanced datasets. Many real-world problems, such as disease gene identification, targeted marketing, fraud detection, and recommender systems, are hard to address with machine learning methods, due to limited labeled data. Often, training data comprises positive and unlabeled instances, the latter typically being dominated by negative, but including also several positive instances. While PU learning is well-studied, few methods address imbalanced settings or hard-to-detect positive examples that resemble","authors_text":"Elias Zavitsanos, Georgios Paliouras","cross_cats":[],"headline":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T07:01:57Z","title":"Focused PU learning from imbalanced data"},"references":{"count":63,"internal_anchors":2,"resolved_work":63,"sample":[{"cited_arxiv_id":"","doi":"10.1109/tnnls.2013.2292894","is_internal_anchor":false,"ref_index":1,"title":"Classification in the Presence of Label Noise: A Survey","work_id":"8e4a8e8c-506d-4000-b294-2cb5743d3f14","year":2014},{"cited_arxiv_id":"","doi":"10.1093/nsr/nwx106","is_internal_anchor":false,"ref_index":2,"title":"A brief introduction to weakly supervised learning","work_id":"8faa2566-7a25-49ac-b848-2c521ff40c06","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Machine Learning109(4), 719–760 (2020) https://doi.org/10.1007/ s10994-020-05877-5","work_id":"7c4e5cc2-3ef3-47d3-8f13-1bb74ce1da4f","year":2020},{"cited_arxiv_id":"","doi":"10.1016/j.neucom.2014.10.081","is_internal_anchor":false,"ref_index":4,"title":"Neurocomput- ing160, 73–84 (2015) https://doi.org/10.1016/j.neucom.2014.10.081 20","work_id":"d001bbb7-a441-47fd-bf08-a888cd0b3281","year":2015},{"cited_arxiv_id":"1504.07389","doi":"","is_internal_anchor":true,"ref_index":5,"title":"Building Classifiers to Predict the Start of Glucose-Lowering Pharmacotherapy Using Belgian Health Expenditure Data","work_id":"1aaea50a-a99e-4236-8c12-077ef874911a","year":2015}],"snapshot_sha256":"35bf4e01093c75e39934f97359e273be0a020047f92850921b873f61b0f13a8f"},"source":{"id":"2605.14467","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T01:44:57.609669Z","id":"1e1ef6fe-1dd6-4108-8c0e-fe429caf7f4c","model_set":{"reader":"grok-4.3"},"one_line_summary":"A focused empirical risk estimator for PU learning achieves state-of-the-art results on imbalanced datasets under SCAR and SAR labeling mechanisms.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A focused empirical risk estimator enables effective training of binary classifiers from positive and unlabeled examples in highly imbalanced data.","strongest_claim":"Our approach uses a focused empirical risk estimator, incorporating both positive and unlabeled examples to train binary classifiers. Empirical evaluations demonstrate state-of-the-art performance on imbalanced datasets under two labeling mechanisms - selecting positives completely at random (SCAR) and selecting at random (SAR).","weakest_assumption":"The focused estimator is assumed to remain effective when positive examples closely resemble negatives and when the data is highly imbalanced, with the SCAR and SAR mechanisms adequately representing real labeling processes."}},"verdict_id":"1e1ef6fe-1dd6-4108-8c0e-fe429caf7f4c"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:11036ea2ba3964b3d1a864222c939621b9e59f89c91010cf3de38d89e593fcb9","target":"record","created_at":"2026-05-17T23:39:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"29acc579c06db15fa930371e27e25b8992f3534f54978d5113fdb0831df8f5e7","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-14T07:01:57Z","title_canon_sha256":"639ef3276a18192cbfa2f7de31c276bfe7124f90f774ca84bf411a6f06a62ddf"},"schema_version":"1.0","source":{"id":"2605.14467","kind":"arxiv","version":1}},"canonical_sha256":"b5455fa308758d4d69ab767422bcbafb9886ddeaeef242cab8ee3b6bdd0babf3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b5455fa308758d4d69ab767422bcbafb9886ddeaeef242cab8ee3b6bdd0babf3","first_computed_at":"2026-05-17T23:39:06.706304Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:06.706304Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"I8bW9hrgmEuDXHkmYjAS2LnrkAif6RUoVYPqZkMn3qJ7EcKUhffRmzujJM2ymVc5U0o/T6wpeT92kbwWM2OlBQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:06.707043Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.14467","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:11036ea2ba3964b3d1a864222c939621b9e59f89c91010cf3de38d89e593fcb9","sha256:abaf612d106c9573a0635b0a65a71b7bb64e7cf86554e762dfe94748ca04bd95"],"state_sha256":"f768052c39aac1874ab401fc7017f5fc62cf9b633e1c54eca1f7acc791b9a339"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EJvDTitWvBewSXS27kEvlD9qrljoeb1E2RtgTqW0WZ4KaJzKco0zCtifqBvkfaQpQamN29c7Bkn2DyiJDgOFCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T15:25:31.143020Z","bundle_sha256":"385fbec6a65b2fa01a71f2b6166baf926081dd72d388d4ec6e1d288a6f1fc948"}}