{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:SXYCEOSMI4U5ML5TUYLL66EELQ","short_pith_number":"pith:SXYCEOSM","schema_version":"1.0","canonical_sha256":"95f0223a4c4729d62fb3a616bf78845c21e8e4068aac9e5d4892c795b1df1817","source":{"kind":"arxiv","id":"2510.09783","version":2},"attestation_state":"computed","paper":{"title":"Large Language Models for Imbalanced Classification: Diversity makes the difference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexis Whitton, Dang Nguyen, Kien Do, Sunil Gupta, Svetha Venkatesh, Taylor Braund, Thin Nguyen","submitted_at":"2025-10-10T18:45:29Z","abstract_excerpt":"Oversampling is one of the most widely used approaches for addressing imbalanced classification. The core idea is to generate additional minority samples to rebalance the dataset. Most existing methods, such as SMOTE, require converting categorical variables into numerical vectors, which often leads to information loss. Recently, large language model (LLM)-based methods have been introduced to overcome this limitation. However, current LLM-based approaches typically generate minority samples with limited diversity, reducing robustness and generalizability in downstream classification tasks. To"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2510.09783","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-10T18:45:29Z","cross_cats_sorted":["cs.AI","stat.ML"],"title_canon_sha256":"9c679121228dc7faf83f4355646e33a00f50abc9334495e735c6412fc882183b","abstract_canon_sha256":"1d2dfdfb2b151afe622eda6cbbdc76f4f84881b923c327c2224c4e55d87d90bf"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-09T02:07:10.948165Z","signature_b64":"+qngwB60Y9YY5QvMAVi7mq7ZcDMNACZ+Jr5po5SvQD7HEFH2yPbNZfAxqYeDRdQK36exko5Z2+KLVBgCH2WrDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"95f0223a4c4729d62fb3a616bf78845c21e8e4068aac9e5d4892c795b1df1817","last_reissued_at":"2026-06-09T02:07:10.947286Z","signature_status":"signed_v1","first_computed_at":"2026-06-09T02:07:10.947286Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Large Language Models for Imbalanced Classification: Diversity makes the difference","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","stat.ML"],"primary_cat":"cs.LG","authors_text":"Alexis Whitton, Dang Nguyen, Kien Do, Sunil Gupta, Svetha Venkatesh, Taylor Braund, Thin Nguyen","submitted_at":"2025-10-10T18:45:29Z","abstract_excerpt":"Oversampling is one of the most widely used approaches for addressing imbalanced classification. The core idea is to generate additional minority samples to rebalance the dataset. Most existing methods, such as SMOTE, require converting categorical variables into numerical vectors, which often leads to information loss. Recently, large language model (LLM)-based methods have been introduced to overcome this limitation. However, current LLM-based approaches typically generate minority samples with limited diversity, reducing robustness and generalizability in downstream classification tasks. To"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.09783","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.09783/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2510.09783","created_at":"2026-06-09T02:07:10.947382+00:00"},{"alias_kind":"arxiv_version","alias_value":"2510.09783v2","created_at":"2026-06-09T02:07:10.947382+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.09783","created_at":"2026-06-09T02:07:10.947382+00:00"},{"alias_kind":"pith_short_12","alias_value":"SXYCEOSMI4U5","created_at":"2026-06-09T02:07:10.947382+00:00"},{"alias_kind":"pith_short_16","alias_value":"SXYCEOSMI4U5ML5T","created_at":"2026-06-09T02:07:10.947382+00:00"},{"alias_kind":"pith_short_8","alias_value":"SXYCEOSM","created_at":"2026-06-09T02:07:10.947382+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.08628","citing_title":"Retrieval Augmented Classification for Confidential Documents","ref_index":5,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ","json":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ.json","graph_json":"https://pith.science/api/pith-number/SXYCEOSMI4U5ML5TUYLL66EELQ/graph.json","events_json":"https://pith.science/api/pith-number/SXYCEOSMI4U5ML5TUYLL66EELQ/events.json","paper":"https://pith.science/paper/SXYCEOSM"},"agent_actions":{"view_html":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ","download_json":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ.json","view_paper":"https://pith.science/paper/SXYCEOSM","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2510.09783&json=true","fetch_graph":"https://pith.science/api/pith-number/SXYCEOSMI4U5ML5TUYLL66EELQ/graph.json","fetch_events":"https://pith.science/api/pith-number/SXYCEOSMI4U5ML5TUYLL66EELQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ/action/storage_attestation","attest_author":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ/action/author_attestation","sign_citation":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ/action/citation_signature","submit_replication":"https://pith.science/pith/SXYCEOSMI4U5ML5TUYLL66EELQ/action/replication_record"}},"created_at":"2026-06-09T02:07:10.947382+00:00","updated_at":"2026-06-09T02:07:10.947382+00:00"}