{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2YCCGMFATOWEQSTO3WGTZL76OW","short_pith_number":"pith:2YCCGMFA","schema_version":"1.0","canonical_sha256":"d6042330a09bac484a6edd8d3caffe75b1ed852529c5405ec73bde6fc858a950","source":{"kind":"arxiv","id":"2605.19101","version":1},"attestation_state":"computed","paper":{"title":"Heterogeneity-Aware Dataset Scheduling for Efficient Audio Large Language Model Training","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.SD","authors_text":"Chongxin Gan, Jianning Wang, Yang Li, Yanru Wu","submitted_at":"2026-05-18T20:41:08Z","abstract_excerpt":"Training general-purpose Audio Large Language Models (ALLMs) across diverse datasets is essential for holistic audio understanding, yet it faces significant challenges due to dataset heterogeneity, which often leads to conflicting gradients and slow convergence. Despite its impact, how to explicitly manage this heterogeneity during training remains underexplored, with current practices relying primarily on uniform mixture. In this work, we analyze multi-dataset AudioQA training from a convergence perspective and propose Grouped Sequential Training (GST). GST strategically organizes datasets in"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.19101","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-05-18T20:41:08Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"d2f2e869dc92de286a551168ed387533eb25a8faf9779ce8384f5eaa6413fea8","abstract_canon_sha256":"79d9153a9b0e8b5f00aefc628daf6f29878fe2746437d5d79f14336c38035998"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T01:05:27.185960Z","signature_b64":"zmpsnZGAVswbeUaQHF0m4OZQXJfIsfrk6xY9QOpxzWVqXCvrEUmyXlUpk68Sgcr5wQAXSDtuGiE4p4YpZNQYAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d6042330a09bac484a6edd8d3caffe75b1ed852529c5405ec73bde6fc858a950","last_reissued_at":"2026-05-20T01:05:27.185203Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T01:05:27.185203Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Heterogeneity-Aware Dataset Scheduling for Efficient Audio Large Language Model Training","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.SD","authors_text":"Chongxin Gan, Jianning Wang, Yang Li, Yanru Wu","submitted_at":"2026-05-18T20:41:08Z","abstract_excerpt":"Training general-purpose Audio Large Language Models (ALLMs) across diverse datasets is essential for holistic audio understanding, yet it faces significant challenges due to dataset heterogeneity, which often leads to conflicting gradients and slow convergence. Despite its impact, how to explicitly manage this heterogeneity during training remains underexplored, with current practices relying primarily on uniform mixture. In this work, we analyze multi-dataset AudioQA training from a convergence perspective and propose Grouped Sequential Training (GST). GST strategically organizes datasets in"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.19101","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.19101/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.19101","created_at":"2026-05-20T01:05:27.185333+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.19101v1","created_at":"2026-05-20T01:05:27.185333+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.19101","created_at":"2026-05-20T01:05:27.185333+00:00"},{"alias_kind":"pith_short_12","alias_value":"2YCCGMFATOWE","created_at":"2026-05-20T01:05:27.185333+00:00"},{"alias_kind":"pith_short_16","alias_value":"2YCCGMFATOWEQSTO","created_at":"2026-05-20T01:05:27.185333+00:00"},{"alias_kind":"pith_short_8","alias_value":"2YCCGMFA","created_at":"2026-05-20T01:05:27.185333+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW","json":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW.json","graph_json":"https://pith.science/api/pith-number/2YCCGMFATOWEQSTO3WGTZL76OW/graph.json","events_json":"https://pith.science/api/pith-number/2YCCGMFATOWEQSTO3WGTZL76OW/events.json","paper":"https://pith.science/paper/2YCCGMFA"},"agent_actions":{"view_html":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW","download_json":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW.json","view_paper":"https://pith.science/paper/2YCCGMFA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.19101&json=true","fetch_graph":"https://pith.science/api/pith-number/2YCCGMFATOWEQSTO3WGTZL76OW/graph.json","fetch_events":"https://pith.science/api/pith-number/2YCCGMFATOWEQSTO3WGTZL76OW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW/action/storage_attestation","attest_author":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW/action/author_attestation","sign_citation":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW/action/citation_signature","submit_replication":"https://pith.science/pith/2YCCGMFATOWEQSTO3WGTZL76OW/action/replication_record"}},"created_at":"2026-05-20T01:05:27.185333+00:00","updated_at":"2026-05-20T01:05:27.185333+00:00"}