{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MRJXLS3EYCITCMEF7VEYCZXHZH","short_pith_number":"pith:MRJXLS3E","schema_version":"1.0","canonical_sha256":"645375cb64c091313085fd498166e7c9e85d07e4df9c585e90356d645c4dfe72","source":{"kind":"arxiv","id":"2603.00610","version":3},"attestation_state":"computed","paper":{"title":"CMI-RewardBench: Evaluating Music Reward Models with Compositional Multimodal Instruction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Emmanouil Benetos, Haiwen Xia, Hewei Gao, Mingshuo Ding, Ruibin Yuan, Simon Dixon, Sungkyun Chang, Weixiong Chen, Yinghao Ma, Yizhi Li, Yuchen Yang, Yuxin Ye","submitted_at":"2026-02-28T12:10:58Z","abstract_excerpt":"While music generation models have evolved to handle complex multimodal inputs mixing text, lyrics, and reference audio, evaluation mechanisms have lagged behind. In this paper, we bridge this critical gap by establishing a comprehensive ecosystem for music reward modeling under Compositional Multimodal Instruction (CMI), where the generated music may be conditioned on text descriptions, lyrics, and audio prompts. We first introduce CMI-Pref-Pseudo, a large-scale preference dataset comprising 110k pseudo-labeled samples, and CMI-Pref, a high-quality, human-annotated corpus tailored for fine-gr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.00610","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SD","submitted_at":"2026-02-28T12:10:58Z","cross_cats_sorted":["cs.AI","cs.LG","cs.MM","eess.AS"],"title_canon_sha256":"01847e9f51857feae06967c5b59693972af1628f86cc954013a76bcb1c5d18e0","abstract_canon_sha256":"c4ebb457aabce40bee3a3362ea8c0a4774a341715f18a2e183f10adeb0908d48"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:09:24.290541Z","signature_b64":"2wo3Go1e+aRsK9T1zebJzyPVws30AtaZr4u1M5sje8L3oU5x9S77D9dkrCC5itousqncpEmgTOKnM1YM/AGNDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"645375cb64c091313085fd498166e7c9e85d07e4df9c585e90356d645c4dfe72","last_reissued_at":"2026-06-12T01:09:24.290059Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:09:24.290059Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"CMI-RewardBench: Evaluating Music Reward Models with Compositional Multimodal Instruction","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.LG","cs.MM","eess.AS"],"primary_cat":"cs.SD","authors_text":"Emmanouil Benetos, Haiwen Xia, Hewei Gao, Mingshuo Ding, Ruibin Yuan, Simon Dixon, Sungkyun Chang, Weixiong Chen, Yinghao Ma, Yizhi Li, Yuchen Yang, Yuxin Ye","submitted_at":"2026-02-28T12:10:58Z","abstract_excerpt":"While music generation models have evolved to handle complex multimodal inputs mixing text, lyrics, and reference audio, evaluation mechanisms have lagged behind. In this paper, we bridge this critical gap by establishing a comprehensive ecosystem for music reward modeling under Compositional Multimodal Instruction (CMI), where the generated music may be conditioned on text descriptions, lyrics, and audio prompts. We first introduce CMI-Pref-Pseudo, a large-scale preference dataset comprising 110k pseudo-labeled samples, and CMI-Pref, a high-quality, human-annotated corpus tailored for fine-gr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.00610","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.00610/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.00610","created_at":"2026-06-12T01:09:24.290114+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.00610v3","created_at":"2026-06-12T01:09:24.290114+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.00610","created_at":"2026-06-12T01:09:24.290114+00:00"},{"alias_kind":"pith_short_12","alias_value":"MRJXLS3EYCIT","created_at":"2026-06-12T01:09:24.290114+00:00"},{"alias_kind":"pith_short_16","alias_value":"MRJXLS3EYCITCMEF","created_at":"2026-06-12T01:09:24.290114+00:00"},{"alias_kind":"pith_short_8","alias_value":"MRJXLS3E","created_at":"2026-06-12T01:09:24.290114+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH","json":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH.json","graph_json":"https://pith.science/api/pith-number/MRJXLS3EYCITCMEF7VEYCZXHZH/graph.json","events_json":"https://pith.science/api/pith-number/MRJXLS3EYCITCMEF7VEYCZXHZH/events.json","paper":"https://pith.science/paper/MRJXLS3E"},"agent_actions":{"view_html":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH","download_json":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH.json","view_paper":"https://pith.science/paper/MRJXLS3E","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.00610&json=true","fetch_graph":"https://pith.science/api/pith-number/MRJXLS3EYCITCMEF7VEYCZXHZH/graph.json","fetch_events":"https://pith.science/api/pith-number/MRJXLS3EYCITCMEF7VEYCZXHZH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH/action/storage_attestation","attest_author":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH/action/author_attestation","sign_citation":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH/action/citation_signature","submit_replication":"https://pith.science/pith/MRJXLS3EYCITCMEF7VEYCZXHZH/action/replication_record"}},"created_at":"2026-06-12T01:09:24.290114+00:00","updated_at":"2026-06-12T01:09:24.290114+00:00"}