{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:LXKEYMNJGPBMHP5WHXR6S57A4Q","short_pith_number":"pith:LXKEYMNJ","schema_version":"1.0","canonical_sha256":"5dd44c31a933c2c3bfb63de3e977e0e402dc5d56048ad7aca3c971ee97f2d148","source":{"kind":"arxiv","id":"2605.26646","version":1},"attestation_state":"computed","paper":{"title":"UnityMAS-O: A General RL Optimization Framework for LLM-Based Multi-Agent Systems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.MA"],"primary_cat":"cs.AI","authors_text":"Bin Zhang, Biqing Qi, Erhan Zhang, Haitao Li, Jiaxin Mao, Jinyuan Feng, Lingyong Yan, Qi Liu, Rui Li, Shijie Wang, Wei Yang, Xiaochi Wei, Yan Gao, Yao Hu, Yiqun Chen, Yi Wu, Zechun Niu","submitted_at":"2026-05-26T07:30:03Z","abstract_excerpt":"LLM-based multi-agent systems decompose complex tasks into interacting roles, but most remain manually orchestrated by prompts, tools, and control rules, while agents are rarely optimized through a unified reinforcement learning interface. Existing RL post-training frameworks mainly target single-policy optimization and lack abstractions for user-defined multi-agent workflows, structured interaction, role-specific credit assignment, and configurable parameter sharing.\n  We present UnityMAS-O, a general RL optimization framework for LLM-based multi-agent systems. UnityMAS-O treats the complete "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.26646","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-05-26T07:30:03Z","cross_cats_sorted":["cs.CL","cs.MA"],"title_canon_sha256":"ab9dcccf1d81af46e8c46a71b8093659ee01d59dcb961cbf85d9c1aece3bfcf4","abstract_canon_sha256":"fad35e7854e513a31cf406b699ee177f10f10fca3dc5889b35066b93c8e54ef2"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:06:03.929155Z","signature_b64":"Cp5c7KGZlu7fWSNhR0HnsOMY49sm4Ok0vFvPGEdRa4IR7flGCVO476UmmctsAQBCm63vPS6ZofWmjkvqZxYACw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5dd44c31a933c2c3bfb63de3e977e0e402dc5d56048ad7aca3c971ee97f2d148","last_reissued_at":"2026-05-27T01:06:03.928303Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:06:03.928303Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"UnityMAS-O: A General RL Optimization Framework for LLM-Based Multi-Agent Systems","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.MA"],"primary_cat":"cs.AI","authors_text":"Bin Zhang, Biqing Qi, Erhan Zhang, Haitao Li, Jiaxin Mao, Jinyuan Feng, Lingyong Yan, Qi Liu, Rui Li, Shijie Wang, Wei Yang, Xiaochi Wei, Yan Gao, Yao Hu, Yiqun Chen, Yi Wu, Zechun Niu","submitted_at":"2026-05-26T07:30:03Z","abstract_excerpt":"LLM-based multi-agent systems decompose complex tasks into interacting roles, but most remain manually orchestrated by prompts, tools, and control rules, while agents are rarely optimized through a unified reinforcement learning interface. Existing RL post-training frameworks mainly target single-policy optimization and lack abstractions for user-defined multi-agent workflows, structured interaction, role-specific credit assignment, and configurable parameter sharing.\n  We present UnityMAS-O, a general RL optimization framework for LLM-based multi-agent systems. UnityMAS-O treats the complete "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26646","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.26646/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.26646","created_at":"2026-05-27T01:06:03.928445+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.26646v1","created_at":"2026-05-27T01:06:03.928445+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26646","created_at":"2026-05-27T01:06:03.928445+00:00"},{"alias_kind":"pith_short_12","alias_value":"LXKEYMNJGPBM","created_at":"2026-05-27T01:06:03.928445+00:00"},{"alias_kind":"pith_short_16","alias_value":"LXKEYMNJGPBMHP5W","created_at":"2026-05-27T01:06:03.928445+00:00"},{"alias_kind":"pith_short_8","alias_value":"LXKEYMNJ","created_at":"2026-05-27T01:06:03.928445+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q","json":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q.json","graph_json":"https://pith.science/api/pith-number/LXKEYMNJGPBMHP5WHXR6S57A4Q/graph.json","events_json":"https://pith.science/api/pith-number/LXKEYMNJGPBMHP5WHXR6S57A4Q/events.json","paper":"https://pith.science/paper/LXKEYMNJ"},"agent_actions":{"view_html":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q","download_json":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q.json","view_paper":"https://pith.science/paper/LXKEYMNJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.26646&json=true","fetch_graph":"https://pith.science/api/pith-number/LXKEYMNJGPBMHP5WHXR6S57A4Q/graph.json","fetch_events":"https://pith.science/api/pith-number/LXKEYMNJGPBMHP5WHXR6S57A4Q/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q/action/timestamp_anchor","attest_storage":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q/action/storage_attestation","attest_author":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q/action/author_attestation","sign_citation":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q/action/citation_signature","submit_replication":"https://pith.science/pith/LXKEYMNJGPBMHP5WHXR6S57A4Q/action/replication_record"}},"created_at":"2026-05-27T01:06:03.928445+00:00","updated_at":"2026-05-27T01:06:03.928445+00:00"}