{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5OA6LXOL4YBZNXV5UXV6BA23X4","short_pith_number":"pith:5OA6LXOL","schema_version":"1.0","canonical_sha256":"eb81e5ddcbe60396debda5ebe0835bbf1c4f56e7144e37c583c4910f6e5ee8f1","source":{"kind":"arxiv","id":"2606.13598","version":1},"attestation_state":"computed","paper":{"title":"Reward Modeling for Multi-Agent Orchestration","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.CL","cs.LG","cs.MA"],"primary_cat":"cs.AI","authors_text":"Haizhou Shi, Hao Wang, King Yeung Tsang, Semih Yavuz, Shafiq Joty, Vishal Venkataramani, Zihao Zhao, Zixuan Ke","submitted_at":"2026-06-11T17:16:24Z","abstract_excerpt":"Multi-Agent Systems (MAS) built on Large Language Models (LLMs) require effective orchestration to coordinate specialized agents, yet training such orchestrators is hindered by limited supervision and high computational cost. We propose Orchestration Reward Modeling (OrchRM), a self-supervised framework for evaluating orchestration quality without human annotations. OrchRM leverages intermediate artifacts from multi-agent executions to construct win-lose pairs for Bradley-Terry reward model training. Unlike existing MAS test-time scaling and orchestrator training frameworks that rely on costly"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.13598","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-11T17:16:24Z","cross_cats_sorted":["cs.CL","cs.LG","cs.MA"],"title_canon_sha256":"f7bdc88b84a2471cb97ac5a976cbe2fa232f8e06927646487919cc8f083a4297","abstract_canon_sha256":"f1dd88210b3193825f6d2864eeced8e0e06adfa6399cf98346fcc68c8eced4d1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:10:12.289564Z","signature_b64":"GvSg8I54DrY/8S640kXXBpSzbzuEqnHnQyDfu0VyAGSGT1H+4vM4qYcalwRfn3CJXoiYGgTEKc8yGR6Gnk0SDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eb81e5ddcbe60396debda5ebe0835bbf1c4f56e7144e37c583c4910f6e5ee8f1","last_reissued_at":"2026-06-12T01:10:12.288803Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:10:12.288803Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Reward Modeling for Multi-Agent Orchestration","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.CL","cs.LG","cs.MA"],"primary_cat":"cs.AI","authors_text":"Haizhou Shi, Hao Wang, King Yeung Tsang, Semih Yavuz, Shafiq Joty, Vishal Venkataramani, Zihao Zhao, Zixuan Ke","submitted_at":"2026-06-11T17:16:24Z","abstract_excerpt":"Multi-Agent Systems (MAS) built on Large Language Models (LLMs) require effective orchestration to coordinate specialized agents, yet training such orchestrators is hindered by limited supervision and high computational cost. We propose Orchestration Reward Modeling (OrchRM), a self-supervised framework for evaluating orchestration quality without human annotations. OrchRM leverages intermediate artifacts from multi-agent executions to construct win-lose pairs for Bradley-Terry reward model training. Unlike existing MAS test-time scaling and orchestrator training frameworks that rely on costly"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.13598","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.13598/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.13598","created_at":"2026-06-12T01:10:12.288963+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.13598v1","created_at":"2026-06-12T01:10:12.288963+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.13598","created_at":"2026-06-12T01:10:12.288963+00:00"},{"alias_kind":"pith_short_12","alias_value":"5OA6LXOL4YBZ","created_at":"2026-06-12T01:10:12.288963+00:00"},{"alias_kind":"pith_short_16","alias_value":"5OA6LXOL4YBZNXV5","created_at":"2026-06-12T01:10:12.288963+00:00"},{"alias_kind":"pith_short_8","alias_value":"5OA6LXOL","created_at":"2026-06-12T01:10:12.288963+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4","json":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4.json","graph_json":"https://pith.science/api/pith-number/5OA6LXOL4YBZNXV5UXV6BA23X4/graph.json","events_json":"https://pith.science/api/pith-number/5OA6LXOL4YBZNXV5UXV6BA23X4/events.json","paper":"https://pith.science/paper/5OA6LXOL"},"agent_actions":{"view_html":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4","download_json":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4.json","view_paper":"https://pith.science/paper/5OA6LXOL","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.13598&json=true","fetch_graph":"https://pith.science/api/pith-number/5OA6LXOL4YBZNXV5UXV6BA23X4/graph.json","fetch_events":"https://pith.science/api/pith-number/5OA6LXOL4YBZNXV5UXV6BA23X4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4/action/storage_attestation","attest_author":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4/action/author_attestation","sign_citation":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4/action/citation_signature","submit_replication":"https://pith.science/pith/5OA6LXOL4YBZNXV5UXV6BA23X4/action/replication_record"}},"created_at":"2026-06-12T01:10:12.288963+00:00","updated_at":"2026-06-12T01:10:12.288963+00:00"}