{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:44QNB6TK73THN7WR46PEEWJBML","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5"},"schema_version":"1.0","source":{"id":"2601.22297","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"arxiv_version","alias_value":"2601.22297v2","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_12","alias_value":"44QNB6TK73TH","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_16","alias_value":"44QNB6TK73THN7WR","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_8","alias_value":"44QNB6TK","created_at":"2026-05-20T00:04:23Z"}],"graph_snapshots":[{"event_id":"sha256:840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf","target":"graph","created_at":"2026-05-20T00:04:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2601.22297/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The reasoning abilities of large language models (LLMs) have been substantially improved by reinforcement learning with verifiable rewards (RLVR). At test time, collaborative reasoning through Multi-Agent Debate (MAD) has emerged as a promising approach for enhancing LLM performance. However, current RLVR methods typically train LLMs to solve problems in isolation, without explicitly preparing them to synthesize and benefit from different rationales that arise during debate. In this work, we propose Self-Debate Reinforcement Learning(SDRL), a training framework where models learn from self-deb","authors_text":"Chenxi Liu, Heng Huang, Ruibo Chen, Tianyi Xiong, Tong Zheng, Yanshuo Chen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title":"Learning from Self-Debate: Preparing Reasoning Models for Multi-Agent Debate"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.22297","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7","target":"record","created_at":"2026-05-20T00:04:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5"},"schema_version":"1.0","source":{"id":"2601.22297","kind":"arxiv","version":2}},"canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","first_computed_at":"2026-05-20T00:04:23.829234Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:23.829234Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7gq5JaxFJvtmta/LBcG3jmI8g0iVEXv+T3H7WEBM3u+fNbZinuFSQBJAJ8RV8mswrrwczWJ9nFjANJM50GqhBw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:23.829991Z","signed_message":"canonical_sha256_bytes"},"source_id":"2601.22297","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7","sha256:840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf"],"state_sha256":"bb38aea6a3e9595728db77294a7229c16db0978b45cf843f46244d2c5642f1f3"}