{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:44QNB6TK73THN7WR46PEEWJBML","short_pith_number":"pith:44QNB6TK","canonical_record":{"source":{"id":"2601.22297","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","cross_cats_sorted":[],"title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5","abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025"},"schema_version":"1.0"},"canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","source":{"kind":"arxiv","id":"2601.22297","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"arxiv_version","alias_value":"2601.22297v2","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_12","alias_value":"44QNB6TK73TH","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_16","alias_value":"44QNB6TK73THN7WR","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_8","alias_value":"44QNB6TK","created_at":"2026-05-20T00:04:23Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:44QNB6TK73THN7WR46PEEWJBML","target":"record","payload":{"canonical_record":{"source":{"id":"2601.22297","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","cross_cats_sorted":[],"title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5","abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025"},"schema_version":"1.0"},"canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:23.829991Z","signature_b64":"7gq5JaxFJvtmta/LBcG3jmI8g0iVEXv+T3H7WEBM3u+fNbZinuFSQBJAJ8RV8mswrrwczWJ9nFjANJM50GqhBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","last_reissued_at":"2026-05-20T00:04:23.829234Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:23.829234Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2601.22297","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2obl9igjFWGVfxrQ2ZvQX8s+FjFKnaDrzKCNw2eQkfXzMO2i0GtKM7fg4Oisgdt+J8ouhnk1G2OAvXyKWoPKAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T12:23:17.674743Z"},"content_sha256":"e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7","schema_version":"1.0","event_id":"sha256:e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:44QNB6TK73THN7WR46PEEWJBML","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Learning from Self-Debate: Preparing Reasoning Models for Multi-Agent Debate","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chenxi Liu, Heng Huang, Ruibo Chen, Tianyi Xiong, Tong Zheng, Yanshuo Chen","submitted_at":"2026-01-29T20:21:44Z","abstract_excerpt":"The reasoning abilities of large language models (LLMs) have been substantially improved by reinforcement learning with verifiable rewards (RLVR). At test time, collaborative reasoning through Multi-Agent Debate (MAD) has emerged as a promising approach for enhancing LLM performance. However, current RLVR methods typically train LLMs to solve problems in isolation, without explicitly preparing them to synthesize and benefit from different rationales that arise during debate. In this work, we propose Self-Debate Reinforcement Learning(SDRL), a training framework where models learn from self-deb"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.22297","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.22297/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"2r7dqdSwexjbGwQyqWimR5R//9IqnbJEAGLf8MWkbAiEDrsasUVbWCjsqkTMLJZjPJFNUy2z1k+gmgeTw+KTAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T12:23:17.675441Z"},"content_sha256":"840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf","schema_version":"1.0","event_id":"sha256:840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/44QNB6TK73THN7WR46PEEWJBML/bundle.json","state_url":"https://pith.science/pith/44QNB6TK73THN7WR46PEEWJBML/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/44QNB6TK73THN7WR46PEEWJBML/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T12:23:17Z","links":{"resolver":"https://pith.science/pith/44QNB6TK73THN7WR46PEEWJBML","bundle":"https://pith.science/pith/44QNB6TK73THN7WR46PEEWJBML/bundle.json","state":"https://pith.science/pith/44QNB6TK73THN7WR46PEEWJBML/state.json","well_known_bundle":"https://pith.science/.well-known/pith/44QNB6TK73THN7WR46PEEWJBML/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:44QNB6TK73THN7WR46PEEWJBML","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5"},"schema_version":"1.0","source":{"id":"2601.22297","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"arxiv_version","alias_value":"2601.22297v2","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.22297","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_12","alias_value":"44QNB6TK73TH","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_16","alias_value":"44QNB6TK73THN7WR","created_at":"2026-05-20T00:04:23Z"},{"alias_kind":"pith_short_8","alias_value":"44QNB6TK","created_at":"2026-05-20T00:04:23Z"}],"graph_snapshots":[{"event_id":"sha256:840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf","target":"graph","created_at":"2026-05-20T00:04:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2601.22297/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The reasoning abilities of large language models (LLMs) have been substantially improved by reinforcement learning with verifiable rewards (RLVR). At test time, collaborative reasoning through Multi-Agent Debate (MAD) has emerged as a promising approach for enhancing LLM performance. However, current RLVR methods typically train LLMs to solve problems in isolation, without explicitly preparing them to synthesize and benefit from different rationales that arise during debate. In this work, we propose Self-Debate Reinforcement Learning(SDRL), a training framework where models learn from self-deb","authors_text":"Chenxi Liu, Heng Huang, Ruibo Chen, Tianyi Xiong, Tong Zheng, Yanshuo Chen","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title":"Learning from Self-Debate: Preparing Reasoning Models for Multi-Agent Debate"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.22297","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7","target":"record","created_at":"2026-05-20T00:04:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0ec01432cdf3449c10a726e0c4002df0600337267604cbab42e9e4f84a45b025","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T20:21:44Z","title_canon_sha256":"a2c211972f15ef7860b2c174146a0400e453863f43eaed9d0d35c4eb83f1c8c5"},"schema_version":"1.0","source":{"id":"2601.22297","kind":"arxiv","version":2}},"canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e720d0fa6afee676fed1e79e42592162eefc04d6f1d43928b6adfcf07a49d2c0","first_computed_at":"2026-05-20T00:04:23.829234Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:23.829234Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7gq5JaxFJvtmta/LBcG3jmI8g0iVEXv+T3H7WEBM3u+fNbZinuFSQBJAJ8RV8mswrrwczWJ9nFjANJM50GqhBw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:23.829991Z","signed_message":"canonical_sha256_bytes"},"source_id":"2601.22297","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e2f22885ea683aa952837d8cb225e0569a0d07eb11ff9fbb4abfb3c717a6c3a7","sha256:840b9df026eacf06575da471fc7df4e8b339eb7e8a6af1f0192ea76fbaa56dcf"],"state_sha256":"bb38aea6a3e9595728db77294a7229c16db0978b45cf843f46244d2c5642f1f3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"y3K87Whk+sLbHjb+mWnFHE1kXQh1kS5dz9pJDc1P4T90tcIFwOmkAxDGL6qGjJrH5+CwoSW3N+RLJELMHbymCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T12:23:17.678925Z","bundle_sha256":"7c4ee9fda2d144bb6bc6ef682a9b5d0577962a760545b615607e401bb13b998d"}}