{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:MRDWRL3OM5EXWVYIYD7TZKZIZG","short_pith_number":"pith:MRDWRL3O","canonical_record":{"source":{"id":"2508.05004","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-08-07T03:38:16Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"28e6dc030c05c9b1bd153d4c38293c8d96f6bfbd03c940830afe4162def8544e","abstract_canon_sha256":"b9e978157adc5a2629f1b85bdce9cf6c30705ad48fc37a0df5b12d4f8f7994f1"},"schema_version":"1.0"},"canonical_sha256":"644768af6e67497b5708c0ff3cab28c98ff9cc5e4125a68c2b8b7f77bb4af1f2","source":{"kind":"arxiv","id":"2508.05004","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.05004","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"arxiv_version","alias_value":"2508.05004v4","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.05004","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"pith_short_12","alias_value":"MRDWRL3OM5EX","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"MRDWRL3OM5EXWVYI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"MRDWRL3O","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:MRDWRL3OM5EXWVYIYD7TZKZIZG","target":"record","payload":{"canonical_record":{"source":{"id":"2508.05004","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-08-07T03:38:16Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"28e6dc030c05c9b1bd153d4c38293c8d96f6bfbd03c940830afe4162def8544e","abstract_canon_sha256":"b9e978157adc5a2629f1b85bdce9cf6c30705ad48fc37a0df5b12d4f8f7994f1"},"schema_version":"1.0"},"canonical_sha256":"644768af6e67497b5708c0ff3cab28c98ff9cc5e4125a68c2b8b7f77bb4af1f2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:22.079613Z","signature_b64":"gizSNUrT1kDzCzibQaHg2/yUYKTJxH7GdbsNK4+qQXJDxWHTneVULYkPE+o0qdYHtzdsYVFcV5fwbjnx/GbnDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"644768af6e67497b5708c0ff3cab28c98ff9cc5e4125a68c2b8b7f77bb4af1f2","last_reissued_at":"2026-05-17T23:39:22.078871Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:22.078871Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2508.05004","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bBWJMialWIA/eOMW6yiS92C4TSam1LtjgpbJrGrt7KmM0k9anpyc2MnFxZf0yPRNwHwgAShfgCDSXs2KOdUrCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T22:14:55.355970Z"},"content_sha256":"20acb8ae2a825340bbf46901d3238d60996a4f6791ad8ed638c1d56b745b8b45","schema_version":"1.0","event_id":"sha256:20acb8ae2a825340bbf46901d3238d60996a4f6791ad8ed638c1d56b745b8b45"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:MRDWRL3OM5EXWVYIYD7TZKZIZG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"R-Zero: Self-Evolving Reasoning LLM from Zero Data","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chengsong Huang, Dong Yu, Haitao Mi, Hongming Zhang, Jiaxin Huang, Ruosen Li, Wenhao Yu, Xiaoyang Wang, Zongxia Li","submitted_at":"2025-08-07T03:38:16Z","abstract_excerpt":"Self-evolving Large Language Models (LLMs) offer a scalable path toward super-intelligence by autonomously generating, refining, and learning from their own experiences. However, existing methods for training such models still rely heavily on vast human-curated tasks and labels, typically via fine-tuning or reinforcement learning, which poses a fundamental bottleneck to advancing AI systems toward capabilities beyond human intelligence. To overcome this limitation, we introduce R-Zero, a fully autonomous framework that generates its own training data from scratch. Starting from a single base L"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"R-Zero substantially improves reasoning capability across different backbone LLMs, e.g., boosting the Qwen3-4B-Base by +6.49 on math-reasoning benchmarks and +7.54 on general-domain reasoning benchmarks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the reward signals for the Challenger (proposing tasks near the edge of Solver capability) and Solver (solving those tasks) can be defined and optimized without any external human data or labels while still producing genuine capability gains rather than reward hacking or mode collapse.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"R-Zero lets a base LLM bootstrap its own reasoning curriculum by pitting a Challenger model against a Solver model that co-evolve through autonomous task generation and solution.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"385ae95a7795a1eee837ecb45abb954633c92cd8dd88eaf68f8e4802c9c33ebd"},"source":{"id":"2508.05004","kind":"arxiv","version":4},"verdict":{"id":"0534890d-5e2b-4a9f-9b72-43daf99cbc19","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T19:18:34.136554Z","strongest_claim":"R-Zero substantially improves reasoning capability across different backbone LLMs, e.g., boosting the Qwen3-4B-Base by +6.49 on math-reasoning benchmarks and +7.54 on general-domain reasoning benchmarks.","one_line_summary":"R-Zero lets a base LLM bootstrap its own reasoning curriculum by pitting a Challenger model against a Solver model that co-evolve through autonomous task generation and solution.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the reward signals for the Challenger (proposing tasks near the edge of Solver capability) and Solver (solving those tasks) can be defined and optimized without any external human data or labels while still producing genuine capability gains rather than reward hacking or mode collapse.","pith_extraction_headline":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b969f89b64b4b0ad4e2cd2162fe02902c4e8f98463deba6f8f4c9d78314e8a6e"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"0534890d-5e2b-4a9f-9b72-43daf99cbc19"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:22Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4Y4ny5sQ94rqLOpOEFcBYSHDtXOMUOFMCL2I4+3ZjN4khIzaYhxTLAwI7j437yGHE80+eauWSyMXorBJdEzSCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T22:14:55.356401Z"},"content_sha256":"ae1b8a5fbdcd94209d0b268ba4bcdb1b505bb62866b6e021064980e9bc049a55","schema_version":"1.0","event_id":"sha256:ae1b8a5fbdcd94209d0b268ba4bcdb1b505bb62866b6e021064980e9bc049a55"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/bundle.json","state_url":"https://pith.science/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T22:14:55Z","links":{"resolver":"https://pith.science/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG","bundle":"https://pith.science/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/bundle.json","state":"https://pith.science/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MRDWRL3OM5EXWVYIYD7TZKZIZG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:MRDWRL3OM5EXWVYIYD7TZKZIZG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"b9e978157adc5a2629f1b85bdce9cf6c30705ad48fc37a0df5b12d4f8f7994f1","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-08-07T03:38:16Z","title_canon_sha256":"28e6dc030c05c9b1bd153d4c38293c8d96f6bfbd03c940830afe4162def8544e"},"schema_version":"1.0","source":{"id":"2508.05004","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.05004","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"arxiv_version","alias_value":"2508.05004v4","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.05004","created_at":"2026-05-17T23:39:22Z"},{"alias_kind":"pith_short_12","alias_value":"MRDWRL3OM5EX","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"MRDWRL3OM5EXWVYI","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"MRDWRL3O","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:ae1b8a5fbdcd94209d0b268ba4bcdb1b505bb62866b6e021064980e9bc049a55","target":"graph","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"R-Zero substantially improves reasoning capability across different backbone LLMs, e.g., boosting the Qwen3-4B-Base by +6.49 on math-reasoning benchmarks and +7.54 on general-domain reasoning benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the reward signals for the Challenger (proposing tasks near the edge of Solver capability) and Solver (solving those tasks) can be defined and optimized without any external human data or labels while still producing genuine capability gains rather than reward hacking or mode collapse."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"R-Zero lets a base LLM bootstrap its own reasoning curriculum by pitting a Challenger model against a Solver model that co-evolve through autonomous task generation and solution."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required."}],"snapshot_sha256":"385ae95a7795a1eee837ecb45abb954633c92cd8dd88eaf68f8e4802c9c33ebd"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"b969f89b64b4b0ad4e2cd2162fe02902c4e8f98463deba6f8f4c9d78314e8a6e"},"paper":{"abstract_excerpt":"Self-evolving Large Language Models (LLMs) offer a scalable path toward super-intelligence by autonomously generating, refining, and learning from their own experiences. However, existing methods for training such models still rely heavily on vast human-curated tasks and labels, typically via fine-tuning or reinforcement learning, which poses a fundamental bottleneck to advancing AI systems toward capabilities beyond human intelligence. To overcome this limitation, we introduce R-Zero, a fully autonomous framework that generates its own training data from scratch. Starting from a single base L","authors_text":"Chengsong Huang, Dong Yu, Haitao Mi, Hongming Zhang, Jiaxin Huang, Ruosen Li, Wenhao Yu, Xiaoyang Wang, Zongxia Li","cross_cats":["cs.AI","cs.CL"],"headline":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required.","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-08-07T03:38:16Z","title":"R-Zero: Self-Evolving Reasoning LLM from Zero Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.05004","kind":"arxiv","version":4},"verdict":{"created_at":"2026-05-14T19:18:34.136554Z","id":"0534890d-5e2b-4a9f-9b72-43daf99cbc19","model_set":{"reader":"grok-4.3"},"one_line_summary":"R-Zero lets a base LLM bootstrap its own reasoning curriculum by pitting a Challenger model against a Solver model that co-evolve through autonomous task generation and solution.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"R-Zero lets a base LLM create its own reasoning tasks by co-evolving a Challenger that proposes hard problems and a Solver that learns to solve them, with no human data or labels required.","strongest_claim":"R-Zero substantially improves reasoning capability across different backbone LLMs, e.g., boosting the Qwen3-4B-Base by +6.49 on math-reasoning benchmarks and +7.54 on general-domain reasoning benchmarks.","weakest_assumption":"That the reward signals for the Challenger (proposing tasks near the edge of Solver capability) and Solver (solving those tasks) can be defined and optimized without any external human data or labels while still producing genuine capability gains rather than reward hacking or mode collapse."}},"verdict_id":"0534890d-5e2b-4a9f-9b72-43daf99cbc19"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:20acb8ae2a825340bbf46901d3238d60996a4f6791ad8ed638c1d56b745b8b45","target":"record","created_at":"2026-05-17T23:39:22Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"b9e978157adc5a2629f1b85bdce9cf6c30705ad48fc37a0df5b12d4f8f7994f1","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-08-07T03:38:16Z","title_canon_sha256":"28e6dc030c05c9b1bd153d4c38293c8d96f6bfbd03c940830afe4162def8544e"},"schema_version":"1.0","source":{"id":"2508.05004","kind":"arxiv","version":4}},"canonical_sha256":"644768af6e67497b5708c0ff3cab28c98ff9cc5e4125a68c2b8b7f77bb4af1f2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"644768af6e67497b5708c0ff3cab28c98ff9cc5e4125a68c2b8b7f77bb4af1f2","first_computed_at":"2026-05-17T23:39:22.078871Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:22.078871Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gizSNUrT1kDzCzibQaHg2/yUYKTJxH7GdbsNK4+qQXJDxWHTneVULYkPE+o0qdYHtzdsYVFcV5fwbjnx/GbnDw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:22.079613Z","signed_message":"canonical_sha256_bytes"},"source_id":"2508.05004","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:20acb8ae2a825340bbf46901d3238d60996a4f6791ad8ed638c1d56b745b8b45","sha256:ae1b8a5fbdcd94209d0b268ba4bcdb1b505bb62866b6e021064980e9bc049a55"],"state_sha256":"515eea9fbf52dac45ffe9a7990eb8183a77806887b09cc8338c09cbaff52523a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"yk0UKCn+G8aO1aNqIZUBnBT53PhVlTNjhXrZ3fAEjcsl2LgIamBeS2525UnTeLlDw2mPR0+GwLk5W6PvMdW0BA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T22:14:55.358499Z","bundle_sha256":"1cf89bb3531b205609a055d627f2fa9d4cb2dba9763bed06bbca5206456da71d"}}