{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:YYXR2WAL6MK5EWBYF2QDMLUFPS","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"97f3ea108185fc25741dae86d92f03fa0ec171db8c855fab78d2fd4659774e7a","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2024-05-20T01:04:40Z","title_canon_sha256":"cac5c1897e3139b2e2616615e0260b990c1809c83d38220b3a69c988b2521d15"},"schema_version":"1.0","source":{"id":"2405.11143","kind":"arxiv","version":6}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2405.11143","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"arxiv_version","alias_value":"2405.11143v6","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2405.11143","created_at":"2026-05-17T23:38:53Z"},{"alias_kind":"pith_short_12","alias_value":"YYXR2WAL6MK5","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"YYXR2WAL6MK5EWBY","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"YYXR2WAL","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:f8d3cd6d2ffca7f73e2ca3cce504511d78d221e0f04126cfe7bf0aada766b531","target":"graph","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Experimental results show that OpenRLHF achieves superior training efficiency, with speedups ranging from 1.22x to 1.68x across different model sizes, compared to state-of-the-art frameworks. Additionally, it requires significantly fewer lines of code for implementation."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The reported speedups and code reductions are measured under fair, comparable conditions against the true state-of-the-art baselines, and the ease-of-use metric (lines of code) accurately reflects real-world implementation effort for typical users."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"OpenRLHF is a new open-source RLHF framework reporting 1.22x to 1.68x speedups and fewer lines of code than prior systems."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"OpenRLHF delivers a streamlined open-source framework for RLHF that trains models 1.22x to 1.68x faster while requiring far fewer lines of code."}],"snapshot_sha256":"ae0be8a697a3fc02167f98e44202e47dbd919e6c41b6a9fd0a9ea4d481e2e6df"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Large Language Models (LLMs) fine-tuned via Reinforcement Learning from Human Feedback (RLHF) and Reinforcement Learning with Verifiable Rewards (RLVR) significantly improve the alignment of human-AI values, further raising the upper bound of AI capabilities, particularly in reasoning-intensive, long-context Chain-of-Thought (CoT) tasks. However, existing frameworks commonly face challenges such as inference bottlenecks and complexity barriers, which restrict their accessibility to newcomers. To bridge this gap, we introduce \\textbf{OpenRLHF}, a user-friendly, scalable, and easy-to-learn open-","authors_text":"Bin Chen, Hao Chen, Haoran Wang, Haotian Xu, Jason Klein Liu, Jian Hu, Songlin Jiang, Weikai Fang, Wei Shen, Weixun Wang, Xianyu, Xibin Wu, Yiming Liu, Yu Cao, Zilin Zhu","cross_cats":["cs.CL","cs.LG"],"headline":"OpenRLHF delivers a streamlined open-source framework for RLHF that trains models 1.22x to 1.68x faster while requiring far fewer lines of code.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2024-05-20T01:04:40Z","title":"OpenRLHF: An Easy-to-use, Scalable and High-performance RLHF Framework"},"references":{"count":30,"internal_anchors":10,"resolved_work":30,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Deep reinforcement learning from human preferences.Advances in neural information processing systems, 30","work_id":"a63ca98b-8916-47bd-bd9b-d0c693024f4c","year":2017},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Learning to summarize with human feedback","work_id":"4098eb4f-6e58-402f-9bde-2a625fa0675c","year":2020},{"cited_arxiv_id":"2501.12948","doi":"","is_internal_anchor":true,"ref_index":3,"title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","work_id":"e6b75ad5-2877-4168-97c8-710407094d20","year":2025},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Exploring data scaling trends and effects in reinforcement learning from human feedback","work_id":"b7419524-b2c2-49b3-9745-04133e8060b2","year":2025},{"cited_arxiv_id":"2303.08774","doi":"","is_internal_anchor":true,"ref_index":5,"title":"GPT-4 Technical Report","work_id":"b928e041-6991-4c08-8c81-0359e4097c7b","year":2023}],"snapshot_sha256":"ccd33241ac43c41a3506861d99c53b6a062aee36fbf96c8829ac817e291594f2"},"source":{"id":"2405.11143","kind":"arxiv","version":6},"verdict":{"created_at":"2026-05-15T03:24:18.968024Z","id":"edd6e13c-8c67-4d5f-93cf-1a2be2b4b07b","model_set":{"reader":"grok-4.3"},"one_line_summary":"OpenRLHF is a new open-source RLHF framework reporting 1.22x to 1.68x speedups and fewer lines of code than prior systems.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"OpenRLHF delivers a streamlined open-source framework for RLHF that trains models 1.22x to 1.68x faster while requiring far fewer lines of code.","strongest_claim":"Experimental results show that OpenRLHF achieves superior training efficiency, with speedups ranging from 1.22x to 1.68x across different model sizes, compared to state-of-the-art frameworks. Additionally, it requires significantly fewer lines of code for implementation.","weakest_assumption":"The reported speedups and code reductions are measured under fair, comparable conditions against the true state-of-the-art baselines, and the ease-of-use metric (lines of code) accurately reflects real-world implementation effort for typical users."}},"verdict_id":"edd6e13c-8c67-4d5f-93cf-1a2be2b4b07b"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e9e4e4891c615186dce26641137f8cbb474f17e0cc6cae626d511f08cc33be76","target":"record","created_at":"2026-05-17T23:38:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"97f3ea108185fc25741dae86d92f03fa0ec171db8c855fab78d2fd4659774e7a","cross_cats_sorted":["cs.CL","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2024-05-20T01:04:40Z","title_canon_sha256":"cac5c1897e3139b2e2616615e0260b990c1809c83d38220b3a69c988b2521d15"},"schema_version":"1.0","source":{"id":"2405.11143","kind":"arxiv","version":6}},"canonical_sha256":"c62f1d580bf315d258382ea0362e857c8d43d67375d111aa7ff0b1884177f5cb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c62f1d580bf315d258382ea0362e857c8d43d67375d111aa7ff0b1884177f5cb","first_computed_at":"2026-05-17T23:38:53.680048Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:53.680048Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KqYSQGxBRekqEfjP2TJkgyzVdX1WcNPQ+Fmnkp2QYOiVKtOOg7GSZbBYdg7szU8+6ro08xvRpNEjixvoAcoPBw==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:53.680532Z","signed_message":"canonical_sha256_bytes"},"source_id":"2405.11143","source_kind":"arxiv","source_version":6}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e9e4e4891c615186dce26641137f8cbb474f17e0cc6cae626d511f08cc33be76","sha256:f8d3cd6d2ffca7f73e2ca3cce504511d78d221e0f04126cfe7bf0aada766b531"],"state_sha256":"0cebf9ba7632c2e502ac42389a90c612bd60e27e987fa0df494664e5d066e433"}