{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:3T7I6SRLYYQSMS2HDBIITTL7EY","short_pith_number":"pith:3T7I6SRL","canonical_record":{"source":{"id":"2310.01377","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-02T17:40:01Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"b7b8be285286f3dd7d47544a7033add9fc57876b36c4cf43b92d8ac8f1cd2f66","abstract_canon_sha256":"1d36aa47da97202909f564bcf2fd99c5f68f7c70f1de301a52bfcd55c832cdff"},"schema_version":"1.0"},"canonical_sha256":"dcfe8f4a2bc621264b47185089cd7f26248b30c7e9609908cb419894e397dff4","source":{"kind":"arxiv","id":"2310.01377","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2310.01377","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"arxiv_version","alias_value":"2310.01377v2","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.01377","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"pith_short_12","alias_value":"3T7I6SRLYYQS","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"3T7I6SRLYYQSMS2H","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"3T7I6SRL","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:3T7I6SRLYYQSMS2HDBIITTL7EY","target":"record","payload":{"canonical_record":{"source":{"id":"2310.01377","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-02T17:40:01Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"b7b8be285286f3dd7d47544a7033add9fc57876b36c4cf43b92d8ac8f1cd2f66","abstract_canon_sha256":"1d36aa47da97202909f564bcf2fd99c5f68f7c70f1de301a52bfcd55c832cdff"},"schema_version":"1.0"},"canonical_sha256":"dcfe8f4a2bc621264b47185089cd7f26248b30c7e9609908cb419894e397dff4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:13.587095Z","signature_b64":"049o1knKGsFqThP+wGkEo0Fq76L3edqexuZ5UYUOAEePmNJ0iEctiq3GzGa4fWqvQO+MvR5HFDw/X1ALYa6NDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dcfe8f4a2bc621264b47185089cd7f26248b30c7e9609908cb419894e397dff4","last_reissued_at":"2026-05-17T23:38:13.586464Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:13.586464Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2310.01377","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4VMNzykGJfE0TOAKjpwL+P8A0Vvp2d02fS1WjMetA13M9/aDR9ChCZWEyddb8FgtK/zIxFd6VitrMDdc1phbDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T18:47:47.044361Z"},"content_sha256":"dbd1f8bf03f353c722692457c917bc4e907b547cfd0de211157137d1208bb4ec","schema_version":"1.0","event_id":"sha256:dbd1f8bf03f353c722692457c917bc4e907b547cfd0de211157137d1208bb4ec"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:3T7I6SRLYYQSMS2HDBIITTL7EY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"UltraFeedback: Boosting Language Models with Scaled AI Feedback","license":"http://creativecommons.org/licenses/by-sa/4.0/","headline":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Bingxiang He, Ganqu Cui, Guanming Yao, Guotong Xie, Lifan Yuan, Maosong Sun, Ning Ding, Ruobing Xie, Wei Zhu, Yankai Lin, Yuan Ni, Zhiyuan Liu","submitted_at":"2023-10-02T17:40:01Z","abstract_excerpt":"Learning from human feedback has become a pivot technique in aligning large language models (LLMs) with human preferences. However, acquiring vast and premium human feedback is bottlenecked by time, labor, and human capability, resulting in small sizes or limited topics of current datasets. This further hinders feedback learning as well as alignment research within the open-source community. To address this issue, we explore how to go beyond human feedback and collect high-quality \\textit{AI feedback} automatically for a scalable alternative. Specifically, we identify \\textbf{scale and diversi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Built upon UltraFeedback, we align a LLaMA-based model by best-of-n sampling and reinforcement learning, demonstrating its exceptional performance on chat benchmarks.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the series of techniques applied to mitigate annotation biases in GPT-4 feedback produces sufficiently reliable and unbiased signals for effective model alignment.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"UltraFeedback is a large-scale AI feedback dataset that enables effective alignment of open-source language models, yielding strong results on chat benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"651a10cc1350ba03067efd47bcb55e9e95127e7d55fc592ec68511628725edf1"},"source":{"id":"2310.01377","kind":"arxiv","version":2},"verdict":{"id":"252a8e97-63b0-4ad7-9667-1cd978ace386","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-17T16:26:16.522516Z","strongest_claim":"Built upon UltraFeedback, we align a LLaMA-based model by best-of-n sampling and reinforcement learning, demonstrating its exceptional performance on chat benchmarks.","one_line_summary":"UltraFeedback is a large-scale AI feedback dataset that enables effective alignment of open-source language models, yielding strong results on chat benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the series of techniques applied to mitigate annotation biases in GPT-4 feedback produces sufficiently reliable and unbiased signals for effective model alignment.","pith_extraction_headline":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models."},"references":{"count":14,"sample":[{"doi":"10.5281/zenodo.5371628","year":2021,"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","ref_index":1,"cited_arxiv_id":"2107.03374","is_internal_anchor":true},{"doi":"10.18653/v1/","year":2023,"title":"doi: 10.18653/v1/ 2024.findings-acl.586","work_id":"8d675bdd-79ca-48d6-9163-fc17ce0e8ece","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.48550/arxiv","year":2022,"title":"Self-critiquing models for assisting human evaluators","work_id":"3fcefdd1-22ab-4648-a683-cb1555e7a50e","ref_index":3,"cited_arxiv_id":"2206.05802","is_internal_anchor":true},{"doi":"","year":null,"title":"This may be particularly helpful if you have a busy schedule and may not have time to take them later in the day","work_id":"3761cc93-810c-498a-b8f7-6fbb54a50451","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Taking a vitamin D supplement after spending time outdoors can help boost your levels and ensure you’re getting enough","work_id":"dad4fd18-cbc0-46a0-866d-afcab590a1a9","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":14,"snapshot_sha256":"576c9171a7604250df5469674777be4ed6c66a9eead0820ce47ec4fd283f263d","internal_anchors":2},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d3e53c5bd066182d0d71c7229b7c10558aaf01828949e86336fb1134216b3905"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"252a8e97-63b0-4ad7-9667-1cd978ace386"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:38:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1FihvPwU1GpYGVrNjfez/dGv3vzRpOwyQyNLnhimCFtWZT7qIggqA+15ybYfXwaHqKuj1Vg0jgU/5yeP3BaZBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-18T18:47:47.045489Z"},"content_sha256":"03b634c9e0c7f64ccc52aa24fd9cf4e8f507f3f329f4684c549405c1972667a5","schema_version":"1.0","event_id":"sha256:03b634c9e0c7f64ccc52aa24fd9cf4e8f507f3f329f4684c549405c1972667a5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/bundle.json","state_url":"https://pith.science/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-18T18:47:47Z","links":{"resolver":"https://pith.science/pith/3T7I6SRLYYQSMS2HDBIITTL7EY","bundle":"https://pith.science/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/bundle.json","state":"https://pith.science/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3T7I6SRLYYQSMS2HDBIITTL7EY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:3T7I6SRLYYQSMS2HDBIITTL7EY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1d36aa47da97202909f564bcf2fd99c5f68f7c70f1de301a52bfcd55c832cdff","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-02T17:40:01Z","title_canon_sha256":"b7b8be285286f3dd7d47544a7033add9fc57876b36c4cf43b92d8ac8f1cd2f66"},"schema_version":"1.0","source":{"id":"2310.01377","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2310.01377","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"arxiv_version","alias_value":"2310.01377v2","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2310.01377","created_at":"2026-05-17T23:38:13Z"},{"alias_kind":"pith_short_12","alias_value":"3T7I6SRLYYQS","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"3T7I6SRLYYQSMS2H","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"3T7I6SRL","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:03b634c9e0c7f64ccc52aa24fd9cf4e8f507f3f329f4684c549405c1972667a5","target":"graph","created_at":"2026-05-17T23:38:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Built upon UltraFeedback, we align a LLaMA-based model by best-of-n sampling and reinforcement learning, demonstrating its exceptional performance on chat benchmarks."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the series of techniques applied to mitigate annotation biases in GPT-4 feedback produces sufficiently reliable and unbiased signals for effective model alignment."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"UltraFeedback is a large-scale AI feedback dataset that enables effective alignment of open-source language models, yielding strong results on chat benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models."}],"snapshot_sha256":"651a10cc1350ba03067efd47bcb55e9e95127e7d55fc592ec68511628725edf1"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"d3e53c5bd066182d0d71c7229b7c10558aaf01828949e86336fb1134216b3905"},"paper":{"abstract_excerpt":"Learning from human feedback has become a pivot technique in aligning large language models (LLMs) with human preferences. However, acquiring vast and premium human feedback is bottlenecked by time, labor, and human capability, resulting in small sizes or limited topics of current datasets. This further hinders feedback learning as well as alignment research within the open-source community. To address this issue, we explore how to go beyond human feedback and collect high-quality \\textit{AI feedback} automatically for a scalable alternative. Specifically, we identify \\textbf{scale and diversi","authors_text":"Bingxiang He, Ganqu Cui, Guanming Yao, Guotong Xie, Lifan Yuan, Maosong Sun, Ning Ding, Ruobing Xie, Wei Zhu, Yankai Lin, Yuan Ni, Zhiyuan Liu","cross_cats":["cs.AI","cs.LG"],"headline":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models.","license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-02T17:40:01Z","title":"UltraFeedback: Boosting Language Models with Scaled AI Feedback"},"references":{"count":14,"internal_anchors":2,"resolved_work":14,"sample":[{"cited_arxiv_id":"2107.03374","doi":"10.5281/zenodo.5371628","is_internal_anchor":true,"ref_index":1,"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","year":2021},{"cited_arxiv_id":"","doi":"10.18653/v1/","is_internal_anchor":false,"ref_index":2,"title":"doi: 10.18653/v1/ 2024.findings-acl.586","work_id":"8d675bdd-79ca-48d6-9163-fc17ce0e8ece","year":2023},{"cited_arxiv_id":"2206.05802","doi":"10.48550/arxiv","is_internal_anchor":true,"ref_index":3,"title":"Self-critiquing models for assisting human evaluators","work_id":"3fcefdd1-22ab-4648-a683-cb1555e7a50e","year":2022},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"This may be particularly helpful if you have a busy schedule and may not have time to take them later in the day","work_id":"3761cc93-810c-498a-b8f7-6fbb54a50451","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Taking a vitamin D supplement after spending time outdoors can help boost your levels and ensure you’re getting enough","work_id":"dad4fd18-cbc0-46a0-866d-afcab590a1a9","year":null}],"snapshot_sha256":"576c9171a7604250df5469674777be4ed6c66a9eead0820ce47ec4fd283f263d"},"source":{"id":"2310.01377","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-17T16:26:16.522516Z","id":"252a8e97-63b0-4ad7-9667-1cd978ace386","model_set":{"reader":"grok-4.3"},"one_line_summary":"UltraFeedback is a large-scale AI feedback dataset that enables effective alignment of open-source language models, yielding strong results on chat benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"A dataset of over one million GPT-4 feedbacks enables effective alignment of LLaMA-based chat models.","strongest_claim":"Built upon UltraFeedback, we align a LLaMA-based model by best-of-n sampling and reinforcement learning, demonstrating its exceptional performance on chat benchmarks.","weakest_assumption":"That the series of techniques applied to mitigate annotation biases in GPT-4 feedback produces sufficiently reliable and unbiased signals for effective model alignment."}},"verdict_id":"252a8e97-63b0-4ad7-9667-1cd978ace386"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dbd1f8bf03f353c722692457c917bc4e907b547cfd0de211157137d1208bb4ec","target":"record","created_at":"2026-05-17T23:38:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1d36aa47da97202909f564bcf2fd99c5f68f7c70f1de301a52bfcd55c832cdff","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2023-10-02T17:40:01Z","title_canon_sha256":"b7b8be285286f3dd7d47544a7033add9fc57876b36c4cf43b92d8ac8f1cd2f66"},"schema_version":"1.0","source":{"id":"2310.01377","kind":"arxiv","version":2}},"canonical_sha256":"dcfe8f4a2bc621264b47185089cd7f26248b30c7e9609908cb419894e397dff4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"dcfe8f4a2bc621264b47185089cd7f26248b30c7e9609908cb419894e397dff4","first_computed_at":"2026-05-17T23:38:13.586464Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:38:13.586464Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"049o1knKGsFqThP+wGkEo0Fq76L3edqexuZ5UYUOAEePmNJ0iEctiq3GzGa4fWqvQO+MvR5HFDw/X1ALYa6NDA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:38:13.587095Z","signed_message":"canonical_sha256_bytes"},"source_id":"2310.01377","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dbd1f8bf03f353c722692457c917bc4e907b547cfd0de211157137d1208bb4ec","sha256:03b634c9e0c7f64ccc52aa24fd9cf4e8f507f3f329f4684c549405c1972667a5"],"state_sha256":"52faff825b259e72ef345e1f5c2566cf3544cd4707b8ebff7da42a53dda8cf58"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xx2yVIXkQY/v49pnwwGsZUTbVuqOUbVGWjQ76oTBaVqWPAbkV0KpUJ6u8HsaZgaQ/dAhe286pCfmE7sMhZtQDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-18T18:47:47.048461Z","bundle_sha256":"f73c71d3c8b32648caf34f629c5fb80b54b6ba9ac1074a8576ed5d4226b27741"}}