{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:DJ6ELZMMJUFB5ZLDJPX4S6SELX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d95d58d71157c02b45df7556157c0442ae8cff37b423c35a2660340a50400e4a","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-02T19:54:41Z","title_canon_sha256":"3627b39756a6f7cee97efa10f330ffbd3a6ae0b730e4c9f75daadbc2713d22ce"},"schema_version":"1.0","source":{"id":"2009.01325","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2009.01325","created_at":"2026-05-18T01:40:46Z"},{"alias_kind":"arxiv_version","alias_value":"2009.01325v3","created_at":"2026-05-18T01:40:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2009.01325","created_at":"2026-05-18T01:40:46Z"},{"alias_kind":"pith_short_12","alias_value":"DJ6ELZMMJUFB","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"DJ6ELZMMJUFB5ZLD","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"DJ6ELZMM","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:dde93fca4245659511e7d7cb2695f22780e5f6b7afcd45417f762912cc00061c","target":"graph","created_at":"2026-05-18T01:40:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"As language models become more powerful, training and evaluation are increasingly bottlenecked by the data and metrics used for a particular task. For example, summarization models are often trained to predict human reference summaries and evaluated using ROUGE, but both of these metrics are rough proxies for what we really care about -- summary quality. In this work, we show that it is possible to significantly improve summary quality by training a model to optimize for human preferences. We collect a large, high-quality dataset of human comparisons between summaries, train a model to predict","authors_text":"Alec Radford, Chelsea Voss, Daniel M. Ziegler, Dario Amodei, Jeff Wu, Long Ouyang, Nisan Stiennon, Paul Christiano, Ryan Lowe","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-02T19:54:41Z","title":"Learning to summarize from human feedback"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2009.01325","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3910c23f8739aef63928a19d1d80f8d30b3afe88f0ef0bd05fe19b55b2cba517","target":"record","created_at":"2026-05-18T01:40:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d95d58d71157c02b45df7556157c0442ae8cff37b423c35a2660340a50400e4a","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2020-09-02T19:54:41Z","title_canon_sha256":"3627b39756a6f7cee97efa10f330ffbd3a6ae0b730e4c9f75daadbc2713d22ce"},"schema_version":"1.0","source":{"id":"2009.01325","kind":"arxiv","version":3}},"canonical_sha256":"1a7c45e58c4d0a1ee5634befc97a445de59ec49bd499c44667a23eb7b524eaa0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1a7c45e58c4d0a1ee5634befc97a445de59ec49bd499c44667a23eb7b524eaa0","first_computed_at":"2026-05-18T01:40:46.510846Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:40:46.510846Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8fRwmf558LUtCoDTFBxQpm0nrRc23taEzVSioZR8qfBOU5GZ9C7VsR81yIAVkdoRXxk+ukRyOfTPH9YcSQROCg==","signature_status":"signed_v1","signed_at":"2026-05-18T01:40:46.511579Z","signed_message":"canonical_sha256_bytes"},"source_id":"2009.01325","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3910c23f8739aef63928a19d1d80f8d30b3afe88f0ef0bd05fe19b55b2cba517","sha256:dde93fca4245659511e7d7cb2695f22780e5f6b7afcd45417f762912cc00061c"],"state_sha256":"db4e16fab38d55467b4049f1c3f33a5ebce64f776fb365ed6c9ba8481bce512b"}