{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:GP7AEOYHTMGSALX7VHS463TM72","short_pith_number":"pith:GP7AEOYH","canonical_record":{"source":{"id":"2506.01523","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-02T10:36:31Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"f5240f7c6f54432a1a08d31b2d3b1217f84ed7dfd045972cc624ed4e55d99eec","abstract_canon_sha256":"3f3992e6f4dc642189f1a3d9326f7266a28a150cdb030137b0b1c81a02d4c96a"},"schema_version":"1.0"},"canonical_sha256":"33fe023b079b0d202effa9e5cf6e6cfe9f720e7de63ca471777bd1fa77abeb88","source":{"kind":"arxiv","id":"2506.01523","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.01523","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"arxiv_version","alias_value":"2506.01523v2","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.01523","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_12","alias_value":"GP7AEOYHTMGS","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_16","alias_value":"GP7AEOYHTMGSALX7","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_8","alias_value":"GP7AEOYH","created_at":"2026-05-20T00:04:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:GP7AEOYHTMGSALX7VHS463TM72","target":"record","payload":{"canonical_record":{"source":{"id":"2506.01523","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-02T10:36:31Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"f5240f7c6f54432a1a08d31b2d3b1217f84ed7dfd045972cc624ed4e55d99eec","abstract_canon_sha256":"3f3992e6f4dc642189f1a3d9326f7266a28a150cdb030137b0b1c81a02d4c96a"},"schema_version":"1.0"},"canonical_sha256":"33fe023b079b0d202effa9e5cf6e6cfe9f720e7de63ca471777bd1fa77abeb88","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:10.214211Z","signature_b64":"j0WCFqekaDLpX0nmKTMYVQbF6L2Gjf5TvKMmIw5cP5DLbjH81KeiMigfcmpgKF6rq3bMk+KRoJOeH/uYwM0ACw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"33fe023b079b0d202effa9e5cf6e6cfe9f720e7de63ca471777bd1fa77abeb88","last_reissued_at":"2026-05-20T00:04:10.213461Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:10.213461Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2506.01523","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1lBC7cA1XWdKTOCqNZgE1XdKl7Ikhmq94kenzZ/AuuhnlnVh2KflErSR5Kv2uSI55sVTEF6MblbDZ3FnlEq/AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:33:57.284716Z"},"content_sha256":"1dd95f97abd4e525ff857edda4803fcc2ce130a3caaee226634a453142ea9fa3","schema_version":"1.0","event_id":"sha256:1dd95f97abd4e525ff857edda4803fcc2ce130a3caaee226634a453142ea9fa3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:GP7AEOYHTMGSALX7VHS463TM72","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Beyond RLHF: A Unified Theoretical Framework of Alignment","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Jaewoong Cho, Jihun Yun, Jongha Jon Ryu, Jongho Park, Junhyuck Kim, Juno Kim, Kwang-Sung Jun","submitted_at":"2025-06-02T10:36:31Z","abstract_excerpt":"Alignment via reinforcement learning from human feedback (RLHF) has become the dominant paradigm for controlling the quality of outputs from large language models (LLMs). However, existing theories do not provide strong justification for the RLHF objective itself and do not allow comparisons of the guarantees between various methods because different methods are often analyzed under different frameworks. Toward a unified framework for alignment, we ask under what assumptions can we derive existing or new training objectives and obtain theoretical guarantees. To this end, we reframe alignment a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.01523","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2506.01523/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:10Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KuDOvGKW7RIOI9GNq2I20GeIHrjtliL7p+rRbe+fuZz4oand2LT/BVN/Gw+fiu2856X4K1ltYylUEEUzkFL1Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:33:57.285532Z"},"content_sha256":"b1c1824ce6f0e0d4ef1913dc5734d1347a3b3debd0c5749b1dd93cb3d19bdada","schema_version":"1.0","event_id":"sha256:b1c1824ce6f0e0d4ef1913dc5734d1347a3b3debd0c5749b1dd93cb3d19bdada"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GP7AEOYHTMGSALX7VHS463TM72/bundle.json","state_url":"https://pith.science/pith/GP7AEOYHTMGSALX7VHS463TM72/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GP7AEOYHTMGSALX7VHS463TM72/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T23:33:57Z","links":{"resolver":"https://pith.science/pith/GP7AEOYHTMGSALX7VHS463TM72","bundle":"https://pith.science/pith/GP7AEOYHTMGSALX7VHS463TM72/bundle.json","state":"https://pith.science/pith/GP7AEOYHTMGSALX7VHS463TM72/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GP7AEOYHTMGSALX7VHS463TM72/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:GP7AEOYHTMGSALX7VHS463TM72","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"3f3992e6f4dc642189f1a3d9326f7266a28a150cdb030137b0b1c81a02d4c96a","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-02T10:36:31Z","title_canon_sha256":"f5240f7c6f54432a1a08d31b2d3b1217f84ed7dfd045972cc624ed4e55d99eec"},"schema_version":"1.0","source":{"id":"2506.01523","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.01523","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"arxiv_version","alias_value":"2506.01523v2","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.01523","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_12","alias_value":"GP7AEOYHTMGS","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_16","alias_value":"GP7AEOYHTMGSALX7","created_at":"2026-05-20T00:04:10Z"},{"alias_kind":"pith_short_8","alias_value":"GP7AEOYH","created_at":"2026-05-20T00:04:10Z"}],"graph_snapshots":[{"event_id":"sha256:b1c1824ce6f0e0d4ef1913dc5734d1347a3b3debd0c5749b1dd93cb3d19bdada","target":"graph","created_at":"2026-05-20T00:04:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2506.01523/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Alignment via reinforcement learning from human feedback (RLHF) has become the dominant paradigm for controlling the quality of outputs from large language models (LLMs). However, existing theories do not provide strong justification for the RLHF objective itself and do not allow comparisons of the guarantees between various methods because different methods are often analyzed under different frameworks. Toward a unified framework for alignment, we ask under what assumptions can we derive existing or new training objectives and obtain theoretical guarantees. To this end, we reframe alignment a","authors_text":"Jaewoong Cho, Jihun Yun, Jongha Jon Ryu, Jongho Park, Junhyuck Kim, Juno Kim, Kwang-Sung Jun","cross_cats":["stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-02T10:36:31Z","title":"Beyond RLHF: A Unified Theoretical Framework of Alignment"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.01523","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1dd95f97abd4e525ff857edda4803fcc2ce130a3caaee226634a453142ea9fa3","target":"record","created_at":"2026-05-20T00:04:10Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"3f3992e6f4dc642189f1a3d9326f7266a28a150cdb030137b0b1c81a02d4c96a","cross_cats_sorted":["stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-02T10:36:31Z","title_canon_sha256":"f5240f7c6f54432a1a08d31b2d3b1217f84ed7dfd045972cc624ed4e55d99eec"},"schema_version":"1.0","source":{"id":"2506.01523","kind":"arxiv","version":2}},"canonical_sha256":"33fe023b079b0d202effa9e5cf6e6cfe9f720e7de63ca471777bd1fa77abeb88","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"33fe023b079b0d202effa9e5cf6e6cfe9f720e7de63ca471777bd1fa77abeb88","first_computed_at":"2026-05-20T00:04:10.213461Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:10.213461Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"j0WCFqekaDLpX0nmKTMYVQbF6L2Gjf5TvKMmIw5cP5DLbjH81KeiMigfcmpgKF6rq3bMk+KRoJOeH/uYwM0ACw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:10.214211Z","signed_message":"canonical_sha256_bytes"},"source_id":"2506.01523","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1dd95f97abd4e525ff857edda4803fcc2ce130a3caaee226634a453142ea9fa3","sha256:b1c1824ce6f0e0d4ef1913dc5734d1347a3b3debd0c5749b1dd93cb3d19bdada"],"state_sha256":"9611e58aa756900fac0d23e87543da85b24de5d1b5c324980570186be32fca70"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CGbtpeGeXQhbAO8kbQ+bLYb89RgKujl249i053921JDSuFdKWglibHR7aPY36nwR9JaB5tbcZ3FH1KTPsxDBBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T23:33:57.289981Z","bundle_sha256":"a97dbb088b0eac1c7678fc1257283157283f5d5d5dad373e6484c5c6f4e1d791"}}