{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:QYVVT3Y4YA7KOZ6XUTN2WGNMWE","short_pith_number":"pith:QYVVT3Y4","canonical_record":{"source":{"id":"2605.20834","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T07:26:22Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"48cf0056f73e3c4e0656a89800ade81c3726e3e9f4fb868b3cef56ffcc0cdd36","abstract_canon_sha256":"d7ace5246ce47d63861cae66addee02f4ebcd9240992adc10f6922c5b9ed5523"},"schema_version":"1.0"},"canonical_sha256":"862b59ef1cc03ea767d7a4dbab19acb12769f1117f90d7c8263830628914f982","source":{"kind":"arxiv","id":"2605.20834","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20834","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20834v1","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20834","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_12","alias_value":"QYVVT3Y4YA7K","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_16","alias_value":"QYVVT3Y4YA7KOZ6X","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_8","alias_value":"QYVVT3Y4","created_at":"2026-05-21T01:05:23Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:QYVVT3Y4YA7KOZ6XUTN2WGNMWE","target":"record","payload":{"canonical_record":{"source":{"id":"2605.20834","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T07:26:22Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"48cf0056f73e3c4e0656a89800ade81c3726e3e9f4fb868b3cef56ffcc0cdd36","abstract_canon_sha256":"d7ace5246ce47d63861cae66addee02f4ebcd9240992adc10f6922c5b9ed5523"},"schema_version":"1.0"},"canonical_sha256":"862b59ef1cc03ea767d7a4dbab19acb12769f1117f90d7c8263830628914f982","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:23.736050Z","signature_b64":"QkF4eHqmaRv+cnPy/nGuAh4oCoYGaTGBSHSOBsV07DT7W9wX4ngwbKU7G8zKcygJFQFwtZHRF2o47PofKpZ7CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"862b59ef1cc03ea767d7a4dbab19acb12769f1117f90d7c8263830628914f982","last_reissued_at":"2026-05-21T01:05:23.735349Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:23.735349Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.20834","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"brYzeAPv2J4OjIZjoNxOC2vb68Thy2kludaqo1CYvHysrcOWEpG2nFYVaukxaht0B+ABq5SBGF0qyQmcxirkCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T12:17:36.020797Z"},"content_sha256":"4c2be0c35ee0cfdeb0f19ee345e637a51b134875b6d997b09ce2e40b2b02259f","schema_version":"1.0","event_id":"sha256:4c2be0c35ee0cfdeb0f19ee345e637a51b134875b6d997b09ce2e40b2b02259f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:QYVVT3Y4YA7KOZ6XUTN2WGNMWE","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Conditional Equivalence of DPO and RLHF: Implicit Assumption, Failure Modes, and Provable Alignment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Bo Han, Dong Fang, Wei Xue, Yike Guo, Yonggang Zhang, Zhiqin Yang","submitted_at":"2026-05-20T07:26:22Z","abstract_excerpt":"Direct Preference Optimization (DPO) has emerged as a popular alternative to Reinforcement Learning from Human Feedback (RLHF), offering theoretical equivalence with simpler implementation. We prove this equivalence is conditional rather than universal, depending on an implicit assumption frequently violated in practice: the RLHF-optimal policy must prefer human-preferred responses. When this assumption fails, DPO optimizes relative advantage over the reference policy rather than absolute alignment with human preferences, leading to pathological convergence where policies decrease DPO loss whi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20834","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20834/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hBoshkKO7TX8039LKtXDjDAPbKK5BvmQZL1mvVJv2MVcbA1Bevkh/y66JLpoAD2Is68SNa9Diey9Qi15hSzfBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-24T12:17:36.021229Z"},"content_sha256":"2780dc611619ded90f423626cec143854b0574101b612074c60f1ed0299c32f4","schema_version":"1.0","event_id":"sha256:2780dc611619ded90f423626cec143854b0574101b612074c60f1ed0299c32f4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/bundle.json","state_url":"https://pith.science/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-24T12:17:36Z","links":{"resolver":"https://pith.science/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE","bundle":"https://pith.science/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/bundle.json","state":"https://pith.science/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QYVVT3Y4YA7KOZ6XUTN2WGNMWE/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:QYVVT3Y4YA7KOZ6XUTN2WGNMWE","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d7ace5246ce47d63861cae66addee02f4ebcd9240992adc10f6922c5b9ed5523","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T07:26:22Z","title_canon_sha256":"48cf0056f73e3c4e0656a89800ade81c3726e3e9f4fb868b3cef56ffcc0cdd36"},"schema_version":"1.0","source":{"id":"2605.20834","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20834","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20834v1","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20834","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_12","alias_value":"QYVVT3Y4YA7K","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_16","alias_value":"QYVVT3Y4YA7KOZ6X","created_at":"2026-05-21T01:05:23Z"},{"alias_kind":"pith_short_8","alias_value":"QYVVT3Y4","created_at":"2026-05-21T01:05:23Z"}],"graph_snapshots":[{"event_id":"sha256:2780dc611619ded90f423626cec143854b0574101b612074c60f1ed0299c32f4","target":"graph","created_at":"2026-05-21T01:05:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.20834/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Direct Preference Optimization (DPO) has emerged as a popular alternative to Reinforcement Learning from Human Feedback (RLHF), offering theoretical equivalence with simpler implementation. We prove this equivalence is conditional rather than universal, depending on an implicit assumption frequently violated in practice: the RLHF-optimal policy must prefer human-preferred responses. When this assumption fails, DPO optimizes relative advantage over the reference policy rather than absolute alignment with human preferences, leading to pathological convergence where policies decrease DPO loss whi","authors_text":"Bo Han, Dong Fang, Wei Xue, Yike Guo, Yonggang Zhang, Zhiqin Yang","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T07:26:22Z","title":"Conditional Equivalence of DPO and RLHF: Implicit Assumption, Failure Modes, and Provable Alignment"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20834","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4c2be0c35ee0cfdeb0f19ee345e637a51b134875b6d997b09ce2e40b2b02259f","target":"record","created_at":"2026-05-21T01:05:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d7ace5246ce47d63861cae66addee02f4ebcd9240992adc10f6922c5b9ed5523","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-20T07:26:22Z","title_canon_sha256":"48cf0056f73e3c4e0656a89800ade81c3726e3e9f4fb868b3cef56ffcc0cdd36"},"schema_version":"1.0","source":{"id":"2605.20834","kind":"arxiv","version":1}},"canonical_sha256":"862b59ef1cc03ea767d7a4dbab19acb12769f1117f90d7c8263830628914f982","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"862b59ef1cc03ea767d7a4dbab19acb12769f1117f90d7c8263830628914f982","first_computed_at":"2026-05-21T01:05:23.735349Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:23.735349Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"QkF4eHqmaRv+cnPy/nGuAh4oCoYGaTGBSHSOBsV07DT7W9wX4ngwbKU7G8zKcygJFQFwtZHRF2o47PofKpZ7CA==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:23.736050Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.20834","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4c2be0c35ee0cfdeb0f19ee345e637a51b134875b6d997b09ce2e40b2b02259f","sha256:2780dc611619ded90f423626cec143854b0574101b612074c60f1ed0299c32f4"],"state_sha256":"28025eaba27ee1533bc6d80ac58c957ba369d6a66874852de333bb6e982e6883"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OWDCaLILsv2YNqGXug2N40H9BN84+pSOPOk3HKKS+bvAYkUUgKDrZbVEXEC246grQILJypBf7Orm/0Wohh0wAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-24T12:17:36.024369Z","bundle_sha256":"c28f8f0b894236d789604c831290938e1e7621094a6207c1fbee171130c0020a"}}