{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:CZF5FQS5LN7UB7ZNTXOOOVJZHY","short_pith_number":"pith:CZF5FQS5","canonical_record":{"source":{"id":"2512.00778","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-30T08:27:59Z","cross_cats_sorted":[],"title_canon_sha256":"484e4705e58a8141dd6c83d956e894a2e4dfaed184896d56a26756771aaa94ed","abstract_canon_sha256":"4c702ee26d076bb2c2d0231385a77a64bfac6c3db6d0a366123eb311a2e2a796"},"schema_version":"1.0"},"canonical_sha256":"164bd2c25d5b7f40ff2d9ddce755393e13871139542bb6321c4781c3509bec1d","source":{"kind":"arxiv","id":"2512.00778","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.00778","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"arxiv_version","alias_value":"2512.00778v2","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.00778","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_12","alias_value":"CZF5FQS5LN7U","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_16","alias_value":"CZF5FQS5LN7UB7ZN","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_8","alias_value":"CZF5FQS5","created_at":"2026-05-20T00:00:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:CZF5FQS5LN7UB7ZNTXOOOVJZHY","target":"record","payload":{"canonical_record":{"source":{"id":"2512.00778","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-30T08:27:59Z","cross_cats_sorted":[],"title_canon_sha256":"484e4705e58a8141dd6c83d956e894a2e4dfaed184896d56a26756771aaa94ed","abstract_canon_sha256":"4c702ee26d076bb2c2d0231385a77a64bfac6c3db6d0a366123eb311a2e2a796"},"schema_version":"1.0"},"canonical_sha256":"164bd2c25d5b7f40ff2d9ddce755393e13871139542bb6321c4781c3509bec1d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:29.086029Z","signature_b64":"fGJh8u05xQNLopY6Xc29boUHzr+HV0HB0kTXu4+aPTqRef+XcC7V2I/UyAI6o9vXjWRbFT75sb/chklPczOACQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"164bd2c25d5b7f40ff2d9ddce755393e13871139542bb6321c4781c3509bec1d","last_reissued_at":"2026-05-20T00:00:29.085323Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:29.085323Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.00778","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"nu6IOPPBrvD+f7HAtnnlIC8vhFuoiJf1xB35WODTUlGvzPqfCXUeSUftc/R3f9bONQYj7N1ft+3YE8RGhFqpDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T00:24:13.789063Z"},"content_sha256":"d66fb28247306dc6455399bf1ddf6e608f4b402f1d2d7c574372e774fa231d2d","schema_version":"1.0","event_id":"sha256:d66fb28247306dc6455399bf1ddf6e608f4b402f1d2d7c574372e774fa231d2d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:CZF5FQS5LN7UB7ZNTXOOOVJZHY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"What Is Preference Optimization Doing, and Why?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bo Han, Gang Niu, Masashi Sugiyama, Qizhou Wang, Yue Wang, Zizhuo Zhang","submitted_at":"2025-11-30T08:27:59Z","abstract_excerpt":"Preference optimization (PO) is indispensable for large language models (LLMs), with methods such as direct preference optimization (DPO) and proximal policy optimization (PPO) achieving great success. A common belief is that DPO is supervised learning while PPO is reinforcement learning, yet deeper analyses for the reasons underlying these differences remain lacking. To fill this gap, we analyze their optimization dynamics, revealing distinct algorithmic behaviors and comprehending their underlying causes. First, we examine the target directions of gradient-based updates and find that DPO fol"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.00778","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.00778/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:00:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Gi98OIXw1p8ydHxAbV3MJyViN8ckdHhSqroxqaRITyfuffK8MuknDV86cah8XKp8YGh76BF2itvlERQuyTqdDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T00:24:13.789766Z"},"content_sha256":"573b782b0d711c75f74f009ba3d1eb2173a7ba55bef3af5c5aab6eaf104414c7","schema_version":"1.0","event_id":"sha256:573b782b0d711c75f74f009ba3d1eb2173a7ba55bef3af5c5aab6eaf104414c7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/bundle.json","state_url":"https://pith.science/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T00:24:13Z","links":{"resolver":"https://pith.science/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY","bundle":"https://pith.science/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/bundle.json","state":"https://pith.science/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CZF5FQS5LN7UB7ZNTXOOOVJZHY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:CZF5FQS5LN7UB7ZNTXOOOVJZHY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4c702ee26d076bb2c2d0231385a77a64bfac6c3db6d0a366123eb311a2e2a796","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-30T08:27:59Z","title_canon_sha256":"484e4705e58a8141dd6c83d956e894a2e4dfaed184896d56a26756771aaa94ed"},"schema_version":"1.0","source":{"id":"2512.00778","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.00778","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"arxiv_version","alias_value":"2512.00778v2","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.00778","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_12","alias_value":"CZF5FQS5LN7U","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_16","alias_value":"CZF5FQS5LN7UB7ZN","created_at":"2026-05-20T00:00:29Z"},{"alias_kind":"pith_short_8","alias_value":"CZF5FQS5","created_at":"2026-05-20T00:00:29Z"}],"graph_snapshots":[{"event_id":"sha256:573b782b0d711c75f74f009ba3d1eb2173a7ba55bef3af5c5aab6eaf104414c7","target":"graph","created_at":"2026-05-20T00:00:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.00778/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Preference optimization (PO) is indispensable for large language models (LLMs), with methods such as direct preference optimization (DPO) and proximal policy optimization (PPO) achieving great success. A common belief is that DPO is supervised learning while PPO is reinforcement learning, yet deeper analyses for the reasons underlying these differences remain lacking. To fill this gap, we analyze their optimization dynamics, revealing distinct algorithmic behaviors and comprehending their underlying causes. First, we examine the target directions of gradient-based updates and find that DPO fol","authors_text":"Bo Han, Gang Niu, Masashi Sugiyama, Qizhou Wang, Yue Wang, Zizhuo Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-30T08:27:59Z","title":"What Is Preference Optimization Doing, and Why?"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.00778","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d66fb28247306dc6455399bf1ddf6e608f4b402f1d2d7c574372e774fa231d2d","target":"record","created_at":"2026-05-20T00:00:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4c702ee26d076bb2c2d0231385a77a64bfac6c3db6d0a366123eb311a2e2a796","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-11-30T08:27:59Z","title_canon_sha256":"484e4705e58a8141dd6c83d956e894a2e4dfaed184896d56a26756771aaa94ed"},"schema_version":"1.0","source":{"id":"2512.00778","kind":"arxiv","version":2}},"canonical_sha256":"164bd2c25d5b7f40ff2d9ddce755393e13871139542bb6321c4781c3509bec1d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"164bd2c25d5b7f40ff2d9ddce755393e13871139542bb6321c4781c3509bec1d","first_computed_at":"2026-05-20T00:00:29.085323Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:00:29.085323Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"fGJh8u05xQNLopY6Xc29boUHzr+HV0HB0kTXu4+aPTqRef+XcC7V2I/UyAI6o9vXjWRbFT75sb/chklPczOACQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:00:29.086029Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.00778","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d66fb28247306dc6455399bf1ddf6e608f4b402f1d2d7c574372e774fa231d2d","sha256:573b782b0d711c75f74f009ba3d1eb2173a7ba55bef3af5c5aab6eaf104414c7"],"state_sha256":"ae3d559951342927e253a465f0bfcdbafd12e62a2316cc167632e53872070278"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/y8fmJ18pz6Bw+1vTJwaH4SgZ2BZWHnoqZp/p7So1+Rw265Qoc2OKrPCVC8JToMJMj9Z+PNwpoa9UDu2AShPBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T00:24:13.792862Z","bundle_sha256":"c046dd480c71f65f8b15761c8a3b0ab28365a4ab08bbf00e8131bd652cb28066"}}