{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:AVSLDM5H3KRS72I62DHTKBDIN6","short_pith_number":"pith:AVSLDM5H","canonical_record":{"source":{"id":"2506.10054","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-11T17:58:05Z","cross_cats_sorted":["cs.AI","cs.CL","cs.CV"],"title_canon_sha256":"0d7e264a8cb173ce0801564e1ddca0b6ac3700cb74acb268707f5417b3c50536","abstract_canon_sha256":"601eabd7018136ec2c38389db11aaf2dcc16c3eb41f0f1c0cebf2bb219d26a44"},"schema_version":"1.0"},"canonical_sha256":"0564b1b3a7daa32fe91ed0cf3504686fbd513c8b082ce6c8c0cf546456923eab","source":{"kind":"arxiv","id":"2506.10054","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.10054","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"arxiv_version","alias_value":"2506.10054v4","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.10054","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_12","alias_value":"AVSLDM5H3KRS","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_16","alias_value":"AVSLDM5H3KRS72I6","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_8","alias_value":"AVSLDM5H","created_at":"2026-05-26T01:03:13Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:AVSLDM5H3KRS72I62DHTKBDIN6","target":"record","payload":{"canonical_record":{"source":{"id":"2506.10054","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-11T17:58:05Z","cross_cats_sorted":["cs.AI","cs.CL","cs.CV"],"title_canon_sha256":"0d7e264a8cb173ce0801564e1ddca0b6ac3700cb74acb268707f5417b3c50536","abstract_canon_sha256":"601eabd7018136ec2c38389db11aaf2dcc16c3eb41f0f1c0cebf2bb219d26a44"},"schema_version":"1.0"},"canonical_sha256":"0564b1b3a7daa32fe91ed0cf3504686fbd513c8b082ce6c8c0cf546456923eab","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:13.097690Z","signature_b64":"HuDHEUyeSX4Baomqftwk4k8bggAnzfdyRJUSgzxePxlVrNgS7vw0IorDYXchCopz51M73jqwgYHk/P5TeGNnCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0564b1b3a7daa32fe91ed0cf3504686fbd513c8b082ce6c8c0cf546456923eab","last_reissued_at":"2026-05-26T01:03:13.097119Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:13.097119Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2506.10054","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dzhzwTkyJygn8sYh9aMBR9DN9V46uG9lhAV491BBX+HOYDYUddB60dDLsrjJrEmQE1CeneNdi888Qn1mVQjQAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:10:12.620957Z"},"content_sha256":"d7ec04addbceb54291c15dcf8169f63bf4d8e0a1206e8fd792408fb57a442885","schema_version":"1.0","event_id":"sha256:d7ec04addbceb54291c15dcf8169f63bf4d8e0a1206e8fd792408fb57a442885"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:AVSLDM5H3KRS72I62DHTKBDIN6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Uni-DPO: A Unified Paradigm for Dynamic Preference Optimization of LLMs","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":["cs.AI","cs.CL","cs.CV"],"primary_cat":"cs.LG","authors_text":"Baotian Hu, Chengquan Zhang, Haotian Xu, Min Zhang, Senqiao Yang, Shangpin Peng, Takashi Isobe, Weinong Wang, Xing Wu, Zhuotao Tian","submitted_at":"2025-06-11T17:58:05Z","abstract_excerpt":"Direct Preference Optimization (DPO) has emerged as a cornerstone of reinforcement learning from human feedback (RLHF) due to its simplicity and efficiency. However, existing DPO-based methods typically treat all preference pairs equally, overlooking substantial variations in data quality and learning difficulty, which leads to inefficient data utilization and suboptimal performance. To address this limitation, we propose Uni-DPO, a unified dynamic preference optimization framework that jointly considers (a) the inherent quality of preference pairs and (b) the model's evolving performance duri"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.10054","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2506.10054/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wgubGrjDyBTlobtW/0KFhjA19WgtSd3q/Hk6o+RwQK4A+DIgvrYfUSOJ0f6dB240umeOKx2zlB+O9jRKiQOuAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T03:10:12.621352Z"},"content_sha256":"8145b07bab7ee03ac95c27da9bf7a31343e4b976cd98c6fa1004a66c9ac4b8e0","schema_version":"1.0","event_id":"sha256:8145b07bab7ee03ac95c27da9bf7a31343e4b976cd98c6fa1004a66c9ac4b8e0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AVSLDM5H3KRS72I62DHTKBDIN6/bundle.json","state_url":"https://pith.science/pith/AVSLDM5H3KRS72I62DHTKBDIN6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AVSLDM5H3KRS72I62DHTKBDIN6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T03:10:12Z","links":{"resolver":"https://pith.science/pith/AVSLDM5H3KRS72I62DHTKBDIN6","bundle":"https://pith.science/pith/AVSLDM5H3KRS72I62DHTKBDIN6/bundle.json","state":"https://pith.science/pith/AVSLDM5H3KRS72I62DHTKBDIN6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AVSLDM5H3KRS72I62DHTKBDIN6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:AVSLDM5H3KRS72I62DHTKBDIN6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"601eabd7018136ec2c38389db11aaf2dcc16c3eb41f0f1c0cebf2bb219d26a44","cross_cats_sorted":["cs.AI","cs.CL","cs.CV"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-11T17:58:05Z","title_canon_sha256":"0d7e264a8cb173ce0801564e1ddca0b6ac3700cb74acb268707f5417b3c50536"},"schema_version":"1.0","source":{"id":"2506.10054","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2506.10054","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"arxiv_version","alias_value":"2506.10054v4","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.10054","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_12","alias_value":"AVSLDM5H3KRS","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_16","alias_value":"AVSLDM5H3KRS72I6","created_at":"2026-05-26T01:03:13Z"},{"alias_kind":"pith_short_8","alias_value":"AVSLDM5H","created_at":"2026-05-26T01:03:13Z"}],"graph_snapshots":[{"event_id":"sha256:8145b07bab7ee03ac95c27da9bf7a31343e4b976cd98c6fa1004a66c9ac4b8e0","target":"graph","created_at":"2026-05-26T01:03:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2506.10054/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Direct Preference Optimization (DPO) has emerged as a cornerstone of reinforcement learning from human feedback (RLHF) due to its simplicity and efficiency. However, existing DPO-based methods typically treat all preference pairs equally, overlooking substantial variations in data quality and learning difficulty, which leads to inefficient data utilization and suboptimal performance. To address this limitation, we propose Uni-DPO, a unified dynamic preference optimization framework that jointly considers (a) the inherent quality of preference pairs and (b) the model's evolving performance duri","authors_text":"Baotian Hu, Chengquan Zhang, Haotian Xu, Min Zhang, Senqiao Yang, Shangpin Peng, Takashi Isobe, Weinong Wang, Xing Wu, Zhuotao Tian","cross_cats":["cs.AI","cs.CL","cs.CV"],"headline":"","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-11T17:58:05Z","title":"Uni-DPO: A Unified Paradigm for Dynamic Preference Optimization of LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.10054","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d7ec04addbceb54291c15dcf8169f63bf4d8e0a1206e8fd792408fb57a442885","target":"record","created_at":"2026-05-26T01:03:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"601eabd7018136ec2c38389db11aaf2dcc16c3eb41f0f1c0cebf2bb219d26a44","cross_cats_sorted":["cs.AI","cs.CL","cs.CV"],"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.LG","submitted_at":"2025-06-11T17:58:05Z","title_canon_sha256":"0d7e264a8cb173ce0801564e1ddca0b6ac3700cb74acb268707f5417b3c50536"},"schema_version":"1.0","source":{"id":"2506.10054","kind":"arxiv","version":4}},"canonical_sha256":"0564b1b3a7daa32fe91ed0cf3504686fbd513c8b082ce6c8c0cf546456923eab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0564b1b3a7daa32fe91ed0cf3504686fbd513c8b082ce6c8c0cf546456923eab","first_computed_at":"2026-05-26T01:03:13.097119Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:13.097119Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"HuDHEUyeSX4Baomqftwk4k8bggAnzfdyRJUSgzxePxlVrNgS7vw0IorDYXchCopz51M73jqwgYHk/P5TeGNnCg==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:13.097690Z","signed_message":"canonical_sha256_bytes"},"source_id":"2506.10054","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d7ec04addbceb54291c15dcf8169f63bf4d8e0a1206e8fd792408fb57a442885","sha256:8145b07bab7ee03ac95c27da9bf7a31343e4b976cd98c6fa1004a66c9ac4b8e0"],"state_sha256":"557c5ffa48cec68009a07d36f424ead496fae70e82d595e81309849c271cfdda"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hh+GFOQ2QOKNevM3wUZIlIYb5WbrDtjit4ff3YuvKaR443qxTH1y+XHu7rzDjwGTDGbS/n0HEvo6EgLU5E8tDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T03:10:12.623826Z","bundle_sha256":"d117bfb8cfb72f8d4506897d9c4be6af2fe6b3cb3afb3da58c65d717a7d3b5c2"}}