{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:YWK22GQISMR233O3P7S43WC7K3","short_pith_number":"pith:YWK22GQI","canonical_record":{"source":{"id":"2605.18191","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:31:34Z","cross_cats_sorted":[],"title_canon_sha256":"70e192ab2b9e5fd4820a47355ec7a49b62a3c8cf54dae6db97fa6e95a70d26e8","abstract_canon_sha256":"7370964b11cabf5880b4225d456376bfc4fda497d504296176d56c27af6e9901"},"schema_version":"1.0"},"canonical_sha256":"c595ad1a089323adeddb7fe5cdd85f56e89657277730c9455ecb3558a8bdbe06","source":{"kind":"arxiv","id":"2605.18191","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18191","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18191v1","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18191","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_12","alias_value":"YWK22GQISMR2","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_16","alias_value":"YWK22GQISMR233O3","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_8","alias_value":"YWK22GQI","created_at":"2026-05-20T00:05:49Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:YWK22GQISMR233O3P7S43WC7K3","target":"record","payload":{"canonical_record":{"source":{"id":"2605.18191","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:31:34Z","cross_cats_sorted":[],"title_canon_sha256":"70e192ab2b9e5fd4820a47355ec7a49b62a3c8cf54dae6db97fa6e95a70d26e8","abstract_canon_sha256":"7370964b11cabf5880b4225d456376bfc4fda497d504296176d56c27af6e9901"},"schema_version":"1.0"},"canonical_sha256":"c595ad1a089323adeddb7fe5cdd85f56e89657277730c9455ecb3558a8bdbe06","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:05:49.994079Z","signature_b64":"23fU6VVQ714WYqXqViATWaevL2yCT7CEjk7mzMAOsW0XqLt33lz7ax7zcSrsrJyA3+hOrGvMDk9UrI8S70hhBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c595ad1a089323adeddb7fe5cdd85f56e89657277730c9455ecb3558a8bdbe06","last_reissued_at":"2026-05-20T00:05:49.993511Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:05:49.993511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.18191","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"CXzg/vBo/gNQyJbCyeTaY+uqCqcpBKAgeDVlpm4YTdk2mYKXF8HSHScOXcUl53vJ2/x+QI55rH4GFNunyVs5BQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T07:23:22.803581Z"},"content_sha256":"2b696ba606df1ecf029bcf98cc729434c696ccedeb141e3ccd53dc79df5e205d","schema_version":"1.0","event_id":"sha256:2b696ba606df1ecf029bcf98cc729434c696ccedeb141e3ccd53dc79df5e205d"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:YWK22GQISMR233O3P7S43WC7K3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Pairwise Preference Reward and Group-Based Diversity Enhancement for Superior Open-Ended Generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Chu Zeng, Guining Cao, Jiaxin Peng, Shuangyong Song, Yongxiang, Yu Zhao","submitted_at":"2026-05-18T10:31:34Z","abstract_excerpt":"Current reinforcement learning(RL) methods are broadly applicable and powerful in verifiable settings where scalar rewards can be provided. However, in open-ended generation tasks, verifying the correctness of responses remains challenging, and training reward models incurs substantial computational and annotation costs. Moreover, reinforcement learning (RLVR) often leads to diversity collapse and produces stereotypical or rigid outputs, outcomes that are particularly undesirable in open-domain scenarios. We propose Pairwise Preference Reward and Group-based Diversity Enhancement (PPR-GDE), a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18191","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.18191/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T23:41:59.011431Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.331613Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"3cb06aade9e1e903d0369e621058650bb2d52070dc4e81b30b2a75a9824ab1a0"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:05:49Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Pu3U7SA1aobI20tYBD6oYnImnZ3O5PmPwYzToOpzsIPeHLoLD7dfmTkTHRNxgHC9prWRxztY0jsXIKxdR7PdAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T07:23:22.804332Z"},"content_sha256":"22d4167b23069b893c50d1b01e3bbc3199cb66f92453e58d6739642d6dab091d","schema_version":"1.0","event_id":"sha256:22d4167b23069b893c50d1b01e3bbc3199cb66f92453e58d6739642d6dab091d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YWK22GQISMR233O3P7S43WC7K3/bundle.json","state_url":"https://pith.science/pith/YWK22GQISMR233O3P7S43WC7K3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YWK22GQISMR233O3P7S43WC7K3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T07:23:22Z","links":{"resolver":"https://pith.science/pith/YWK22GQISMR233O3P7S43WC7K3","bundle":"https://pith.science/pith/YWK22GQISMR233O3P7S43WC7K3/bundle.json","state":"https://pith.science/pith/YWK22GQISMR233O3P7S43WC7K3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YWK22GQISMR233O3P7S43WC7K3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YWK22GQISMR233O3P7S43WC7K3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7370964b11cabf5880b4225d456376bfc4fda497d504296176d56c27af6e9901","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:31:34Z","title_canon_sha256":"70e192ab2b9e5fd4820a47355ec7a49b62a3c8cf54dae6db97fa6e95a70d26e8"},"schema_version":"1.0","source":{"id":"2605.18191","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.18191","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"arxiv_version","alias_value":"2605.18191v1","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.18191","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_12","alias_value":"YWK22GQISMR2","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_16","alias_value":"YWK22GQISMR233O3","created_at":"2026-05-20T00:05:49Z"},{"alias_kind":"pith_short_8","alias_value":"YWK22GQI","created_at":"2026-05-20T00:05:49Z"}],"graph_snapshots":[{"event_id":"sha256:22d4167b23069b893c50d1b01e3bbc3199cb66f92453e58d6739642d6dab091d","target":"graph","created_at":"2026-05-20T00:05:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T23:41:59.011431Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T23:33:35.331613Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.18191/integrity.json","findings":[],"snapshot_sha256":"3cb06aade9e1e903d0369e621058650bb2d52070dc4e81b30b2a75a9824ab1a0","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Current reinforcement learning(RL) methods are broadly applicable and powerful in verifiable settings where scalar rewards can be provided. However, in open-ended generation tasks, verifying the correctness of responses remains challenging, and training reward models incurs substantial computational and annotation costs. Moreover, reinforcement learning (RLVR) often leads to diversity collapse and produces stereotypical or rigid outputs, outcomes that are particularly undesirable in open-domain scenarios. We propose Pairwise Preference Reward and Group-based Diversity Enhancement (PPR-GDE), a ","authors_text":"Chu Zeng, Guining Cao, Jiaxin Peng, Shuangyong Song, Yongxiang, Yu Zhao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:31:34Z","title":"Pairwise Preference Reward and Group-Based Diversity Enhancement for Superior Open-Ended Generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.18191","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2b696ba606df1ecf029bcf98cc729434c696ccedeb141e3ccd53dc79df5e205d","target":"record","created_at":"2026-05-20T00:05:49Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7370964b11cabf5880b4225d456376bfc4fda497d504296176d56c27af6e9901","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-18T10:31:34Z","title_canon_sha256":"70e192ab2b9e5fd4820a47355ec7a49b62a3c8cf54dae6db97fa6e95a70d26e8"},"schema_version":"1.0","source":{"id":"2605.18191","kind":"arxiv","version":1}},"canonical_sha256":"c595ad1a089323adeddb7fe5cdd85f56e89657277730c9455ecb3558a8bdbe06","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c595ad1a089323adeddb7fe5cdd85f56e89657277730c9455ecb3558a8bdbe06","first_computed_at":"2026-05-20T00:05:49.993511Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:05:49.993511Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"23fU6VVQ714WYqXqViATWaevL2yCT7CEjk7mzMAOsW0XqLt33lz7ax7zcSrsrJyA3+hOrGvMDk9UrI8S70hhBA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:05:49.994079Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.18191","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2b696ba606df1ecf029bcf98cc729434c696ccedeb141e3ccd53dc79df5e205d","sha256:22d4167b23069b893c50d1b01e3bbc3199cb66f92453e58d6739642d6dab091d"],"state_sha256":"7dff6743d6fec97672e2e1232129096946e644a8263d9dc81a0d1b9cc224a734"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OL2/j5ySGd41+Ua82+B6J0rC461EdNQTXXQpIzC8WMD09HOSXIJZqg29r5uuYZ/x5T5D4jHyRxtbD8hLViT8CA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T07:23:22.807874Z","bundle_sha256":"4488e3913e97468f2c860abf289c44cd1456450f13b056a371671e4f50a1282b"}}