{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:CXEE4TU35SSOFF3PXFJ32QYTSL","short_pith_number":"pith:CXEE4TU3","canonical_record":{"source":{"id":"2605.26579","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T05:50:46Z","cross_cats_sorted":[],"title_canon_sha256":"8d45839ab91ddd605fc0e62a894cbfc11cf465fb0ad5e9111686d8d502f41e46","abstract_canon_sha256":"06e0a0128e3dafa4cda370d4b51ce959e22cb17a3c5368d49f9bf56f89908d8d"},"schema_version":"1.0"},"canonical_sha256":"15c84e4e9beca4e2976fb953bd431392c073dbe9adb91d686f772230f50aea42","source":{"kind":"arxiv","id":"2605.26579","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.26579","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"arxiv_version","alias_value":"2605.26579v1","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26579","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_12","alias_value":"CXEE4TU35SSO","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_16","alias_value":"CXEE4TU35SSOFF3P","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_8","alias_value":"CXEE4TU3","created_at":"2026-05-27T01:05:26Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:CXEE4TU35SSOFF3PXFJ32QYTSL","target":"record","payload":{"canonical_record":{"source":{"id":"2605.26579","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T05:50:46Z","cross_cats_sorted":[],"title_canon_sha256":"8d45839ab91ddd605fc0e62a894cbfc11cf465fb0ad5e9111686d8d502f41e46","abstract_canon_sha256":"06e0a0128e3dafa4cda370d4b51ce959e22cb17a3c5368d49f9bf56f89908d8d"},"schema_version":"1.0"},"canonical_sha256":"15c84e4e9beca4e2976fb953bd431392c073dbe9adb91d686f772230f50aea42","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:05:26.937146Z","signature_b64":"uu6Ff9fwqYccRAAzwJiGyuuh3y0b92ZUYlPgKybDID+CsyHT2A7UiBbuFaeGB2HXyu/oSu+qMWzQ9yOq2JOQDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"15c84e4e9beca4e2976fb953bd431392c073dbe9adb91d686f772230f50aea42","last_reissued_at":"2026-05-27T01:05:26.936687Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:05:26.936687Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.26579","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pqlf1oa6YgOSvWJN8JUWs5CJ6CWVlAZF0ph3uFGl0HAlYo/mu2VSmOoOLTcm6wHjuyvgyYg7WNBZVedd4u9ACw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:31:45.510216Z"},"content_sha256":"f3efad7adcd0c046f281605ea92971bdc900b6556541b3053fdc7d5efddc8c84","schema_version":"1.0","event_id":"sha256:f3efad7adcd0c046f281605ea92971bdc900b6556541b3053fdc7d5efddc8c84"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:CXEE4TU35SSOFF3PXFJ32QYTSL","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Focal Reward: Balanced Reinforcement Learning under Rubric-Based Rewards","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Feng Hong, Jiangchao Yao, Jun Zhou, Lin Yuan, Qiang Hu, Wanli Gu, Weichang Wu, Xiaolu Zhang, Xinmu Ge, Yu Huang, Zhaoxin Huan, Zihua Zhao","submitted_at":"2026-05-26T05:50:46Z","abstract_excerpt":"The open-ended generation in LLMs usually requires multi-dimensional rubrics to adequately assess quality and guide the improvement of reinforcement learning. However, a critical dilemma inherent in this training paradigm is the imbalanced reward polarization along different rubric dimensions. Under this bottleneck, even if LLMs achieve relatively high rewards after training, they may still exhibit severe deficiencies in certain dimensions, leading to a direct deterioration in user experience. To address this problem, we propose Focal Reward, a novel objective to automatically balance the trai"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26579","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.26579/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:26Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ML9lSBe3aDuTQ4bjTrfE5zZgwxwZgDZzHCIWIabCibLjkBQq90p4cu0EtiqQYq6SsJS+1HPklWwY3veUs1OKDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T16:31:45.510940Z"},"content_sha256":"fe1f74370ddacefe959c30f8e5b2d9b16364576d36ce74c0877d83adcfab5975","schema_version":"1.0","event_id":"sha256:fe1f74370ddacefe959c30f8e5b2d9b16364576d36ce74c0877d83adcfab5975"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/bundle.json","state_url":"https://pith.science/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T16:31:45Z","links":{"resolver":"https://pith.science/pith/CXEE4TU35SSOFF3PXFJ32QYTSL","bundle":"https://pith.science/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/bundle.json","state":"https://pith.science/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CXEE4TU35SSOFF3PXFJ32QYTSL/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CXEE4TU35SSOFF3PXFJ32QYTSL","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"06e0a0128e3dafa4cda370d4b51ce959e22cb17a3c5368d49f9bf56f89908d8d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T05:50:46Z","title_canon_sha256":"8d45839ab91ddd605fc0e62a894cbfc11cf465fb0ad5e9111686d8d502f41e46"},"schema_version":"1.0","source":{"id":"2605.26579","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.26579","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"arxiv_version","alias_value":"2605.26579v1","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.26579","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_12","alias_value":"CXEE4TU35SSO","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_16","alias_value":"CXEE4TU35SSOFF3P","created_at":"2026-05-27T01:05:26Z"},{"alias_kind":"pith_short_8","alias_value":"CXEE4TU3","created_at":"2026-05-27T01:05:26Z"}],"graph_snapshots":[{"event_id":"sha256:fe1f74370ddacefe959c30f8e5b2d9b16364576d36ce74c0877d83adcfab5975","target":"graph","created_at":"2026-05-27T01:05:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.26579/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The open-ended generation in LLMs usually requires multi-dimensional rubrics to adequately assess quality and guide the improvement of reinforcement learning. However, a critical dilemma inherent in this training paradigm is the imbalanced reward polarization along different rubric dimensions. Under this bottleneck, even if LLMs achieve relatively high rewards after training, they may still exhibit severe deficiencies in certain dimensions, leading to a direct deterioration in user experience. To address this problem, we propose Focal Reward, a novel objective to automatically balance the trai","authors_text":"Feng Hong, Jiangchao Yao, Jun Zhou, Lin Yuan, Qiang Hu, Wanli Gu, Weichang Wu, Xiaolu Zhang, Xinmu Ge, Yu Huang, Zhaoxin Huan, Zihua Zhao","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T05:50:46Z","title":"Focal Reward: Balanced Reinforcement Learning under Rubric-Based Rewards"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.26579","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f3efad7adcd0c046f281605ea92971bdc900b6556541b3053fdc7d5efddc8c84","target":"record","created_at":"2026-05-27T01:05:26Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"06e0a0128e3dafa4cda370d4b51ce959e22cb17a3c5368d49f9bf56f89908d8d","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T05:50:46Z","title_canon_sha256":"8d45839ab91ddd605fc0e62a894cbfc11cf465fb0ad5e9111686d8d502f41e46"},"schema_version":"1.0","source":{"id":"2605.26579","kind":"arxiv","version":1}},"canonical_sha256":"15c84e4e9beca4e2976fb953bd431392c073dbe9adb91d686f772230f50aea42","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"15c84e4e9beca4e2976fb953bd431392c073dbe9adb91d686f772230f50aea42","first_computed_at":"2026-05-27T01:05:26.936687Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:05:26.936687Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"uu6Ff9fwqYccRAAzwJiGyuuh3y0b92ZUYlPgKybDID+CsyHT2A7UiBbuFaeGB2HXyu/oSu+qMWzQ9yOq2JOQDA==","signature_status":"signed_v1","signed_at":"2026-05-27T01:05:26.937146Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.26579","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f3efad7adcd0c046f281605ea92971bdc900b6556541b3053fdc7d5efddc8c84","sha256:fe1f74370ddacefe959c30f8e5b2d9b16364576d36ce74c0877d83adcfab5975"],"state_sha256":"850d51afa7b60c0f39a6ffc8ca48140534c93db6f3478fd29928b656f1b52da3"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wQ+ZOVCSV1jepa/vxXapwktL8ZoISguQI5NvhUTyr8Z/ibF2b4Sagdnsh9c5BWNhVTdHNf/JgecFYWZvhPtHAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T16:31:45.513993Z","bundle_sha256":"d2bae15436681617b322e4e3b69fa76a688168325aa1a81b7bd29aceccffc654"}}