{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DQNRXODZWTOYFE62LG7SKHO26S","short_pith_number":"pith:DQNRXODZ","canonical_record":{"source":{"id":"2605.17291","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-17T07:08:14Z","cross_cats_sorted":[],"title_canon_sha256":"150fc0852294ff79d3cee6087945344aec1f7895b9f96721f7377d389ccfae5a","abstract_canon_sha256":"e39eb0f8e01e914c88daecf5d8124ecf20ace960955d272b6dafdc6e618d269b"},"schema_version":"1.0"},"canonical_sha256":"1c1b1bb879b4dd8293da59bf251ddaf493505092b7ee73e786f254c833b69fb6","source":{"kind":"arxiv","id":"2605.17291","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17291","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17291v1","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17291","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_12","alias_value":"DQNRXODZWTOY","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_16","alias_value":"DQNRXODZWTOYFE62","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_8","alias_value":"DQNRXODZ","created_at":"2026-05-20T00:03:50Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DQNRXODZWTOYFE62LG7SKHO26S","target":"record","payload":{"canonical_record":{"source":{"id":"2605.17291","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-17T07:08:14Z","cross_cats_sorted":[],"title_canon_sha256":"150fc0852294ff79d3cee6087945344aec1f7895b9f96721f7377d389ccfae5a","abstract_canon_sha256":"e39eb0f8e01e914c88daecf5d8124ecf20ace960955d272b6dafdc6e618d269b"},"schema_version":"1.0"},"canonical_sha256":"1c1b1bb879b4dd8293da59bf251ddaf493505092b7ee73e786f254c833b69fb6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:50.438389Z","signature_b64":"DjLfAQ6EcwNdyTDA6/wiPrCELueewvGLua2ISL+oIfYf38LiHdn4L1Z6ZS15yUKcYmdf+SwN3qqP+kXIgkThDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1c1b1bb879b4dd8293da59bf251ddaf493505092b7ee73e786f254c833b69fb6","last_reissued_at":"2026-05-20T00:03:50.437710Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:50.437710Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.17291","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5NRRzsOwocqeP19LB8LyxYDJL5y4m0Nc1j6P4MKykbZPIZP+lMSc6gPlCiWiDaBIRLdyiFVQ7bDWouob0HSyDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:58:18.400861Z"},"content_sha256":"d9b8e03c542e17cbef5fbf38b23a3b52057194f10551498138d25726f42875e1","schema_version":"1.0","event_id":"sha256:d9b8e03c542e17cbef5fbf38b23a3b52057194f10551498138d25726f42875e1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DQNRXODZWTOYFE62LG7SKHO26S","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Step-wise Rubric Rewards for LLM Reasoning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Baobao Chang, Haozhe Zhao, Jiaqi Wang, Kean Shi, Liang Chen, Minghao Ye, Nan Duan, Ruoyu Wu, Shuai Dong, Weichu Xie, Wenpu Liu, Wenqi Shao, Xiaoying Zhang, Xinbo Xu, Yongfu Zhu, Yuqi Xu, Zirong Chen, Ziyue Wang","submitted_at":"2026-05-17T07:08:14Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) is widely used to improve reasoning in large language models, but rewards only final-answer correctness with no supervision over intermediate steps. Rubric-based methods such as Rubrics as Rewards (RaR) introduce finer-grained supervision by scoring rollouts against structured criteria, yet the rubric scores are still aggregated into a single scalar applied to the entire response, causing three weaknesses: loss of multi-criterion structure, uniform supervision of correct and incorrect steps, and reward hacking through unbounded self-correct"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17291","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.17291/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"claim_evidence","ran_at":"2026-05-19T22:01:57.814167Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.766267Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"ebcd9428039ef9251e00d34accdbb93b5097b255ede3aea731e5314ae719f856"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LiDtWW1xJkzcUq9srsn1ffSNufwu1voXZbttfONoaXOTAYggITMuL1o1orNHcA0r7gvFlI3QOMSDxqx1YYwlAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T23:58:18.401868Z"},"content_sha256":"33a6085053ab220cbc401c6683da30978024d556041144e86eff4ecfa125521f","schema_version":"1.0","event_id":"sha256:33a6085053ab220cbc401c6683da30978024d556041144e86eff4ecfa125521f"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DQNRXODZWTOYFE62LG7SKHO26S/bundle.json","state_url":"https://pith.science/pith/DQNRXODZWTOYFE62LG7SKHO26S/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DQNRXODZWTOYFE62LG7SKHO26S/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T23:58:18Z","links":{"resolver":"https://pith.science/pith/DQNRXODZWTOYFE62LG7SKHO26S","bundle":"https://pith.science/pith/DQNRXODZWTOYFE62LG7SKHO26S/bundle.json","state":"https://pith.science/pith/DQNRXODZWTOYFE62LG7SKHO26S/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DQNRXODZWTOYFE62LG7SKHO26S/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DQNRXODZWTOYFE62LG7SKHO26S","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e39eb0f8e01e914c88daecf5d8124ecf20ace960955d272b6dafdc6e618d269b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-17T07:08:14Z","title_canon_sha256":"150fc0852294ff79d3cee6087945344aec1f7895b9f96721f7377d389ccfae5a"},"schema_version":"1.0","source":{"id":"2605.17291","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.17291","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"arxiv_version","alias_value":"2605.17291v1","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.17291","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_12","alias_value":"DQNRXODZWTOY","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_16","alias_value":"DQNRXODZWTOYFE62","created_at":"2026-05-20T00:03:50Z"},{"alias_kind":"pith_short_8","alias_value":"DQNRXODZ","created_at":"2026-05-20T00:03:50Z"}],"graph_snapshots":[{"event_id":"sha256:33a6085053ab220cbc401c6683da30978024d556041144e86eff4ecfa125521f","target":"graph","created_at":"2026-05-20T00:03:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"claim_evidence","ran_at":"2026-05-19T22:01:57.814167Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"ai_meta_artifact","ran_at":"2026-05-19T21:33:23.766267Z","status":"skipped","version":"1.0.0"}],"endpoint":"/pith/2605.17291/integrity.json","findings":[],"snapshot_sha256":"ebcd9428039ef9251e00d34accdbb93b5097b255ede3aea731e5314ae719f856","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) is widely used to improve reasoning in large language models, but rewards only final-answer correctness with no supervision over intermediate steps. Rubric-based methods such as Rubrics as Rewards (RaR) introduce finer-grained supervision by scoring rollouts against structured criteria, yet the rubric scores are still aggregated into a single scalar applied to the entire response, causing three weaknesses: loss of multi-criterion structure, uniform supervision of correct and incorrect steps, and reward hacking through unbounded self-correct","authors_text":"Baobao Chang, Haozhe Zhao, Jiaqi Wang, Kean Shi, Liang Chen, Minghao Ye, Nan Duan, Ruoyu Wu, Shuai Dong, Weichu Xie, Wenpu Liu, Wenqi Shao, Xiaoying Zhang, Xinbo Xu, Yongfu Zhu, Yuqi Xu, Zirong Chen, Ziyue Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-17T07:08:14Z","title":"Step-wise Rubric Rewards for LLM Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.17291","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d9b8e03c542e17cbef5fbf38b23a3b52057194f10551498138d25726f42875e1","target":"record","created_at":"2026-05-20T00:03:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e39eb0f8e01e914c88daecf5d8124ecf20ace960955d272b6dafdc6e618d269b","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-05-17T07:08:14Z","title_canon_sha256":"150fc0852294ff79d3cee6087945344aec1f7895b9f96721f7377d389ccfae5a"},"schema_version":"1.0","source":{"id":"2605.17291","kind":"arxiv","version":1}},"canonical_sha256":"1c1b1bb879b4dd8293da59bf251ddaf493505092b7ee73e786f254c833b69fb6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1c1b1bb879b4dd8293da59bf251ddaf493505092b7ee73e786f254c833b69fb6","first_computed_at":"2026-05-20T00:03:50.437710Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:50.437710Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"DjLfAQ6EcwNdyTDA6/wiPrCELueewvGLua2ISL+oIfYf38LiHdn4L1Z6ZS15yUKcYmdf+SwN3qqP+kXIgkThDw==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:50.438389Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.17291","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d9b8e03c542e17cbef5fbf38b23a3b52057194f10551498138d25726f42875e1","sha256:33a6085053ab220cbc401c6683da30978024d556041144e86eff4ecfa125521f"],"state_sha256":"b28c46558e671f7f66a875dffc8dba89a7ba5e62d0ee4584a635777a59a337b0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Alw7YYyFEYQNIfOMbyCm/9htSr42iz/fPzqeZ6ek/g/2NvgkrMaiHFJYDQiGpFsh8RQ0CsAhzR5qPJYas7osBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T23:58:18.405849Z","bundle_sha256":"c2acc40ea3b5f1a723f9140608970e8e014516932a39df4c7d080fec0d2ad329"}}