{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:CZZ3PQXXI4NSB6MKCZWFWSNUAA","short_pith_number":"pith:CZZ3PQXX","canonical_record":{"source":{"id":"2605.21125","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T12:57:37Z","cross_cats_sorted":[],"title_canon_sha256":"549cb9c9b81b0d20b960518ecce86b4fa0cc8dff1ff6d8dcc8a4863fd4d4b4dc","abstract_canon_sha256":"c09774b4a08c5fc6d412f5a9fbd1aa8b33115cedbc7d8b5d7b7632ab888d7d22"},"schema_version":"1.0"},"canonical_sha256":"1673b7c2f7471b20f98a166c5b49b400136e825ff2fa7da142884fea23d0d5b8","source":{"kind":"arxiv","id":"2605.21125","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21125","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21125v1","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21125","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_12","alias_value":"CZZ3PQXXI4NS","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_16","alias_value":"CZZ3PQXXI4NSB6MK","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_8","alias_value":"CZZ3PQXX","created_at":"2026-05-21T01:05:38Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:CZZ3PQXXI4NSB6MKCZWFWSNUAA","target":"record","payload":{"canonical_record":{"source":{"id":"2605.21125","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T12:57:37Z","cross_cats_sorted":[],"title_canon_sha256":"549cb9c9b81b0d20b960518ecce86b4fa0cc8dff1ff6d8dcc8a4863fd4d4b4dc","abstract_canon_sha256":"c09774b4a08c5fc6d412f5a9fbd1aa8b33115cedbc7d8b5d7b7632ab888d7d22"},"schema_version":"1.0"},"canonical_sha256":"1673b7c2f7471b20f98a166c5b49b400136e825ff2fa7da142884fea23d0d5b8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:38.784790Z","signature_b64":"U8W0QcAxXMzJb02UXlQ8RUc1Ti39jEYFCnMc1xlklf7/3SUz430Ucd0a6Xh4+4ztzJVYA+aAsCVTmqvYxmCiDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1673b7c2f7471b20f98a166c5b49b400136e825ff2fa7da142884fea23d0d5b8","last_reissued_at":"2026-05-21T01:05:38.784028Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:38.784028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.21125","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lJFBzoHOGtkHvU9ACeL+C3VLV7mYQFkTSD93py66G0YKDZsoosAddHysWNXvMxIDv8tYwyu/qaOPx72lbXm7DQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T10:47:29.406685Z"},"content_sha256":"881ebf9b644f4637106df92394dd38a29a867f22286c1453189b0a7231054781","schema_version":"1.0","event_id":"sha256:881ebf9b644f4637106df92394dd38a29a867f22286c1453189b0a7231054781"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:CZZ3PQXXI4NSB6MKCZWFWSNUAA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Advantage Collapse in Group Relative Policy Optimization: Diagnosis and Mitigation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Ao Cheng, Hailun Lu, Qingyong Hu, Qiyao Sun, Runke Huang, Xingming Li, Xixiang He, Xuanyu Ji","submitted_at":"2026-05-20T12:57:37Z","abstract_excerpt":"Group Relative Policy Optimization (GRPO), a prominent algorithm within the Reinforcement Learning from Verifiable Rewards (RLVR) framework, has achieved strong results in improving the reasoning capabilities of large language models (LLMs). However, GRPO is prone to advantage collapse, a failure mode where homogeneous rewards within a group (e.g., all correct or all incorrect answers) yield near-zero advantages and vanishing gradients. To address this, we introduce the Advantage Collapse Rate (ACR), the first diagnostic metric quantifying the proportion of training batches with ineffective gr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21125","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21125/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"b+yt9v5CjhHTF2Mfs0/d7q39xt4k0Pdl8Ab1kfuUmp0PyEsOZd0UeVLvGPQRwwVJ19AYu7CIylhGbMf+RUoOCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-04T10:47:29.407074Z"},"content_sha256":"86aa0781afc5e300eb4c028380c69423645549a7dd069fc54ff7b1225c26eb60","schema_version":"1.0","event_id":"sha256:86aa0781afc5e300eb4c028380c69423645549a7dd069fc54ff7b1225c26eb60"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/bundle.json","state_url":"https://pith.science/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-04T10:47:29Z","links":{"resolver":"https://pith.science/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA","bundle":"https://pith.science/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/bundle.json","state":"https://pith.science/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CZZ3PQXXI4NSB6MKCZWFWSNUAA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:CZZ3PQXXI4NSB6MKCZWFWSNUAA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c09774b4a08c5fc6d412f5a9fbd1aa8b33115cedbc7d8b5d7b7632ab888d7d22","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T12:57:37Z","title_canon_sha256":"549cb9c9b81b0d20b960518ecce86b4fa0cc8dff1ff6d8dcc8a4863fd4d4b4dc"},"schema_version":"1.0","source":{"id":"2605.21125","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21125","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21125v1","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21125","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_12","alias_value":"CZZ3PQXXI4NS","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_16","alias_value":"CZZ3PQXXI4NSB6MK","created_at":"2026-05-21T01:05:38Z"},{"alias_kind":"pith_short_8","alias_value":"CZZ3PQXX","created_at":"2026-05-21T01:05:38Z"}],"graph_snapshots":[{"event_id":"sha256:86aa0781afc5e300eb4c028380c69423645549a7dd069fc54ff7b1225c26eb60","target":"graph","created_at":"2026-05-21T01:05:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.21125/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Group Relative Policy Optimization (GRPO), a prominent algorithm within the Reinforcement Learning from Verifiable Rewards (RLVR) framework, has achieved strong results in improving the reasoning capabilities of large language models (LLMs). However, GRPO is prone to advantage collapse, a failure mode where homogeneous rewards within a group (e.g., all correct or all incorrect answers) yield near-zero advantages and vanishing gradients. To address this, we introduce the Advantage Collapse Rate (ACR), the first diagnostic metric quantifying the proportion of training batches with ineffective gr","authors_text":"Ao Cheng, Hailun Lu, Qingyong Hu, Qiyao Sun, Runke Huang, Xingming Li, Xixiang He, Xuanyu Ji","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T12:57:37Z","title":"Advantage Collapse in Group Relative Policy Optimization: Diagnosis and Mitigation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21125","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:881ebf9b644f4637106df92394dd38a29a867f22286c1453189b0a7231054781","target":"record","created_at":"2026-05-21T01:05:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c09774b4a08c5fc6d412f5a9fbd1aa8b33115cedbc7d8b5d7b7632ab888d7d22","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T12:57:37Z","title_canon_sha256":"549cb9c9b81b0d20b960518ecce86b4fa0cc8dff1ff6d8dcc8a4863fd4d4b4dc"},"schema_version":"1.0","source":{"id":"2605.21125","kind":"arxiv","version":1}},"canonical_sha256":"1673b7c2f7471b20f98a166c5b49b400136e825ff2fa7da142884fea23d0d5b8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1673b7c2f7471b20f98a166c5b49b400136e825ff2fa7da142884fea23d0d5b8","first_computed_at":"2026-05-21T01:05:38.784028Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:38.784028Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"U8W0QcAxXMzJb02UXlQ8RUc1Ti39jEYFCnMc1xlklf7/3SUz430Ucd0a6Xh4+4ztzJVYA+aAsCVTmqvYxmCiDg==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:38.784790Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.21125","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:881ebf9b644f4637106df92394dd38a29a867f22286c1453189b0a7231054781","sha256:86aa0781afc5e300eb4c028380c69423645549a7dd069fc54ff7b1225c26eb60"],"state_sha256":"e68aac8ac8ecdbf6cf737792399ff763f9038b6dc6287f5f1828d20bc44faf29"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"KjG5jtE59BWgkq84iZ19YhdWZp7NtDPktz7zb9CTtIjOu5duOb73JemP5LxZxbyRPRL9ftrrVA8hJWnQukTdCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-04T10:47:29.409177Z","bundle_sha256":"74613e75a5e36cd9b47bfa6d98875b466832290f92de04420e8bc21ce1f38b20"}}