{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:3NJ3S6G5ZDXLV37AJEMCHT7D7H","short_pith_number":"pith:3NJ3S6G5","canonical_record":{"source":{"id":"2602.12579","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-13T03:40:52Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d0fe213f4d9e0c57760e55cc7ffced71890c342cf26de4252566538ee5cd6bf3","abstract_canon_sha256":"2ac36c8c757ee37f44e31a760dbc07db5927f1eb7fd141b27b99914bd8cd8d0c"},"schema_version":"1.0"},"canonical_sha256":"db53b978ddc8eebaefe0491823cfe3f9ef3c4bc295a3f8b2d2eec159745bc14b","source":{"kind":"arxiv","id":"2602.12579","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.12579","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"arxiv_version","alias_value":"2602.12579v2","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.12579","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_12","alias_value":"3NJ3S6G5ZDXL","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_16","alias_value":"3NJ3S6G5ZDXLV37A","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_8","alias_value":"3NJ3S6G5","created_at":"2026-05-25T02:02:13Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:3NJ3S6G5ZDXLV37AJEMCHT7D7H","target":"record","payload":{"canonical_record":{"source":{"id":"2602.12579","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-13T03:40:52Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d0fe213f4d9e0c57760e55cc7ffced71890c342cf26de4252566538ee5cd6bf3","abstract_canon_sha256":"2ac36c8c757ee37f44e31a760dbc07db5927f1eb7fd141b27b99914bd8cd8d0c"},"schema_version":"1.0"},"canonical_sha256":"db53b978ddc8eebaefe0491823cfe3f9ef3c4bc295a3f8b2d2eec159745bc14b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:13.416460Z","signature_b64":"tTA3RykCq6l6fsa6BFSD4OkwzUSuNF7YZLOAnSQWUHNku+ToK4lgS/coYJcvYYyfa7Iu869ZSOW4radBvSP1BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"db53b978ddc8eebaefe0491823cfe3f9ef3c4bc295a3f8b2d2eec159745bc14b","last_reissued_at":"2026-05-25T02:02:13.415539Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:13.415539Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.12579","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ed0RIgwrcKVSIRaeyWH/a0bc/2d2tqx+Ky+u55YYehw5842BxO/9jG+L0P01ZNOJzYdHE/kxU9y5HUSNK+UYDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T17:04:31.711669Z"},"content_sha256":"f0a37630d8ff7f9395e29960c7829955663d801ec552ba7fb8c10ba4c549e0c9","schema_version":"1.0","event_id":"sha256:f0a37630d8ff7f9395e29960c7829955663d801ec552ba7fb8c10ba4c549e0c9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:3NJ3S6G5ZDXLV37AJEMCHT7D7H","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VI-CuRL: Stabilizing Verifier-Independent RL Reasoning via Confidence-Guided Variance Reduction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Masashi Sugiyama, Xin-Qiang Cai","submitted_at":"2026-02-13T03:40:52Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has emerged as a dominant paradigm for enhancing Large Language Models (LLMs) reasoning, yet its reliance on external verifiers limits its scalability. Recent findings suggest that RLVR primarily functions by eliciting latent capabilities, motivating the development of verifier-free algorithms. However, in such settings, standard methods like Group Relative Policy Optimization face a critical challenge: destructive gradient variance that often leads to training collapse. To address this issue, we introduce Verifier-Independent Curriculum Re"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.12579","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.12579/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LhPZXXVMJWMSj3P999HaDsX4ixQz3IDoVbVdjgMSVXUIjQqrSilYt2hiysPHUZu+sZwHMSZTz7qV9Fg7n8RGDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T17:04:31.712055Z"},"content_sha256":"1bee9db73515ecc5b66881637d43043c5e4d46ad3adc8c9479c5639a289d9da0","schema_version":"1.0","event_id":"sha256:1bee9db73515ecc5b66881637d43043c5e4d46ad3adc8c9479c5639a289d9da0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/bundle.json","state_url":"https://pith.science/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T17:04:31Z","links":{"resolver":"https://pith.science/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H","bundle":"https://pith.science/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/bundle.json","state":"https://pith.science/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/state.json","well_known_bundle":"https://pith.science/.well-known/pith/3NJ3S6G5ZDXLV37AJEMCHT7D7H/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:3NJ3S6G5ZDXLV37AJEMCHT7D7H","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2ac36c8c757ee37f44e31a760dbc07db5927f1eb7fd141b27b99914bd8cd8d0c","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-13T03:40:52Z","title_canon_sha256":"d0fe213f4d9e0c57760e55cc7ffced71890c342cf26de4252566538ee5cd6bf3"},"schema_version":"1.0","source":{"id":"2602.12579","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.12579","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"arxiv_version","alias_value":"2602.12579v2","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.12579","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_12","alias_value":"3NJ3S6G5ZDXL","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_16","alias_value":"3NJ3S6G5ZDXLV37A","created_at":"2026-05-25T02:02:13Z"},{"alias_kind":"pith_short_8","alias_value":"3NJ3S6G5","created_at":"2026-05-25T02:02:13Z"}],"graph_snapshots":[{"event_id":"sha256:1bee9db73515ecc5b66881637d43043c5e4d46ad3adc8c9479c5639a289d9da0","target":"graph","created_at":"2026-05-25T02:02:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.12579/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has emerged as a dominant paradigm for enhancing Large Language Models (LLMs) reasoning, yet its reliance on external verifiers limits its scalability. Recent findings suggest that RLVR primarily functions by eliciting latent capabilities, motivating the development of verifier-free algorithms. However, in such settings, standard methods like Group Relative Policy Optimization face a critical challenge: destructive gradient variance that often leads to training collapse. To address this issue, we introduce Verifier-Independent Curriculum Re","authors_text":"Masashi Sugiyama, Xin-Qiang Cai","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-13T03:40:52Z","title":"VI-CuRL: Stabilizing Verifier-Independent RL Reasoning via Confidence-Guided Variance Reduction"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.12579","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f0a37630d8ff7f9395e29960c7829955663d801ec552ba7fb8c10ba4c549e0c9","target":"record","created_at":"2026-05-25T02:02:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2ac36c8c757ee37f44e31a760dbc07db5927f1eb7fd141b27b99914bd8cd8d0c","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-02-13T03:40:52Z","title_canon_sha256":"d0fe213f4d9e0c57760e55cc7ffced71890c342cf26de4252566538ee5cd6bf3"},"schema_version":"1.0","source":{"id":"2602.12579","kind":"arxiv","version":2}},"canonical_sha256":"db53b978ddc8eebaefe0491823cfe3f9ef3c4bc295a3f8b2d2eec159745bc14b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"db53b978ddc8eebaefe0491823cfe3f9ef3c4bc295a3f8b2d2eec159745bc14b","first_computed_at":"2026-05-25T02:02:13.415539Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:02:13.415539Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"tTA3RykCq6l6fsa6BFSD4OkwzUSuNF7YZLOAnSQWUHNku+ToK4lgS/coYJcvYYyfa7Iu869ZSOW4radBvSP1BQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:02:13.416460Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.12579","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f0a37630d8ff7f9395e29960c7829955663d801ec552ba7fb8c10ba4c549e0c9","sha256:1bee9db73515ecc5b66881637d43043c5e4d46ad3adc8c9479c5639a289d9da0"],"state_sha256":"5c576027dd0e547fcd273c6108dcb401634342221ab06933730e72a2900db87b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UEHbeY00mX1MwOcACijl07q4Ws3t6KAhujZ2pbYgoL0NrDwPrueZAn1yeub00Mc8FOXHX+dkh8SslLptgvdXDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T17:04:31.714373Z","bundle_sha256":"ce808f74f08e0e450fe82636edf52c5fee4f342c3e411e7edf3906453b7b6588"}}