{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:7HYUXWVS37TLBJKEEYILH4DKPH","short_pith_number":"pith:7HYUXWVS","canonical_record":{"source":{"id":"2510.04140","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-05T10:38:55Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"e24415cf337e2ac4972be96fedd0e966aa6ea63c9a81a144369486224869187f","abstract_canon_sha256":"c88a16f5483942b4670de469cb3b187decebad3e69f8bff777ef8ff9408c8fdb"},"schema_version":"1.0"},"canonical_sha256":"f9f14bdab2dfe6b0a5442610b3f06a79c8107941b4f69f4eaa2a26e63cda3e46","source":{"kind":"arxiv","id":"2510.04140","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.04140","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"2510.04140v2","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.04140","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"7HYUXWVS37TL","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_16","alias_value":"7HYUXWVS37TLBJKE","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_8","alias_value":"7HYUXWVS","created_at":"2026-07-02T01:17:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:7HYUXWVS37TLBJKEEYILH4DKPH","target":"record","payload":{"canonical_record":{"source":{"id":"2510.04140","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-05T10:38:55Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"e24415cf337e2ac4972be96fedd0e966aa6ea63c9a81a144369486224869187f","abstract_canon_sha256":"c88a16f5483942b4670de469cb3b187decebad3e69f8bff777ef8ff9408c8fdb"},"schema_version":"1.0"},"canonical_sha256":"f9f14bdab2dfe6b0a5442610b3f06a79c8107941b4f69f4eaa2a26e63cda3e46","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:17:24.713667Z","signature_b64":"l1u9/SkGUItdSnsh2kBwEr49sBHw0UQEXg2odt4N4l1MPNjFFp0UvE5+bqKHx6gHSEOGMRp0GeOiXDoZWQD8DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f9f14bdab2dfe6b0a5442610b3f06a79c8107941b4f69f4eaa2a26e63cda3e46","last_reissued_at":"2026-07-02T01:17:24.713164Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:17:24.713164Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2510.04140","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HGjJ2sjngW2/j+ESpRzdYQ6HJ+qPn5sizyp9j0Hyc9pbkrSQAnn6khPNVKLUBwwmrAZYD14Seg2teUB8Xjy9BQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T08:56:12.264492Z"},"content_sha256":"32d00cb78e0589f00f5c9b1a482a825d88da8187dc2abe0fae67817cc178c2f4","schema_version":"1.0","event_id":"sha256:32d00cb78e0589f00f5c9b1a482a825d88da8187dc2abe0fae67817cc178c2f4"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:7HYUXWVS37TLBJKEEYILH4DKPH","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Selective Expert Guidance for Effective and Diverse Exploration in Reinforcement Learning of LLMs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Fei Yu, Jiaqing Liang, Jinyi Han, Shuguang Ma, Sihang Jiang, Tingyun Li, Xinyi Wang, Yanghua Xiao, Zhaoqian Dai, Zishang Jiang","submitted_at":"2025-10-05T10:38:55Z","abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has become a widely adopted technique for enhancing the reasoning ability of Large Language Models (LLMs). However, the effectiveness of RLVR strongly depends on the capability of base models. This issue arises because it requires the model to have sufficient capability to perform high-quality exploration, which involves both effectiveness and diversity. Unfortunately, existing methods address this issue by imitating expert trajectories, which improve effectiveness but neglect diversity. To address this, we argue that the expert only needs "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.04140","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2510.04140/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T01:17:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Uj14DUC4OjXhJzC/Fax4QYLf/V6xUYHZCvaWcqk5UuhO5G1itwYpKPZNG5YDpAbkZt1AqC2KWnAAV1kopjTyAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-02T08:56:12.264866Z"},"content_sha256":"32185eda9e1a9ee760f804b5a47c0aecae81a56d4b895c0c4e24fa511d3b1e92","schema_version":"1.0","event_id":"sha256:32185eda9e1a9ee760f804b5a47c0aecae81a56d4b895c0c4e24fa511d3b1e92"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7HYUXWVS37TLBJKEEYILH4DKPH/bundle.json","state_url":"https://pith.science/pith/7HYUXWVS37TLBJKEEYILH4DKPH/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7HYUXWVS37TLBJKEEYILH4DKPH/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-02T08:56:12Z","links":{"resolver":"https://pith.science/pith/7HYUXWVS37TLBJKEEYILH4DKPH","bundle":"https://pith.science/pith/7HYUXWVS37TLBJKEEYILH4DKPH/bundle.json","state":"https://pith.science/pith/7HYUXWVS37TLBJKEEYILH4DKPH/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7HYUXWVS37TLBJKEEYILH4DKPH/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:7HYUXWVS37TLBJKEEYILH4DKPH","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"c88a16f5483942b4670de469cb3b187decebad3e69f8bff777ef8ff9408c8fdb","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-05T10:38:55Z","title_canon_sha256":"e24415cf337e2ac4972be96fedd0e966aa6ea63c9a81a144369486224869187f"},"schema_version":"1.0","source":{"id":"2510.04140","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.04140","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"arxiv_version","alias_value":"2510.04140v2","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.04140","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_12","alias_value":"7HYUXWVS37TL","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_16","alias_value":"7HYUXWVS37TLBJKE","created_at":"2026-07-02T01:17:24Z"},{"alias_kind":"pith_short_8","alias_value":"7HYUXWVS","created_at":"2026-07-02T01:17:24Z"}],"graph_snapshots":[{"event_id":"sha256:32185eda9e1a9ee760f804b5a47c0aecae81a56d4b895c0c4e24fa511d3b1e92","target":"graph","created_at":"2026-07-02T01:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.04140/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement Learning with Verifiable Rewards (RLVR) has become a widely adopted technique for enhancing the reasoning ability of Large Language Models (LLMs). However, the effectiveness of RLVR strongly depends on the capability of base models. This issue arises because it requires the model to have sufficient capability to perform high-quality exploration, which involves both effectiveness and diversity. Unfortunately, existing methods address this issue by imitating expert trajectories, which improve effectiveness but neglect diversity. To address this, we argue that the expert only needs ","authors_text":"Fei Yu, Jiaqing Liang, Jinyi Han, Shuguang Ma, Sihang Jiang, Tingyun Li, Xinyi Wang, Yanghua Xiao, Zhaoqian Dai, Zishang Jiang","cross_cats":["cs.CL"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-05T10:38:55Z","title":"Selective Expert Guidance for Effective and Diverse Exploration in Reinforcement Learning of LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.04140","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:32d00cb78e0589f00f5c9b1a482a825d88da8187dc2abe0fae67817cc178c2f4","target":"record","created_at":"2026-07-02T01:17:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"c88a16f5483942b4670de469cb3b187decebad3e69f8bff777ef8ff9408c8fdb","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2025-10-05T10:38:55Z","title_canon_sha256":"e24415cf337e2ac4972be96fedd0e966aa6ea63c9a81a144369486224869187f"},"schema_version":"1.0","source":{"id":"2510.04140","kind":"arxiv","version":2}},"canonical_sha256":"f9f14bdab2dfe6b0a5442610b3f06a79c8107941b4f69f4eaa2a26e63cda3e46","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f9f14bdab2dfe6b0a5442610b3f06a79c8107941b4f69f4eaa2a26e63cda3e46","first_computed_at":"2026-07-02T01:17:24.713164Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T01:17:24.713164Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"l1u9/SkGUItdSnsh2kBwEr49sBHw0UQEXg2odt4N4l1MPNjFFp0UvE5+bqKHx6gHSEOGMRp0GeOiXDoZWQD8DQ==","signature_status":"signed_v1","signed_at":"2026-07-02T01:17:24.713667Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.04140","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:32d00cb78e0589f00f5c9b1a482a825d88da8187dc2abe0fae67817cc178c2f4","sha256:32185eda9e1a9ee760f804b5a47c0aecae81a56d4b895c0c4e24fa511d3b1e92"],"state_sha256":"3f46a65e4ca3128a4b8b45f4d6db8ac3c426f34acc3d0c81beb31851f87bdb10"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Mn5rC7KRex2PUoUMvc8veCLYyIneX/VCpqmwMaRUdFUt5AZTYugFxH2+Kqm4maZbTlfQ0cfySbQ7/t9R7WiTAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-02T08:56:12.266899Z","bundle_sha256":"732e7768252e9fcef95e207b8e954d95548e9c43ea0961d6a7444a2fd2031b35"}}