{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:PWK3NMHMKKFQRA4TWJJIPVKOJA","short_pith_number":"pith:PWK3NMHM","canonical_record":{"source":{"id":"2606.03021","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T01:55:54Z","cross_cats_sorted":[],"title_canon_sha256":"e7ae8e42101178225bf375156f86c5b3011a133635405a127d3cfbb9ecd3ff53","abstract_canon_sha256":"7c11bf83a396d8841ff5c3fbcd213c11c4fc4cef432a2dfe8da6856e1cbb74f4"},"schema_version":"1.0"},"canonical_sha256":"7d95b6b0ec528b088393b25287d54e48330846a512da1310d6cea4c60316aabb","source":{"kind":"arxiv","id":"2606.03021","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03021","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03021v1","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03021","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_12","alias_value":"PWK3NMHMKKFQ","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_16","alias_value":"PWK3NMHMKKFQRA4T","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_8","alias_value":"PWK3NMHM","created_at":"2026-06-03T01:05:29Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:PWK3NMHMKKFQRA4TWJJIPVKOJA","target":"record","payload":{"canonical_record":{"source":{"id":"2606.03021","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T01:55:54Z","cross_cats_sorted":[],"title_canon_sha256":"e7ae8e42101178225bf375156f86c5b3011a133635405a127d3cfbb9ecd3ff53","abstract_canon_sha256":"7c11bf83a396d8841ff5c3fbcd213c11c4fc4cef432a2dfe8da6856e1cbb74f4"},"schema_version":"1.0"},"canonical_sha256":"7d95b6b0ec528b088393b25287d54e48330846a512da1310d6cea4c60316aabb","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:29.464783Z","signature_b64":"NlpkXXi25Ab+L98Qn17Mr690GAsCOQFVyy5iDk/hZNR+8DRGdTkbfTFR6O/E5/F2xg75699J5MxARyLNK2t9CA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7d95b6b0ec528b088393b25287d54e48330846a512da1310d6cea4c60316aabb","last_reissued_at":"2026-06-03T01:05:29.464344Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:29.464344Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.03021","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jnGCg7RzU2iYVydPmdl5q6R4f5utdlt7QHHr4jZdzAGBycbyVXtIfs+aUfhGclYCzfu9Vb5CfSvCy725ONEcDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T18:13:34.839935Z"},"content_sha256":"a5d218e6ef939d31c76da333da4ce04298594836bf5389bc89c58170f69f4f84","schema_version":"1.0","event_id":"sha256:a5d218e6ef939d31c76da333da4ce04298594836bf5389bc89c58170f69f4f84"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:PWK3NMHMKKFQRA4TWJJIPVKOJA","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Hint-Guided Diversified Policy Optimization for LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Can Ye, Kaixin Wu, Mingjie Zhong, Peifeng Li, Qiaoming Zhu, Xiaobo Li, Zhiyu Cao","submitted_at":"2026-06-02T01:55:54Z","abstract_excerpt":"Recent developments in Large Language Models (LLMs) have showcased impressive reasoning capabilities, with Reinforcement Learning with Verifiable Rewards (RLVR) being a promising enhancement strategy. However, existing reward mechanisms are constrained to the outcome-level correctness and lack explicit signals to guide the model to consider diverse solutions. In contrast, human problem solving typically involves evaluating multiple potential approaches and selecting the most reliable solution, a cognitive process that current RLVR frameworks do not explicitly incentivize. Inspired by this, we "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03021","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.03021/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-03T01:05:29Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DJs10eRanKtaHZjcu2vsy18qVhK47cPbTeXY98yWawukDtuVFvJY8UTgFYaNxsxn5XCsKpB7Jk1UEtlcoXDDAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T18:13:34.840328Z"},"content_sha256":"6f7cf347fec8ad78ca8a44ab52f421a2d4566f1d1a6b4683108a9d47589faaa7","schema_version":"1.0","event_id":"sha256:6f7cf347fec8ad78ca8a44ab52f421a2d4566f1d1a6b4683108a9d47589faaa7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/bundle.json","state_url":"https://pith.science/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T18:13:34Z","links":{"resolver":"https://pith.science/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA","bundle":"https://pith.science/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/bundle.json","state":"https://pith.science/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/state.json","well_known_bundle":"https://pith.science/.well-known/pith/PWK3NMHMKKFQRA4TWJJIPVKOJA/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:PWK3NMHMKKFQRA4TWJJIPVKOJA","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"7c11bf83a396d8841ff5c3fbcd213c11c4fc4cef432a2dfe8da6856e1cbb74f4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T01:55:54Z","title_canon_sha256":"e7ae8e42101178225bf375156f86c5b3011a133635405a127d3cfbb9ecd3ff53"},"schema_version":"1.0","source":{"id":"2606.03021","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.03021","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"arxiv_version","alias_value":"2606.03021v1","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.03021","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_12","alias_value":"PWK3NMHMKKFQ","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_16","alias_value":"PWK3NMHMKKFQRA4T","created_at":"2026-06-03T01:05:29Z"},{"alias_kind":"pith_short_8","alias_value":"PWK3NMHM","created_at":"2026-06-03T01:05:29Z"}],"graph_snapshots":[{"event_id":"sha256:6f7cf347fec8ad78ca8a44ab52f421a2d4566f1d1a6b4683108a9d47589faaa7","target":"graph","created_at":"2026-06-03T01:05:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.03021/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Recent developments in Large Language Models (LLMs) have showcased impressive reasoning capabilities, with Reinforcement Learning with Verifiable Rewards (RLVR) being a promising enhancement strategy. However, existing reward mechanisms are constrained to the outcome-level correctness and lack explicit signals to guide the model to consider diverse solutions. In contrast, human problem solving typically involves evaluating multiple potential approaches and selecting the most reliable solution, a cognitive process that current RLVR frameworks do not explicitly incentivize. Inspired by this, we ","authors_text":"Can Ye, Kaixin Wu, Mingjie Zhong, Peifeng Li, Qiaoming Zhu, Xiaobo Li, Zhiyu Cao","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T01:55:54Z","title":"Hint-Guided Diversified Policy Optimization for LLM Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.03021","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a5d218e6ef939d31c76da333da4ce04298594836bf5389bc89c58170f69f4f84","target":"record","created_at":"2026-06-03T01:05:29Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"7c11bf83a396d8841ff5c3fbcd213c11c4fc4cef432a2dfe8da6856e1cbb74f4","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-02T01:55:54Z","title_canon_sha256":"e7ae8e42101178225bf375156f86c5b3011a133635405a127d3cfbb9ecd3ff53"},"schema_version":"1.0","source":{"id":"2606.03021","kind":"arxiv","version":1}},"canonical_sha256":"7d95b6b0ec528b088393b25287d54e48330846a512da1310d6cea4c60316aabb","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7d95b6b0ec528b088393b25287d54e48330846a512da1310d6cea4c60316aabb","first_computed_at":"2026-06-03T01:05:29.464344Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-03T01:05:29.464344Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"NlpkXXi25Ab+L98Qn17Mr690GAsCOQFVyy5iDk/hZNR+8DRGdTkbfTFR6O/E5/F2xg75699J5MxARyLNK2t9CA==","signature_status":"signed_v1","signed_at":"2026-06-03T01:05:29.464783Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.03021","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a5d218e6ef939d31c76da333da4ce04298594836bf5389bc89c58170f69f4f84","sha256:6f7cf347fec8ad78ca8a44ab52f421a2d4566f1d1a6b4683108a9d47589faaa7"],"state_sha256":"23a3b4b808dfba1df50155130ba7cef17838ed520334c71569e12063447d1c2d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NrVgsnQ3orG0Qpj2z/cYaksaQpUAjJQEB59ElY8Gam77eO8jOABvQ6Fxi3b1CzG8KhUQgpaHn11Nb6zixSKOBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T18:13:34.842398Z","bundle_sha256":"4583dc0c86b5ca47836c84bb8310c453525ba22b3304d464c64727f41a593987"}}