{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:CJLEFONXJJRN5SP7FUQYQHHQKJ","short_pith_number":"pith:CJLEFONX","schema_version":"1.0","canonical_sha256":"125642b9b74a62dec9ff2d21881cf0527094bc652a5ae4f925e300ec7387f9a6","source":{"kind":"arxiv","id":"2605.27765","version":1},"attestation_state":"computed","paper":{"title":"Restoring the Sweet Spot: Pass-Rate Weighted Self-Distillation for LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jinghui Chen, Vasant G. Honavar, Yuanpu Cao, Zehao Liu","submitted_at":"2026-05-26T23:30:01Z","abstract_excerpt":"Self-Distillation Policy Optimization (SDPO) provides dense token-level credit assignment for reinforcement learning with large language models by leveraging the model's own feedback-conditioned predictions as a self-teacher. Unlike GRPO, however, whose group-relative advantage naturally concentrates learning on a sweet spot of intermediate-difficulty questions, SDPO's KL-based advantage lacks an implicit notion of difficulty awareness.\n  We analyze this gap through the lens of GRPO's advantage normalization. Extending the learnability framework to normalized rewards, we show that normalizatio"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.27765","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-26T23:30:01Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"823ce2018611d67546e1d93952525598a6d42498403957450540e0294d2221df","abstract_canon_sha256":"ceaee9abbb9eb708eb48f6f0ebcb14da28ec4d801a3bfce02ffbfb4982ed5c7c"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:48.330729Z","signature_b64":"54aT5sZuSYZrGl4JwoZSoaqP7kRkKcUtNqksz76RN/rellDwg3r6SPUYV316n1ax5TWMerT9CiY1QDIqKUj5Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"125642b9b74a62dec9ff2d21881cf0527094bc652a5ae4f925e300ec7387f9a6","last_reissued_at":"2026-05-28T01:04:48.330348Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:48.330348Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Restoring the Sweet Spot: Pass-Rate Weighted Self-Distillation for LLM Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jinghui Chen, Vasant G. Honavar, Yuanpu Cao, Zehao Liu","submitted_at":"2026-05-26T23:30:01Z","abstract_excerpt":"Self-Distillation Policy Optimization (SDPO) provides dense token-level credit assignment for reinforcement learning with large language models by leveraging the model's own feedback-conditioned predictions as a self-teacher. Unlike GRPO, however, whose group-relative advantage naturally concentrates learning on a sweet spot of intermediate-difficulty questions, SDPO's KL-based advantage lacks an implicit notion of difficulty awareness.\n  We analyze this gap through the lens of GRPO's advantage normalization. Extending the learnability framework to normalized rewards, we show that normalizatio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.27765","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.27765/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.27765","created_at":"2026-05-28T01:04:48.330403+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.27765v1","created_at":"2026-05-28T01:04:48.330403+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.27765","created_at":"2026-05-28T01:04:48.330403+00:00"},{"alias_kind":"pith_short_12","alias_value":"CJLEFONXJJRN","created_at":"2026-05-28T01:04:48.330403+00:00"},{"alias_kind":"pith_short_16","alias_value":"CJLEFONXJJRN5SP7","created_at":"2026-05-28T01:04:48.330403+00:00"},{"alias_kind":"pith_short_8","alias_value":"CJLEFONX","created_at":"2026-05-28T01:04:48.330403+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ","json":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ.json","graph_json":"https://pith.science/api/pith-number/CJLEFONXJJRN5SP7FUQYQHHQKJ/graph.json","events_json":"https://pith.science/api/pith-number/CJLEFONXJJRN5SP7FUQYQHHQKJ/events.json","paper":"https://pith.science/paper/CJLEFONX"},"agent_actions":{"view_html":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ","download_json":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ.json","view_paper":"https://pith.science/paper/CJLEFONX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.27765&json=true","fetch_graph":"https://pith.science/api/pith-number/CJLEFONXJJRN5SP7FUQYQHHQKJ/graph.json","fetch_events":"https://pith.science/api/pith-number/CJLEFONXJJRN5SP7FUQYQHHQKJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ/action/storage_attestation","attest_author":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ/action/author_attestation","sign_citation":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ/action/citation_signature","submit_replication":"https://pith.science/pith/CJLEFONXJJRN5SP7FUQYQHHQKJ/action/replication_record"}},"created_at":"2026-05-28T01:04:48.330403+00:00","updated_at":"2026-05-28T01:04:48.330403+00:00"}