{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:VFSZIZUGM3CRYZWD7NOGQM6KSB","short_pith_number":"pith:VFSZIZUG","schema_version":"1.0","canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","source":{"kind":"arxiv","id":"2509.16679","version":1},"attestation_state":"computed","paper":{"title":"Reinforcement Learning Meets Large Language Models: A Survey of Advancements and Applications Across the LLM Lifecycle","license":"","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dingkang Yang, Hongsheng Li, Jun Liu, Keliang Liu, Lihua Zhang, Peng Zhai, Weijie Yin, Yang Liu, Yuchi Wang, Ziyun Qian","submitted_at":"2025-09-20T13:11:28Z","abstract_excerpt":"In recent years, training methods centered on Reinforcement Learning (RL) have markedly enhanced the reasoning and alignment performance of Large Language Models (LLMs), particularly in understanding human intents, following user instructions, and bolstering inferential strength. Although existing surveys offer overviews of RL augmented LLMs, their scope is often limited, failing to provide a comprehensive summary of how RL operates across the full lifecycle of LLMs. We systematically review the theoretical and practical advancements whereby RL empowers LLMs, especially Reinforcement Learning "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.16679","kind":"arxiv","version":1},"metadata":{"license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","cross_cats_sorted":[],"title_canon_sha256":"04d9dba191602aa11aac11ee18a3864f6d8dfbc676909e99b9f294907ffd5376","abstract_canon_sha256":"0fab1bc352ab3d44443c76cf65d5f8cf7852182ca3089250dbbaf282d75880e9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T08:42:24.356629Z","signature_b64":"b4jZnbCPbHnRKMjpqWZd58BE2gH4QV+LEGS62lVBm/Xgpx9qGT4sDZgM6/1mHubaZzu54PU4WHc+UG4qXI2KDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","last_reissued_at":"2026-07-02T08:42:24.356126Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T08:42:24.356126Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Reinforcement Learning Meets Large Language Models: A Survey of Advancements and Applications Across the LLM Lifecycle","license":"","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dingkang Yang, Hongsheng Li, Jun Liu, Keliang Liu, Lihua Zhang, Peng Zhai, Weijie Yin, Yang Liu, Yuchi Wang, Ziyun Qian","submitted_at":"2025-09-20T13:11:28Z","abstract_excerpt":"In recent years, training methods centered on Reinforcement Learning (RL) have markedly enhanced the reasoning and alignment performance of Large Language Models (LLMs), particularly in understanding human intents, following user instructions, and bolstering inferential strength. Although existing surveys offer overviews of RL augmented LLMs, their scope is often limited, failing to provide a comprehensive summary of how RL operates across the full lifecycle of LLMs. We systematically review the theoretical and practical advancements whereby RL empowers LLMs, especially Reinforcement Learning "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.16679","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.16679/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.16679","created_at":"2026-07-02T08:42:24.356177+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.16679v1","created_at":"2026-07-02T08:42:24.356177+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.16679","created_at":"2026-07-02T08:42:24.356177+00:00"},{"alias_kind":"pith_short_12","alias_value":"VFSZIZUGM3CR","created_at":"2026-07-02T08:42:24.356177+00:00"},{"alias_kind":"pith_short_16","alias_value":"VFSZIZUGM3CRYZWD","created_at":"2026-07-02T08:42:24.356177+00:00"},{"alias_kind":"pith_short_8","alias_value":"VFSZIZUG","created_at":"2026-07-02T08:42:24.356177+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"2606.19771","citing_title":"Beyond Entropy: Learning from Token-Level Distributional Deviations for LLM Reasoning","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2606.18988","citing_title":"ThinkDeception: A Progressive Reinforcement Learning Framework for Interpretable Multimodal Deception Detection","ref_index":26,"is_internal_anchor":true},{"citing_arxiv_id":"2606.17735","citing_title":"Shattering the Autoregressive Curse: Dynamic Epistemic Entropy Orchestrated Erasable Reinforcement Learning for LLMs","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.28699","citing_title":"TRACER: Turn-level Regret Matching with Inner Reinforcement Credit for Cooperative Multi-LLM Reasoning","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2605.22263","citing_title":"Tailoring Teaching to Aptitude: Direction-Adaptive Self-Distillation for LLM Reasoning","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2510.04978","citing_title":"Aligning Perception, Reasoning, Modeling and Interaction: A Survey on Physical AI","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08905","citing_title":"StaRPO: Stability-Augmented Reinforcement Policy Optimization","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02801","citing_title":"Reinforcement Learning for LLM-based Multi-Agent Systems through Orchestration Traces","ref_index":35,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB","json":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB.json","graph_json":"https://pith.science/api/pith-number/VFSZIZUGM3CRYZWD7NOGQM6KSB/graph.json","events_json":"https://pith.science/api/pith-number/VFSZIZUGM3CRYZWD7NOGQM6KSB/events.json","paper":"https://pith.science/paper/VFSZIZUG"},"agent_actions":{"view_html":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB","download_json":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB.json","view_paper":"https://pith.science/paper/VFSZIZUG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.16679&json=true","fetch_graph":"https://pith.science/api/pith-number/VFSZIZUGM3CRYZWD7NOGQM6KSB/graph.json","fetch_events":"https://pith.science/api/pith-number/VFSZIZUGM3CRYZWD7NOGQM6KSB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/action/storage_attestation","attest_author":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/action/author_attestation","sign_citation":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/action/citation_signature","submit_replication":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/action/replication_record"}},"created_at":"2026-07-02T08:42:24.356177+00:00","updated_at":"2026-07-02T08:42:24.356177+00:00"}