{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:VFSZIZUGM3CRYZWD7NOGQM6KSB","short_pith_number":"pith:VFSZIZUG","canonical_record":{"source":{"id":"2509.16679","kind":"arxiv","version":1},"metadata":{"license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","cross_cats_sorted":[],"title_canon_sha256":"04d9dba191602aa11aac11ee18a3864f6d8dfbc676909e99b9f294907ffd5376","abstract_canon_sha256":"0fab1bc352ab3d44443c76cf65d5f8cf7852182ca3089250dbbaf282d75880e9"},"schema_version":"1.0"},"canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","source":{"kind":"arxiv","id":"2509.16679","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.16679","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"arxiv_version","alias_value":"2509.16679v1","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.16679","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_12","alias_value":"VFSZIZUGM3CR","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_16","alias_value":"VFSZIZUGM3CRYZWD","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_8","alias_value":"VFSZIZUG","created_at":"2026-07-02T08:42:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:VFSZIZUGM3CRYZWD7NOGQM6KSB","target":"record","payload":{"canonical_record":{"source":{"id":"2509.16679","kind":"arxiv","version":1},"metadata":{"license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","cross_cats_sorted":[],"title_canon_sha256":"04d9dba191602aa11aac11ee18a3864f6d8dfbc676909e99b9f294907ffd5376","abstract_canon_sha256":"0fab1bc352ab3d44443c76cf65d5f8cf7852182ca3089250dbbaf282d75880e9"},"schema_version":"1.0"},"canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T08:42:24.356629Z","signature_b64":"b4jZnbCPbHnRKMjpqWZd58BE2gH4QV+LEGS62lVBm/Xgpx9qGT4sDZgM6/1mHubaZzu54PU4WHc+UG4qXI2KDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","last_reissued_at":"2026-07-02T08:42:24.356126Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T08:42:24.356126Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.16679","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T08:42:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"u4Pq+0wtCy4ceLXyKSdpyjfuabniH1DwuSSJ1b+PrUlmryzqQH5D7sKUgz/DBaKi1JxYDpKT8yXwhyXxBy04AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T20:58:07.209229Z"},"content_sha256":"16901db12bf2a2cf299d0f96d98bab6279a9a4c62059085c3017f794fc83d3be","schema_version":"1.0","event_id":"sha256:16901db12bf2a2cf299d0f96d98bab6279a9a4c62059085c3017f794fc83d3be"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:VFSZIZUGM3CRYZWD7NOGQM6KSB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning Meets Large Language Models: A Survey of Advancements and Applications Across the LLM Lifecycle","license":"","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dingkang Yang, Hongsheng Li, Jun Liu, Keliang Liu, Lihua Zhang, Peng Zhai, Weijie Yin, Yang Liu, Yuchi Wang, Ziyun Qian","submitted_at":"2025-09-20T13:11:28Z","abstract_excerpt":"In recent years, training methods centered on Reinforcement Learning (RL) have markedly enhanced the reasoning and alignment performance of Large Language Models (LLMs), particularly in understanding human intents, following user instructions, and bolstering inferential strength. Although existing surveys offer overviews of RL augmented LLMs, their scope is often limited, failing to provide a comprehensive summary of how RL operates across the full lifecycle of LLMs. We systematically review the theoretical and practical advancements whereby RL empowers LLMs, especially Reinforcement Learning "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.16679","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.16679/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-02T08:42:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Ay4KqHz0PC2k3NmAODQIckENgn7hMCsyyYdwvQiXJtnfpWog/3ipSgKYWTdXsCK80/32jXSuiCk1OqEmQ5UZDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-04T20:58:07.209615Z"},"content_sha256":"fa60dff39af31447f26fd0d366ce887985719f29f444c855b433f21ba94e8c25","schema_version":"1.0","event_id":"sha256:fa60dff39af31447f26fd0d366ce887985719f29f444c855b433f21ba94e8c25"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/bundle.json","state_url":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-04T20:58:07Z","links":{"resolver":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB","bundle":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/bundle.json","state":"https://pith.science/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VFSZIZUGM3CRYZWD7NOGQM6KSB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:VFSZIZUGM3CRYZWD7NOGQM6KSB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0fab1bc352ab3d44443c76cf65d5f8cf7852182ca3089250dbbaf282d75880e9","cross_cats_sorted":[],"license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","title_canon_sha256":"04d9dba191602aa11aac11ee18a3864f6d8dfbc676909e99b9f294907ffd5376"},"schema_version":"1.0","source":{"id":"2509.16679","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.16679","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"arxiv_version","alias_value":"2509.16679v1","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.16679","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_12","alias_value":"VFSZIZUGM3CR","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_16","alias_value":"VFSZIZUGM3CRYZWD","created_at":"2026-07-02T08:42:24Z"},{"alias_kind":"pith_short_8","alias_value":"VFSZIZUG","created_at":"2026-07-02T08:42:24Z"}],"graph_snapshots":[{"event_id":"sha256:fa60dff39af31447f26fd0d366ce887985719f29f444c855b433f21ba94e8c25","target":"graph","created_at":"2026-07-02T08:42:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.16679/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In recent years, training methods centered on Reinforcement Learning (RL) have markedly enhanced the reasoning and alignment performance of Large Language Models (LLMs), particularly in understanding human intents, following user instructions, and bolstering inferential strength. Although existing surveys offer overviews of RL augmented LLMs, their scope is often limited, failing to provide a comprehensive summary of how RL operates across the full lifecycle of LLMs. We systematically review the theoretical and practical advancements whereby RL empowers LLMs, especially Reinforcement Learning ","authors_text":"Dingkang Yang, Hongsheng Li, Jun Liu, Keliang Liu, Lihua Zhang, Peng Zhai, Weijie Yin, Yang Liu, Yuchi Wang, Ziyun Qian","cross_cats":[],"headline":"","license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","title":"Reinforcement Learning Meets Large Language Models: A Survey of Advancements and Applications Across the LLM Lifecycle"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.16679","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:16901db12bf2a2cf299d0f96d98bab6279a9a4c62059085c3017f794fc83d3be","target":"record","created_at":"2026-07-02T08:42:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0fab1bc352ab3d44443c76cf65d5f8cf7852182ca3089250dbbaf282d75880e9","cross_cats_sorted":[],"license":"","primary_cat":"cs.CL","submitted_at":"2025-09-20T13:11:28Z","title_canon_sha256":"04d9dba191602aa11aac11ee18a3864f6d8dfbc676909e99b9f294907ffd5376"},"schema_version":"1.0","source":{"id":"2509.16679","kind":"arxiv","version":1}},"canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a96594668666c51c66c3fb5c6833ca904b618369490b50e8f6e321fa58d6eb7e","first_computed_at":"2026-07-02T08:42:24.356126Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-02T08:42:24.356126Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"b4jZnbCPbHnRKMjpqWZd58BE2gH4QV+LEGS62lVBm/Xgpx9qGT4sDZgM6/1mHubaZzu54PU4WHc+UG4qXI2KDg==","signature_status":"signed_v1","signed_at":"2026-07-02T08:42:24.356629Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.16679","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:16901db12bf2a2cf299d0f96d98bab6279a9a4c62059085c3017f794fc83d3be","sha256:fa60dff39af31447f26fd0d366ce887985719f29f444c855b433f21ba94e8c25"],"state_sha256":"d5694f4be42ae56656505dfc32ebc29f3cc09fd761791b65396613c922370554"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IgUoAIFKAyZkFrEcnOz3uTGzMx1aYXWEK2hMVPUQjdZgZf9d3jyc5FJ7qw+MR0SF5LKBkngIc2X0W3jVpSyLBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-04T20:58:07.211539Z","bundle_sha256":"eb3a48a27fc8f3b8a26ac4c036ca00bb5cc817b0515d93724169757f99c0a83f"}}