{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:NC3NA5XPVW4D3CFZ7KEPXKXP2W","short_pith_number":"pith:NC3NA5XP","canonical_record":{"source":{"id":"2512.12576","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-12-14T07:03:51Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f703f44e139aea11643ea1a4825af0d37463416942eac8664fcdf71999c4870d","abstract_canon_sha256":"2165143bef685ff0446864bd246b0ab82e868e8dca3bea0de3a9d2827b5f9570"},"schema_version":"1.0"},"canonical_sha256":"68b6d076efadb83d88b9fa88fbaaefd5b18f4a660f3352900c635de8034fcd7d","source":{"kind":"arxiv","id":"2512.12576","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.12576","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"arxiv_version","alias_value":"2512.12576v3","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.12576","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_12","alias_value":"NC3NA5XPVW4D","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_16","alias_value":"NC3NA5XPVW4D3CFZ","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_8","alias_value":"NC3NA5XP","created_at":"2026-05-26T01:02:30Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:NC3NA5XPVW4D3CFZ7KEPXKXP2W","target":"record","payload":{"canonical_record":{"source":{"id":"2512.12576","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-12-14T07:03:51Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"f703f44e139aea11643ea1a4825af0d37463416942eac8664fcdf71999c4870d","abstract_canon_sha256":"2165143bef685ff0446864bd246b0ab82e868e8dca3bea0de3a9d2827b5f9570"},"schema_version":"1.0"},"canonical_sha256":"68b6d076efadb83d88b9fa88fbaaefd5b18f4a660f3352900c635de8034fcd7d","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:02:30.004286Z","signature_b64":"5TEUjVjdlElkafmgq5Udb+me7RxUgywaFz993UF5T4RLK7wBuTCOoHfmZgPMZyK3puEY/0tKADYybMSsrmS3Bw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"68b6d076efadb83d88b9fa88fbaaefd5b18f4a660f3352900c635de8034fcd7d","last_reissued_at":"2026-05-26T01:02:30.003171Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:02:30.003171Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.12576","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:02:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"P/uBM5RVt7HLYh1MWd4D4MjURhmw9jnpCEdIXmUDVGkeTI9WhTFAS9GkUhcogZFjaigJX3uqNnrObfSMnLKYBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T15:49:39.239458Z"},"content_sha256":"e5ccf4e2d3309f6c42c44167164d6ab52e9a321b12b42a62f2873a4f250965d3","schema_version":"1.0","event_id":"sha256:e5ccf4e2d3309f6c42c44167164d6ab52e9a321b12b42a62f2873a4f250965d3"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:NC3NA5XPVW4D3CFZ7KEPXKXP2W","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Coupled Variational Reinforcement Learning for Language Model General Reasoning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Ben He, Debing Zhang, Hongyu Lin, Jie Lou, Le Sun, Xianpei Han, Xueru Wen, Yanjiang Liu, Yaojie Lu","submitted_at":"2025-12-14T07:03:51Z","abstract_excerpt":"While reinforcement learning has achieved impressive progress in language model reasoning, it is constrained by the requirement for verifiable rewards. Recent verifier-free RL methods address this limitation by utilizing the probabilities that LLMs generate reference answers as reward signals. However, these approaches typically sample reasoning traces conditioned only on the question. This design decouples reasoning-trace sampling from answer information, leading to inefficient exploration and incoherence between traces and final answers. In this paper, we propose \\textit{\\b{Co}upled \\b{V}ari"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.12576","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.12576/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:02:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tvIsHyk8Q+HlEH97w0bEFl6ZZHJe2bpMbYS9sqWZxveY5uixq2cxrdwe7KuYyzTFGtrwriDnqHnbb2s88u9uDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T15:49:39.240176Z"},"content_sha256":"d2d10486b2015eb35f0f0ff017bc6d91126624473386fac4366049995dccd1c9","schema_version":"1.0","event_id":"sha256:d2d10486b2015eb35f0f0ff017bc6d91126624473386fac4366049995dccd1c9"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/bundle.json","state_url":"https://pith.science/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T15:49:39Z","links":{"resolver":"https://pith.science/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W","bundle":"https://pith.science/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/bundle.json","state":"https://pith.science/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/state.json","well_known_bundle":"https://pith.science/.well-known/pith/NC3NA5XPVW4D3CFZ7KEPXKXP2W/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:NC3NA5XPVW4D3CFZ7KEPXKXP2W","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2165143bef685ff0446864bd246b0ab82e868e8dca3bea0de3a9d2827b5f9570","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-12-14T07:03:51Z","title_canon_sha256":"f703f44e139aea11643ea1a4825af0d37463416942eac8664fcdf71999c4870d"},"schema_version":"1.0","source":{"id":"2512.12576","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.12576","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"arxiv_version","alias_value":"2512.12576v3","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.12576","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_12","alias_value":"NC3NA5XPVW4D","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_16","alias_value":"NC3NA5XPVW4D3CFZ","created_at":"2026-05-26T01:02:30Z"},{"alias_kind":"pith_short_8","alias_value":"NC3NA5XP","created_at":"2026-05-26T01:02:30Z"}],"graph_snapshots":[{"event_id":"sha256:d2d10486b2015eb35f0f0ff017bc6d91126624473386fac4366049995dccd1c9","target":"graph","created_at":"2026-05-26T01:02:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.12576/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While reinforcement learning has achieved impressive progress in language model reasoning, it is constrained by the requirement for verifiable rewards. Recent verifier-free RL methods address this limitation by utilizing the probabilities that LLMs generate reference answers as reward signals. However, these approaches typically sample reasoning traces conditioned only on the question. This design decouples reasoning-trace sampling from answer information, leading to inefficient exploration and incoherence between traces and final answers. In this paper, we propose \\textit{\\b{Co}upled \\b{V}ari","authors_text":"Ben He, Debing Zhang, Hongyu Lin, Jie Lou, Le Sun, Xianpei Han, Xueru Wen, Yanjiang Liu, Yaojie Lu","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-12-14T07:03:51Z","title":"Coupled Variational Reinforcement Learning for Language Model General Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.12576","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e5ccf4e2d3309f6c42c44167164d6ab52e9a321b12b42a62f2873a4f250965d3","target":"record","created_at":"2026-05-26T01:02:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2165143bef685ff0446864bd246b0ab82e868e8dca3bea0de3a9d2827b5f9570","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2025-12-14T07:03:51Z","title_canon_sha256":"f703f44e139aea11643ea1a4825af0d37463416942eac8664fcdf71999c4870d"},"schema_version":"1.0","source":{"id":"2512.12576","kind":"arxiv","version":3}},"canonical_sha256":"68b6d076efadb83d88b9fa88fbaaefd5b18f4a660f3352900c635de8034fcd7d","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"68b6d076efadb83d88b9fa88fbaaefd5b18f4a660f3352900c635de8034fcd7d","first_computed_at":"2026-05-26T01:02:30.003171Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:02:30.003171Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5TEUjVjdlElkafmgq5Udb+me7RxUgywaFz993UF5T4RLK7wBuTCOoHfmZgPMZyK3puEY/0tKADYybMSsrmS3Bw==","signature_status":"signed_v1","signed_at":"2026-05-26T01:02:30.004286Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.12576","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e5ccf4e2d3309f6c42c44167164d6ab52e9a321b12b42a62f2873a4f250965d3","sha256:d2d10486b2015eb35f0f0ff017bc6d91126624473386fac4366049995dccd1c9"],"state_sha256":"042aefae046897cd201c83094dca0d76be06cfebe8d8c0b812a526e4e97b9abd"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mp4BRrMQAe2xrE913hvX472lUNsD89XVFLCMN++IS4eoSQP/m1KVK4kZtF6+watRjvlNpVllv+w174PYn793Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T15:49:39.243392Z","bundle_sha256":"a0ba1d1f90be223c716657c841220d136755414e013aa84d4dc9c91da155dcab"}}