{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:WFMWIHAPMG25JCPAX3GTQLQBCB","short_pith_number":"pith:WFMWIHAP","canonical_record":{"source":{"id":"2605.06638","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T17:48:42Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"cadbde38064688eda45f9f9931dbc3830cc800c0b2b88f2672c696b2c5e12ff0","abstract_canon_sha256":"cdfb6df287821caf8a26addbb8a2c859e647967bb784ed52a71031bd1bf95e5b"},"schema_version":"1.0"},"canonical_sha256":"b159641c0f61b5d489e0becd382e0110545ada71a70564aa4c8e21446ba06469","source":{"kind":"arxiv","id":"2605.06638","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06638","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06638v3","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06638","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"WFMWIHAPMG25","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"WFMWIHAPMG25JCPA","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"WFMWIHAP","created_at":"2026-05-20T00:03:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:WFMWIHAPMG25JCPAX3GTQLQBCB","target":"record","payload":{"canonical_record":{"source":{"id":"2605.06638","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T17:48:42Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"cadbde38064688eda45f9f9931dbc3830cc800c0b2b88f2672c696b2c5e12ff0","abstract_canon_sha256":"cdfb6df287821caf8a26addbb8a2c859e647967bb784ed52a71031bd1bf95e5b"},"schema_version":"1.0"},"canonical_sha256":"b159641c0f61b5d489e0becd382e0110545ada71a70564aa4c8e21446ba06469","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:14.549662Z","signature_b64":"IgAE4i0GsL3NYVJpWHzZtJolutsjkttrQGB7botLDKGju3gZde3DbXL7OPsJGHT0GxP45Ifym/0jBsvn67orCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b159641c0f61b5d489e0becd382e0110545ada71a70564aa4c8e21446ba06469","last_reissued_at":"2026-05-20T00:03:14.548748Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:14.548748Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.06638","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"T+9IbVfyNcJvrfPcOiWETWyOyeSFoi081j1PmJ9EyocrmbEUHEf/fXvqLqq6iscsPatfL9vzv2Y/iGCylpN8AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T07:00:11.740616Z"},"content_sha256":"8fe79010559f070c517b94ade10fa774d3d597b42f16b770f2e0544c8b5332ee","schema_version":"1.0","event_id":"sha256:8fe79010559f070c517b94ade10fa774d3d597b42f16b770f2e0544c8b5332ee"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:WFMWIHAPMG25JCPAX3GTQLQBCB","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Can RL Teach Long-Horizon Reasoning to LLMs? Expressiveness Is Key","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic.","cross_cats":["cs.CL"],"primary_cat":"cs.AI","authors_text":"Abulhair Saparov, Guangchen Lan, Guanwen Qiu, Sipeng Zhang, Tianle Wang, Xinpeng Wei, Zhaoyang Wang","submitted_at":"2026-05-07T17:48:42Z","abstract_excerpt":"Reinforcement learning (RL) has been applied to improve large language model (LLM) reasoning, yet the systematic study of how training scales with task difficulty has been hampered by the lack of controlled, scalable environments. Observed LLM shortcomings in long-horizon reasoning have raised the prospect that they are fundamental to the autoregressive transformer architecture. To address this, we introduce ScaleLogic, a synthetic logical reasoning framework that offers independent control over two axes of difficulty: the depth of the required proof planning (i.e., the horizon) and the expres"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"LLM shortcomings in long-horizon reasoning are not fundamental to the underlying architecture, and can be addressed by improved training methodology and data.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That performance on the synthetic ScaleLogic tasks and their transfer to downstream benchmarks is a faithful proxy for the long-horizon reasoning difficulties encountered in real-world applications.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"RL training compute for logical reasoning follows a power law in proof depth whose exponent rises with logic expressiveness, and more expressive training yields larger gains on downstream benchmarks.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"cc1968ced6cc36fd7b85cecda14201d59a33c5f1c6c54575163f21e46f69e969"},"source":{"id":"2605.06638","kind":"arxiv","version":3},"verdict":{"id":"ec5134bf-9921-4393-9d8f-ceb9fb6f1521","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-12T03:05:35.344614Z","strongest_claim":"LLM shortcomings in long-horizon reasoning are not fundamental to the underlying architecture, and can be addressed by improved training methodology and data.","one_line_summary":"RL training compute for logical reasoning follows a power law in proof depth whose exponent rises with logic expressiveness, and more expressive training yields larger gains on downstream benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That performance on the synthetic ScaleLogic tasks and their transfer to downstream benchmarks is a faithful proxy for the long-horizon reasoning difficulties encountered in real-world applications.","pith_extraction_headline":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.06638/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T18:01:19.226776Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T12:32:15.301365Z","status":"completed","version":"1.0.0","findings_count":0}],"snapshot_sha256":"5c38e8c58ab8a9d6c5a32403e9960438879627bf5c1fecfdaeacac13652f9ddc"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9a1ccd447b59b76bc0bb9128570eb18829217041a98062a3737881f652cb9601"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"ec5134bf-9921-4393-9d8f-ceb9fb6f1521"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:03:14Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"YKQqvPp750pj+gS4X6ECf9BM8vbVrrs4g9SktV/4g9jnE7U6WYQ0WsHtBd7GkN05kdOAYF90lyWTN0ZHr82QCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-22T07:00:11.741481Z"},"content_sha256":"ec1270b716f4d2c57084332fda5aaf48121f8c131ce504bc1de4332c5050e246","schema_version":"1.0","event_id":"sha256:ec1270b716f4d2c57084332fda5aaf48121f8c131ce504bc1de4332c5050e246"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/bundle.json","state_url":"https://pith.science/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-22T07:00:11Z","links":{"resolver":"https://pith.science/pith/WFMWIHAPMG25JCPAX3GTQLQBCB","bundle":"https://pith.science/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/bundle.json","state":"https://pith.science/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WFMWIHAPMG25JCPAX3GTQLQBCB/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:WFMWIHAPMG25JCPAX3GTQLQBCB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"cdfb6df287821caf8a26addbb8a2c859e647967bb784ed52a71031bd1bf95e5b","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T17:48:42Z","title_canon_sha256":"cadbde38064688eda45f9f9931dbc3830cc800c0b2b88f2672c696b2c5e12ff0"},"schema_version":"1.0","source":{"id":"2605.06638","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.06638","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"arxiv_version","alias_value":"2605.06638v3","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.06638","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_12","alias_value":"WFMWIHAPMG25","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_16","alias_value":"WFMWIHAPMG25JCPA","created_at":"2026-05-20T00:03:14Z"},{"alias_kind":"pith_short_8","alias_value":"WFMWIHAP","created_at":"2026-05-20T00:03:14Z"}],"graph_snapshots":[{"event_id":"sha256:ec1270b716f4d2c57084332fda5aaf48121f8c131ce504bc1de4332c5050e246","target":"graph","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"LLM shortcomings in long-horizon reasoning are not fundamental to the underlying architecture, and can be addressed by improved training methodology and data."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That performance on the synthetic ScaleLogic tasks and their transfer to downstream benchmarks is a faithful proxy for the long-horizon reasoning difficulties encountered in real-world applications."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"RL training compute for logical reasoning follows a power law in proof depth whose exponent rises with logic expressiveness, and more expressive training yields larger gains on downstream benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic."}],"snapshot_sha256":"cc1968ced6cc36fd7b85cecda14201d59a33c5f1c6c54575163f21e46f69e969"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"9a1ccd447b59b76bc0bb9128570eb18829217041a98062a3737881f652cb9601"},"integrity":{"available":true,"clean":true,"detectors_run":[{"findings_count":0,"name":"doi_title_agreement","ran_at":"2026-05-19T18:01:19.226776Z","status":"completed","version":"1.0.0"},{"findings_count":0,"name":"doi_compliance","ran_at":"2026-05-19T12:32:15.301365Z","status":"completed","version":"1.0.0"}],"endpoint":"/pith/2605.06638/integrity.json","findings":[],"snapshot_sha256":"5c38e8c58ab8a9d6c5a32403e9960438879627bf5c1fecfdaeacac13652f9ddc","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) has been applied to improve large language model (LLM) reasoning, yet the systematic study of how training scales with task difficulty has been hampered by the lack of controlled, scalable environments. Observed LLM shortcomings in long-horizon reasoning have raised the prospect that they are fundamental to the autoregressive transformer architecture. To address this, we introduce ScaleLogic, a synthetic logical reasoning framework that offers independent control over two axes of difficulty: the depth of the required proof planning (i.e., the horizon) and the expres","authors_text":"Abulhair Saparov, Guangchen Lan, Guanwen Qiu, Sipeng Zhang, Tianle Wang, Xinpeng Wei, Zhaoyang Wang","cross_cats":["cs.CL"],"headline":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T17:48:42Z","title":"Can RL Teach Long-Horizon Reasoning to LLMs? Expressiveness Is Key"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.06638","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-12T03:05:35.344614Z","id":"ec5134bf-9921-4393-9d8f-ceb9fb6f1521","model_set":{"reader":"grok-4.3"},"one_line_summary":"RL training compute for logical reasoning follows a power law in proof depth whose exponent rises with logic expressiveness, and more expressive training yields larger gains on downstream benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Reinforcement learning overcomes LLM long-horizon reasoning limits when training uses more expressive logic.","strongest_claim":"LLM shortcomings in long-horizon reasoning are not fundamental to the underlying architecture, and can be addressed by improved training methodology and data.","weakest_assumption":"That performance on the synthetic ScaleLogic tasks and their transfer to downstream benchmarks is a faithful proxy for the long-horizon reasoning difficulties encountered in real-world applications."}},"verdict_id":"ec5134bf-9921-4393-9d8f-ceb9fb6f1521"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:8fe79010559f070c517b94ade10fa774d3d597b42f16b770f2e0544c8b5332ee","target":"record","created_at":"2026-05-20T00:03:14Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"cdfb6df287821caf8a26addbb8a2c859e647967bb784ed52a71031bd1bf95e5b","cross_cats_sorted":["cs.CL"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-07T17:48:42Z","title_canon_sha256":"cadbde38064688eda45f9f9931dbc3830cc800c0b2b88f2672c696b2c5e12ff0"},"schema_version":"1.0","source":{"id":"2605.06638","kind":"arxiv","version":3}},"canonical_sha256":"b159641c0f61b5d489e0becd382e0110545ada71a70564aa4c8e21446ba06469","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b159641c0f61b5d489e0becd382e0110545ada71a70564aa4c8e21446ba06469","first_computed_at":"2026-05-20T00:03:14.548748Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:03:14.548748Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"IgAE4i0GsL3NYVJpWHzZtJolutsjkttrQGB7botLDKGju3gZde3DbXL7OPsJGHT0GxP45Ifym/0jBsvn67orCQ==","signature_status":"signed_v1","signed_at":"2026-05-20T00:03:14.549662Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.06638","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:8fe79010559f070c517b94ade10fa774d3d597b42f16b770f2e0544c8b5332ee","sha256:ec1270b716f4d2c57084332fda5aaf48121f8c131ce504bc1de4332c5050e246"],"state_sha256":"4b6343834a46b877ccc3e0fea3c71e6b3d6136e3c04f2af134df8dc3509a44a5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+gn+HQyBYdeVf0p0DA6z6I8g6fQozZbCg+LNf9cSsON9MBua5oIwxFE5KdK4nYVL5Xss/fEdLlmVx+ssUgJECw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-22T07:00:11.745561Z","bundle_sha256":"ca3aa6d79f2731289c6dfc925c41e987eb21f1aeed7a35e503616ed36c25c9fd"}}