{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:JDC3WCA4CAKSJML5K72JKMA6RN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6d6bada1857a010c54be59310cacc91998f790c783a34d057b3eaf47baef1e3b","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-02T09:28:13Z","title_canon_sha256":"5b89c03ae0a3d93d57208ec82269fe0ce899634cd42c6e721da6d859c58fb1e3"},"schema_version":"1.0","source":{"id":"2510.01833","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.01833","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"arxiv_version","alias_value":"2510.01833v2","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.01833","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_12","alias_value":"JDC3WCA4CAKS","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_16","alias_value":"JDC3WCA4CAKSJML5","created_at":"2026-05-27T01:04:51Z"},{"alias_kind":"pith_short_8","alias_value":"JDC3WCA4","created_at":"2026-05-27T01:04:51Z"}],"graph_snapshots":[{"event_id":"sha256:83b0b0bc04e28f2702f43b02282d6f62186fbfa64d8a9804459b27ce86f182d2","target":"graph","created_at":"2026-05-27T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.01833/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models (LLMs) demonstrate strong reasoning abilities via Chain-of-Thought (CoT), but their token-level generation encourages local decisions and lacks global planning, often leading to redundant or inaccurate reasoning. Existing methods, such as tree-based search and reinforcement learning (RL), attempt to address this issue but incur high computational costs and still struggle to produce reliable reasoning trajectories. To address these challenges, we propose Plan-Then-Action Enhanced Reasoning with Group Relative Policy Optimization (PTA-GRPO), a two-stage framework designed t","authors_text":"Benteng Chen, Chaoda Song, Dinggen Zhang, Qingtao Pan, Qinjian Zhao, Shufei Zhang, Sumon Biswas, Towsif Raiyan, Weida Wang, Yang Ouyang, Zhihao Dou, Zhiqiang Gao, Zhongwei Wan","cross_cats":["cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-02T09:28:13Z","title":"Plan Then Action:High-Level Planning Guidance Reinforcement Learning for LLM Reasoning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.01833","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5a54436060a33774feb93e9ae5cd967598d11e95b0413233ec8ea06fb29d84af","target":"record","created_at":"2026-05-27T01:04:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6d6bada1857a010c54be59310cacc91998f790c783a34d057b3eaf47baef1e3b","cross_cats_sorted":["cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2025-10-02T09:28:13Z","title_canon_sha256":"5b89c03ae0a3d93d57208ec82269fe0ce899634cd42c6e721da6d859c58fb1e3"},"schema_version":"1.0","source":{"id":"2510.01833","kind":"arxiv","version":2}},"canonical_sha256":"48c5bb081c101524b17d57f495301e8b67e5735fb18608f211ff74beddc4466b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"48c5bb081c101524b17d57f495301e8b67e5735fb18608f211ff74beddc4466b","first_computed_at":"2026-05-27T01:04:51.796881Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:04:51.796881Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gJ9QfTkruNQO2HRyPogAmlsEsmiqkIe7Fd/hPekgNYkcp8OVd+/Cn8dVd+NSZQtVIN31q71o10nzxKtQ6T6lDQ==","signature_status":"signed_v1","signed_at":"2026-05-27T01:04:51.797585Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.01833","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5a54436060a33774feb93e9ae5cd967598d11e95b0413233ec8ea06fb29d84af","sha256:83b0b0bc04e28f2702f43b02282d6f62186fbfa64d8a9804459b27ce86f182d2"],"state_sha256":"8487b4f7f2145201d85ce65eee4df7541610789126520e15a1b7ba46609b6446"}