{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:RXQJYKANGCKEDALKV6S32L4T56","short_pith_number":"pith:RXQJYKAN","schema_version":"1.0","canonical_sha256":"8de09c280d309441816aafa5bd2f93efadb7cbb1e94c3c84a797ba13ec5ae5f2","source":{"kind":"arxiv","id":"2606.05859","version":1},"attestation_state":"computed","paper":{"title":"TARPO: Token-Wise Latent-Explicit Reasoning via Action-Routing Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Jianye Wang, Liting Zhang, Qicheng Li, Shiwan Zhao, Xuyang Zhao, Zichen Xu","submitted_at":"2026-06-04T08:30:53Z","abstract_excerpt":"Latent reasoning has emerged as a promising alternative to discrete Chain-of-Thought (CoT) in large language models (LLMs), enabling more expressive reasoning by operating over continuous representations. However, the inherently deterministic nature of continuous representations limits policy exploration in reinforcement learning (RL). To address this, we propose TARPO (Token-Wise Latent-Explicit Reasoning via Action-Routing Policy Optimization), a pure RL framework that adaptively switches between discrete token generation and continuous latent reasoning at each step. TARPO introduces a light"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.05859","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-04T08:30:53Z","cross_cats_sorted":[],"title_canon_sha256":"6b262ef2c5e3f151ad366fa48e9bbe5ffee88c1850f58f98ff30be22bb85c367","abstract_canon_sha256":"8ffb63e128ee735fcafa78ddba68e4c6e4d3293d103bb13a75306cba79af9655"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-05T01:15:05.845667Z","signature_b64":"vYdg7jZxRw3/u5Gc5W1C7wwmAmCaD5w2eYV58bA0lxOe2/oHUEswERXEvYU5PsVFuTW3agLJZqUSt9/tM8ozBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8de09c280d309441816aafa5bd2f93efadb7cbb1e94c3c84a797ba13ec5ae5f2","last_reissued_at":"2026-06-05T01:15:05.845138Z","signature_status":"signed_v1","first_computed_at":"2026-06-05T01:15:05.845138Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"TARPO: Token-Wise Latent-Explicit Reasoning via Action-Routing Policy Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Jianye Wang, Liting Zhang, Qicheng Li, Shiwan Zhao, Xuyang Zhao, Zichen Xu","submitted_at":"2026-06-04T08:30:53Z","abstract_excerpt":"Latent reasoning has emerged as a promising alternative to discrete Chain-of-Thought (CoT) in large language models (LLMs), enabling more expressive reasoning by operating over continuous representations. However, the inherently deterministic nature of continuous representations limits policy exploration in reinforcement learning (RL). To address this, we propose TARPO (Token-Wise Latent-Explicit Reasoning via Action-Routing Policy Optimization), a pure RL framework that adaptively switches between discrete token generation and continuous latent reasoning at each step. TARPO introduces a light"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.05859","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.05859/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.05859","created_at":"2026-06-05T01:15:05.845217+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.05859v1","created_at":"2026-06-05T01:15:05.845217+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.05859","created_at":"2026-06-05T01:15:05.845217+00:00"},{"alias_kind":"pith_short_12","alias_value":"RXQJYKANGCKE","created_at":"2026-06-05T01:15:05.845217+00:00"},{"alias_kind":"pith_short_16","alias_value":"RXQJYKANGCKEDALK","created_at":"2026-06-05T01:15:05.845217+00:00"},{"alias_kind":"pith_short_8","alias_value":"RXQJYKAN","created_at":"2026-06-05T01:15:05.845217+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56","json":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56.json","graph_json":"https://pith.science/api/pith-number/RXQJYKANGCKEDALKV6S32L4T56/graph.json","events_json":"https://pith.science/api/pith-number/RXQJYKANGCKEDALKV6S32L4T56/events.json","paper":"https://pith.science/paper/RXQJYKAN"},"agent_actions":{"view_html":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56","download_json":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56.json","view_paper":"https://pith.science/paper/RXQJYKAN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.05859&json=true","fetch_graph":"https://pith.science/api/pith-number/RXQJYKANGCKEDALKV6S32L4T56/graph.json","fetch_events":"https://pith.science/api/pith-number/RXQJYKANGCKEDALKV6S32L4T56/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56/action/storage_attestation","attest_author":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56/action/author_attestation","sign_citation":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56/action/citation_signature","submit_replication":"https://pith.science/pith/RXQJYKANGCKEDALKV6S32L4T56/action/replication_record"}},"created_at":"2026-06-05T01:15:05.845217+00:00","updated_at":"2026-06-05T01:15:05.845217+00:00"}