{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MBEA3IRIPZGAI6YGL2UOWP2Z7B","short_pith_number":"pith:MBEA3IRI","schema_version":"1.0","canonical_sha256":"60480da2287e4c047b065ea8eb3f59f859321b00b0708acd5ac27fbcccb583cb","source":{"kind":"arxiv","id":"2606.10646","version":1},"attestation_state":"computed","paper":{"title":"How Does Reasoning Flow? Tracing Attention-Induced Information Flow for Targeted RL in LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Bo Zheng, Chao Yang, Junchi Yan, Taiheng Ye, Weixun Wang, Wenbo Su, Yang Li, Yijia Luo, Yu Cheng, Yuhan Sun, Zhichen Dong, Zinian Peng","submitted_at":"2026-06-09T09:56:51Z","abstract_excerpt":"Token-level credit assignment remains a key obstacle for reinforcement learning (RL) in large language models (LLMs), where RL recipes typically treat all tokens equally, failing to distinguish decisive reasoning steps from routine formatting or fluent filler. Recent attempts leverage model-internal signals to assign finer-grained credit, but these are often point-wise heuristics that ignore the global structure of information propagation. We propose FlowTracer, an RL framework that traces answer-targeted reasoning flow on an attention-induced directed acyclic graph in which nodes correspond t"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.10646","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-09T09:56:51Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"e3c8c20cd356961f09a3a3bbaf5a63d0a68767eb5b9b36df575b6a1ca8c819af","abstract_canon_sha256":"fb08e16e643868bf3cca906a5eb3841c87264b2cc949337724dcdb2c3afbc0c0"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:10:31.622194Z","signature_b64":"1pkA8djO2RUUOQD56VxLL6NFdpGZjs40p3aOUHoXO96J9wIdTuhPLbaSHoVCMOVH5OZHSDRctA+oqxJg5dXBCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"60480da2287e4c047b065ea8eb3f59f859321b00b0708acd5ac27fbcccb583cb","last_reissued_at":"2026-06-10T01:10:31.621354Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:10:31.621354Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"How Does Reasoning Flow? Tracing Attention-Induced Information Flow for Targeted RL in LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.LG","authors_text":"Bo Zheng, Chao Yang, Junchi Yan, Taiheng Ye, Weixun Wang, Wenbo Su, Yang Li, Yijia Luo, Yu Cheng, Yuhan Sun, Zhichen Dong, Zinian Peng","submitted_at":"2026-06-09T09:56:51Z","abstract_excerpt":"Token-level credit assignment remains a key obstacle for reinforcement learning (RL) in large language models (LLMs), where RL recipes typically treat all tokens equally, failing to distinguish decisive reasoning steps from routine formatting or fluent filler. Recent attempts leverage model-internal signals to assign finer-grained credit, but these are often point-wise heuristics that ignore the global structure of information propagation. We propose FlowTracer, an RL framework that traces answer-targeted reasoning flow on an attention-induced directed acyclic graph in which nodes correspond t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.10646","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.10646/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.10646","created_at":"2026-06-10T01:10:31.621489+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.10646v1","created_at":"2026-06-10T01:10:31.621489+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.10646","created_at":"2026-06-10T01:10:31.621489+00:00"},{"alias_kind":"pith_short_12","alias_value":"MBEA3IRIPZGA","created_at":"2026-06-10T01:10:31.621489+00:00"},{"alias_kind":"pith_short_16","alias_value":"MBEA3IRIPZGAI6YG","created_at":"2026-06-10T01:10:31.621489+00:00"},{"alias_kind":"pith_short_8","alias_value":"MBEA3IRI","created_at":"2026-06-10T01:10:31.621489+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B","json":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B.json","graph_json":"https://pith.science/api/pith-number/MBEA3IRIPZGAI6YGL2UOWP2Z7B/graph.json","events_json":"https://pith.science/api/pith-number/MBEA3IRIPZGAI6YGL2UOWP2Z7B/events.json","paper":"https://pith.science/paper/MBEA3IRI"},"agent_actions":{"view_html":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B","download_json":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B.json","view_paper":"https://pith.science/paper/MBEA3IRI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.10646&json=true","fetch_graph":"https://pith.science/api/pith-number/MBEA3IRIPZGAI6YGL2UOWP2Z7B/graph.json","fetch_events":"https://pith.science/api/pith-number/MBEA3IRIPZGAI6YGL2UOWP2Z7B/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B/action/storage_attestation","attest_author":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B/action/author_attestation","sign_citation":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B/action/citation_signature","submit_replication":"https://pith.science/pith/MBEA3IRIPZGAI6YGL2UOWP2Z7B/action/replication_record"}},"created_at":"2026-06-10T01:10:31.621489+00:00","updated_at":"2026-06-10T01:10:31.621489+00:00"}