{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2SKSF422HJOT2BGNFJCMDNAQSJ","short_pith_number":"pith:2SKSF422","schema_version":"1.0","canonical_sha256":"d49522f35a3a5d3d04cd2a44c1b410926eb09a4894dc5b4778a16cff5ba8ea30","source":{"kind":"arxiv","id":"2602.03719","version":2},"attestation_state":"computed","paper":{"title":"BranPO: Scalable Contrastive Branch Sampling for Long-Horizon Agentic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chen Chen, Chengwei Qin, Ruochen Zhao, Sudong Wang, Weiquan Huang, Yao Shu, Yubao Zhao","submitted_at":"2026-02-03T16:43:09Z","abstract_excerpt":"Agentic reinforcement learning enables large language models to perform multi-turn planning and tool use, but long-horizon training remains challenging under sparse trajectory-level rewards, where a single outcome is uniformly assigned to all decisions. Prior methods introduce finer-grained supervision via tree-based exploration or process-level evaluation, but often incur high cost or produce noisy credit signals. In agentic trajectories, early mistakes may still be corrected by later actions, while seemingly promising intermediate states can fail due to poor subsequent decisions. We call thi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.03719","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-02-03T16:43:09Z","cross_cats_sorted":[],"title_canon_sha256":"2425ca290ad695f0d8b0bcb31d671122559005954766be411a3a70e030420a66","abstract_canon_sha256":"14e689e5c3a70e9cdf376328d664f1da3b41392e313af26303aee35b03051f08"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T03:04:37.990431Z","signature_b64":"Ir0BBpL4EslU3UH5dfAl23ibd99hmFXJ+ODwqoWicgvZY7c2/tt0ofCXabFjaqdReVbylwrk1Q6lpukpqns3BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d49522f35a3a5d3d04cd2a44c1b410926eb09a4894dc5b4778a16cff5ba8ea30","last_reissued_at":"2026-06-02T03:04:37.989921Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T03:04:37.989921Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"BranPO: Scalable Contrastive Branch Sampling for Long-Horizon Agentic Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chen Chen, Chengwei Qin, Ruochen Zhao, Sudong Wang, Weiquan Huang, Yao Shu, Yubao Zhao","submitted_at":"2026-02-03T16:43:09Z","abstract_excerpt":"Agentic reinforcement learning enables large language models to perform multi-turn planning and tool use, but long-horizon training remains challenging under sparse trajectory-level rewards, where a single outcome is uniformly assigned to all decisions. Prior methods introduce finer-grained supervision via tree-based exploration or process-level evaluation, but often incur high cost or produce noisy credit signals. In agentic trajectories, early mistakes may still be corrected by later actions, while seemingly promising intermediate states can fail due to poor subsequent decisions. We call thi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.03719","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.03719/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.03719","created_at":"2026-06-02T03:04:37.989987+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.03719v2","created_at":"2026-06-02T03:04:37.989987+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.03719","created_at":"2026-06-02T03:04:37.989987+00:00"},{"alias_kind":"pith_short_12","alias_value":"2SKSF422HJOT","created_at":"2026-06-02T03:04:37.989987+00:00"},{"alias_kind":"pith_short_16","alias_value":"2SKSF422HJOT2BGN","created_at":"2026-06-02T03:04:37.989987+00:00"},{"alias_kind":"pith_short_8","alias_value":"2SKSF422","created_at":"2026-06-02T03:04:37.989987+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ","json":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ.json","graph_json":"https://pith.science/api/pith-number/2SKSF422HJOT2BGNFJCMDNAQSJ/graph.json","events_json":"https://pith.science/api/pith-number/2SKSF422HJOT2BGNFJCMDNAQSJ/events.json","paper":"https://pith.science/paper/2SKSF422"},"agent_actions":{"view_html":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ","download_json":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ.json","view_paper":"https://pith.science/paper/2SKSF422","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.03719&json=true","fetch_graph":"https://pith.science/api/pith-number/2SKSF422HJOT2BGNFJCMDNAQSJ/graph.json","fetch_events":"https://pith.science/api/pith-number/2SKSF422HJOT2BGNFJCMDNAQSJ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ/action/storage_attestation","attest_author":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ/action/author_attestation","sign_citation":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ/action/citation_signature","submit_replication":"https://pith.science/pith/2SKSF422HJOT2BGNFJCMDNAQSJ/action/replication_record"}},"created_at":"2026-06-02T03:04:37.989987+00:00","updated_at":"2026-06-02T03:04:37.989987+00:00"}