{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:D5GPITUXFPKX7AHOYD4Z5BKRL6","short_pith_number":"pith:D5GPITUX","schema_version":"1.0","canonical_sha256":"1f4cf44e972bd57f80eec0f99e85515f9092e39ddc46ee853ee1cfb07983d060","source":{"kind":"arxiv","id":"2606.12908","version":1},"attestation_state":"computed","paper":{"title":"SENTINEL: Failure-Driven Reinforcement Learning for Training Tool-Using Language Model Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chen Luo, Dakuo Wang, Hanqing Lu, Jing Huang, Jiri Gesi, Manling Li, Qun Liu, Yimeng Zhang, Yisi Sang, Yuxuan Lu, Ziyi Wang","submitted_at":"2026-06-11T05:06:50Z","abstract_excerpt":"Language model agents are increasingly effective in solving realistic tasks through multi-turn tool use. However, training reliable tool-using agents remains challenging in practice. While reinforcement learning provides an on-policy paradigm for improving agents from their own environment interactions, its effectiveness depends heavily on the training task distribution. When tasks are fixed before training, the task distribution can become increasingly mismatched with the policy's evolving capabilities, causing many rollouts to be spent on uninformative tasks. We propose SENTINEL, a failure-d"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.12908","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-06-11T05:06:50Z","cross_cats_sorted":[],"title_canon_sha256":"1422bd741776ad668ae79e7c49d0f3560b141b7671e314a998484a1057b597ac","abstract_canon_sha256":"0a3c685b202ac0e32e158e33d756ef4cc912e7c6b84bb5ab8d7fec315e551710"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-12T01:09:34.110542Z","signature_b64":"JXZpXChvWawSOnfQkZo0NzcXZRcdaLFHdi5rtrKXNxyNV5pvQYhNg5afpZCol6jsxxRB0XLbPmXh39U/Kd4xCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1f4cf44e972bd57f80eec0f99e85515f9092e39ddc46ee853ee1cfb07983d060","last_reissued_at":"2026-06-12T01:09:34.109935Z","signature_status":"signed_v1","first_computed_at":"2026-06-12T01:09:34.109935Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"SENTINEL: Failure-Driven Reinforcement Learning for Training Tool-Using Language Model Agents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chen Luo, Dakuo Wang, Hanqing Lu, Jing Huang, Jiri Gesi, Manling Li, Qun Liu, Yimeng Zhang, Yisi Sang, Yuxuan Lu, Ziyi Wang","submitted_at":"2026-06-11T05:06:50Z","abstract_excerpt":"Language model agents are increasingly effective in solving realistic tasks through multi-turn tool use. However, training reliable tool-using agents remains challenging in practice. While reinforcement learning provides an on-policy paradigm for improving agents from their own environment interactions, its effectiveness depends heavily on the training task distribution. When tasks are fixed before training, the task distribution can become increasingly mismatched with the policy's evolving capabilities, causing many rollouts to be spent on uninformative tasks. We propose SENTINEL, a failure-d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.12908","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.12908/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.12908","created_at":"2026-06-12T01:09:34.110028+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.12908v1","created_at":"2026-06-12T01:09:34.110028+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.12908","created_at":"2026-06-12T01:09:34.110028+00:00"},{"alias_kind":"pith_short_12","alias_value":"D5GPITUXFPKX","created_at":"2026-06-12T01:09:34.110028+00:00"},{"alias_kind":"pith_short_16","alias_value":"D5GPITUXFPKX7AHO","created_at":"2026-06-12T01:09:34.110028+00:00"},{"alias_kind":"pith_short_8","alias_value":"D5GPITUX","created_at":"2026-06-12T01:09:34.110028+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6","json":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6.json","graph_json":"https://pith.science/api/pith-number/D5GPITUXFPKX7AHOYD4Z5BKRL6/graph.json","events_json":"https://pith.science/api/pith-number/D5GPITUXFPKX7AHOYD4Z5BKRL6/events.json","paper":"https://pith.science/paper/D5GPITUX"},"agent_actions":{"view_html":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6","download_json":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6.json","view_paper":"https://pith.science/paper/D5GPITUX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.12908&json=true","fetch_graph":"https://pith.science/api/pith-number/D5GPITUXFPKX7AHOYD4Z5BKRL6/graph.json","fetch_events":"https://pith.science/api/pith-number/D5GPITUXFPKX7AHOYD4Z5BKRL6/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6/action/timestamp_anchor","attest_storage":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6/action/storage_attestation","attest_author":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6/action/author_attestation","sign_citation":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6/action/citation_signature","submit_replication":"https://pith.science/pith/D5GPITUXFPKX7AHOYD4Z5BKRL6/action/replication_record"}},"created_at":"2026-06-12T01:09:34.110028+00:00","updated_at":"2026-06-12T01:09:34.110028+00:00"}