{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BB3KHRPUCJFXTAJI6GA2AAYFMF","short_pith_number":"pith:BB3KHRPU","schema_version":"1.0","canonical_sha256":"0876a3c5f4124b798128f181a00305617ffcfe4b7b90564b7bb7725498a59c6c","source":{"kind":"arxiv","id":"2606.11087","version":1},"attestation_state":"computed","paper":{"title":"Test-Time Gradient Guidance of Flow Policies in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Andy Peng, Charles Xu, Kevin Frans, Qiyang Li, Sergey Levine, Tobias Springenberg, Zhiyuan Zhou","submitted_at":"2026-06-09T16:45:57Z","abstract_excerpt":"Expressive continuous control policies, such as diffusion and flow models, form the backbone of recent advances in scaling imitation learning for simulated and real robot control. While they are known to scale stably in the supervised imitation learning setting, incorporating them into reinforcement learning (RL) pipelines for policy improvement has proven more difficult. It often requires specialized training objectives or backpropagating through denoising processes, which cause well-known issues with stability and affect scalability. In this paper we study the question of whether simple poli"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.11087","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2026-06-09T16:45:57Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"212debc647671fae4abd50e2b56649efe027cb05d02f6dd957485bc1bd2eb5b8","abstract_canon_sha256":"20c338a082f27094618eede52d4a8c0ccdacbadc270e7ad71a38ad07780318a6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-10T01:11:09.384512Z","signature_b64":"vt2c3rOjOOg/WfWLNk0MTw+aOQr4LsrDGX/Jeeoab7qAXndsAhTcsiawyIW+5O6SX2itvGVffJ4RqWGV5JPfCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0876a3c5f4124b798128f181a00305617ffcfe4b7b90564b7bb7725498a59c6c","last_reissued_at":"2026-06-10T01:11:09.383669Z","signature_status":"signed_v1","first_computed_at":"2026-06-10T01:11:09.383669Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Test-Time Gradient Guidance of Flow Policies in Reinforcement Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Andy Peng, Charles Xu, Kevin Frans, Qiyang Li, Sergey Levine, Tobias Springenberg, Zhiyuan Zhou","submitted_at":"2026-06-09T16:45:57Z","abstract_excerpt":"Expressive continuous control policies, such as diffusion and flow models, form the backbone of recent advances in scaling imitation learning for simulated and real robot control. While they are known to scale stably in the supervised imitation learning setting, incorporating them into reinforcement learning (RL) pipelines for policy improvement has proven more difficult. It often requires specialized training objectives or backpropagating through denoising processes, which cause well-known issues with stability and affect scalability. In this paper we study the question of whether simple poli"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11087","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11087/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.11087","created_at":"2026-06-10T01:11:09.383805+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.11087v1","created_at":"2026-06-10T01:11:09.383805+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11087","created_at":"2026-06-10T01:11:09.383805+00:00"},{"alias_kind":"pith_short_12","alias_value":"BB3KHRPUCJFX","created_at":"2026-06-10T01:11:09.383805+00:00"},{"alias_kind":"pith_short_16","alias_value":"BB3KHRPUCJFXTAJI","created_at":"2026-06-10T01:11:09.383805+00:00"},{"alias_kind":"pith_short_8","alias_value":"BB3KHRPU","created_at":"2026-06-10T01:11:09.383805+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF","json":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF.json","graph_json":"https://pith.science/api/pith-number/BB3KHRPUCJFXTAJI6GA2AAYFMF/graph.json","events_json":"https://pith.science/api/pith-number/BB3KHRPUCJFXTAJI6GA2AAYFMF/events.json","paper":"https://pith.science/paper/BB3KHRPU"},"agent_actions":{"view_html":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF","download_json":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF.json","view_paper":"https://pith.science/paper/BB3KHRPU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.11087&json=true","fetch_graph":"https://pith.science/api/pith-number/BB3KHRPUCJFXTAJI6GA2AAYFMF/graph.json","fetch_events":"https://pith.science/api/pith-number/BB3KHRPUCJFXTAJI6GA2AAYFMF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF/action/storage_attestation","attest_author":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF/action/author_attestation","sign_citation":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF/action/citation_signature","submit_replication":"https://pith.science/pith/BB3KHRPUCJFXTAJI6GA2AAYFMF/action/replication_record"}},"created_at":"2026-06-10T01:11:09.383805+00:00","updated_at":"2026-06-10T01:11:09.383805+00:00"}