{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:NGSJXOMNMWSRYBOIONSOZLABM2","short_pith_number":"pith:NGSJXOMN","schema_version":"1.0","canonical_sha256":"69a49bb98d65a51c05c87364ecac0166911b9f5d7392a1f05cefb9af1ee8984d","source":{"kind":"arxiv","id":"2606.00350","version":1},"attestation_state":"computed","paper":{"title":"Drift Q-Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Amin Abyaneh, Anas Houssaini, David Meger, Hsiu-Chin Lin, Mohamad H. Danesh, Scott Fujimoto","submitted_at":"2026-05-29T20:42:30Z","abstract_excerpt":"Offline reinforcement learning requires improving a policy from fixed data while avoiding out-of-distribution actions with unreliable value estimates. Diffusion and flow policies handle this trade-off by modeling the behavior distribution to regularize the RL objective, but they require iterative denoising, solver integrations, and in more efficient variants, distillation or other approximations at inference. We propose DriftQL, which combines a drift-based behavioral regularizer with critic-driven policy improvement. The value signal biases the policy toward high-value regions of the data sup"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.00350","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-29T20:42:30Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"159730241530d82c11d55bf15f4fcc8135ec2d0f651bd16920c344d30fbc1ccb","abstract_canon_sha256":"f5232ce9c71efcd465bd8fbd15e62221b367fda9094df7531fce86a678434235"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:52.344799Z","signature_b64":"SeEPOPNwQIASy4Cc7eItyHAIASLZ9uaTvTfYKgC5H+DzzPhYWTWewQUDafXKtuMZCag83JWtEpAcMc1FKIZcBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"69a49bb98d65a51c05c87364ecac0166911b9f5d7392a1f05cefb9af1ee8984d","last_reissued_at":"2026-06-02T01:03:52.344380Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:52.344380Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Drift Q-Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Amin Abyaneh, Anas Houssaini, David Meger, Hsiu-Chin Lin, Mohamad H. Danesh, Scott Fujimoto","submitted_at":"2026-05-29T20:42:30Z","abstract_excerpt":"Offline reinforcement learning requires improving a policy from fixed data while avoiding out-of-distribution actions with unreliable value estimates. Diffusion and flow policies handle this trade-off by modeling the behavior distribution to regularize the RL objective, but they require iterative denoising, solver integrations, and in more efficient variants, distillation or other approximations at inference. We propose DriftQL, which combines a drift-based behavioral regularizer with critic-driven policy improvement. The value signal biases the policy toward high-value regions of the data sup"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.00350","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.00350/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.00350","created_at":"2026-06-02T01:03:52.344437+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.00350v1","created_at":"2026-06-02T01:03:52.344437+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.00350","created_at":"2026-06-02T01:03:52.344437+00:00"},{"alias_kind":"pith_short_12","alias_value":"NGSJXOMNMWSR","created_at":"2026-06-02T01:03:52.344437+00:00"},{"alias_kind":"pith_short_16","alias_value":"NGSJXOMNMWSRYBOI","created_at":"2026-06-02T01:03:52.344437+00:00"},{"alias_kind":"pith_short_8","alias_value":"NGSJXOMN","created_at":"2026-06-02T01:03:52.344437+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2","json":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2.json","graph_json":"https://pith.science/api/pith-number/NGSJXOMNMWSRYBOIONSOZLABM2/graph.json","events_json":"https://pith.science/api/pith-number/NGSJXOMNMWSRYBOIONSOZLABM2/events.json","paper":"https://pith.science/paper/NGSJXOMN"},"agent_actions":{"view_html":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2","download_json":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2.json","view_paper":"https://pith.science/paper/NGSJXOMN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.00350&json=true","fetch_graph":"https://pith.science/api/pith-number/NGSJXOMNMWSRYBOIONSOZLABM2/graph.json","fetch_events":"https://pith.science/api/pith-number/NGSJXOMNMWSRYBOIONSOZLABM2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2/action/storage_attestation","attest_author":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2/action/author_attestation","sign_citation":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2/action/citation_signature","submit_replication":"https://pith.science/pith/NGSJXOMNMWSRYBOIONSOZLABM2/action/replication_record"}},"created_at":"2026-06-02T01:03:52.344437+00:00","updated_at":"2026-06-02T01:03:52.344437+00:00"}