{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2011:L6OAUC6SKFSGZAFUIAUAM6DX4Z","short_pith_number":"pith:L6OAUC6S","schema_version":"1.0","canonical_sha256":"5f9c0a0bd251646c80b44028067877e66c11be4bc4cdbfd86092c0cb1512ed9c","source":{"kind":"arxiv","id":"1108.5338","version":1},"attestation_state":"computed","paper":{"title":"Penalized Q-Learning for Dynamic Treatment Regimes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ME","authors_text":"Donglin Zeng, Michael R. Kosorok, Rui Song, Weiwei Wang","submitted_at":"2011-08-26T15:54:38Z","abstract_excerpt":"A dynamic treatment regime effectively incorporates both accrued information and long-term effects of treatment from specially designed clinical trials. As these become more and more popular in conjunction with longitudinal data from clinical studies, the development of statistical inference for optimal dynamic treatment regimes is a high priority. This is very challenging due to the difficulties arising form non-regularities in the treatment effect parameters. In this paper, we propose a new reinforcement learning framework called penalized Q-learning (PQ-learning), under which the non-regula"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1108.5338","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"stat.ME","submitted_at":"2011-08-26T15:54:38Z","cross_cats_sorted":[],"title_canon_sha256":"95c8d3894699c2d9024e46de0854bdd675a30c8239b6ee11332a88c2b36d230e","abstract_canon_sha256":"c205658f064d11167e4c395d52b6ef52f954b608dfb9cecbad3116e87dbeb7a9"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T04:14:38.506539Z","signature_b64":"qgiUpSjONjPWA2n3rgQqOgeTJJDZI4CSyQS7Ke46kEmiaoO7jrDJ4k3ob/mSI15ywSdbImtl1/BAoIXYosahAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5f9c0a0bd251646c80b44028067877e66c11be4bc4cdbfd86092c0cb1512ed9c","last_reissued_at":"2026-05-18T04:14:38.506116Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T04:14:38.506116Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Penalized Q-Learning for Dynamic Treatment Regimes","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"stat.ME","authors_text":"Donglin Zeng, Michael R. Kosorok, Rui Song, Weiwei Wang","submitted_at":"2011-08-26T15:54:38Z","abstract_excerpt":"A dynamic treatment regime effectively incorporates both accrued information and long-term effects of treatment from specially designed clinical trials. As these become more and more popular in conjunction with longitudinal data from clinical studies, the development of statistical inference for optimal dynamic treatment regimes is a high priority. This is very challenging due to the difficulties arising form non-regularities in the treatment effect parameters. In this paper, we propose a new reinforcement learning framework called penalized Q-learning (PQ-learning), under which the non-regula"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1108.5338","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1108.5338","created_at":"2026-05-18T04:14:38.506175+00:00"},{"alias_kind":"arxiv_version","alias_value":"1108.5338v1","created_at":"2026-05-18T04:14:38.506175+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1108.5338","created_at":"2026-05-18T04:14:38.506175+00:00"},{"alias_kind":"pith_short_12","alias_value":"L6OAUC6SKFSG","created_at":"2026-05-18T12:26:34.985390+00:00"},{"alias_kind":"pith_short_16","alias_value":"L6OAUC6SKFSGZAFU","created_at":"2026-05-18T12:26:34.985390+00:00"},{"alias_kind":"pith_short_8","alias_value":"L6OAUC6S","created_at":"2026-05-18T12:26:34.985390+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z","json":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z.json","graph_json":"https://pith.science/api/pith-number/L6OAUC6SKFSGZAFUIAUAM6DX4Z/graph.json","events_json":"https://pith.science/api/pith-number/L6OAUC6SKFSGZAFUIAUAM6DX4Z/events.json","paper":"https://pith.science/paper/L6OAUC6S"},"agent_actions":{"view_html":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z","download_json":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z.json","view_paper":"https://pith.science/paper/L6OAUC6S","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1108.5338&json=true","fetch_graph":"https://pith.science/api/pith-number/L6OAUC6SKFSGZAFUIAUAM6DX4Z/graph.json","fetch_events":"https://pith.science/api/pith-number/L6OAUC6SKFSGZAFUIAUAM6DX4Z/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z/action/storage_attestation","attest_author":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z/action/author_attestation","sign_citation":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z/action/citation_signature","submit_replication":"https://pith.science/pith/L6OAUC6SKFSGZAFUIAUAM6DX4Z/action/replication_record"}},"created_at":"2026-05-18T04:14:38.506175+00:00","updated_at":"2026-05-18T04:14:38.506175+00:00"}