{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:QKCTUFGMB47LUBOW35PBYJLP5F","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"282094354a8e70b111328dc543fcf64a627759cd97e6b0bc28e3a1cebf0aa271","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-31T02:53:34Z","title_canon_sha256":"2f512fc9779b0c93f4c8b467059b759813fb12aa6fa46031a440714da0fbf9ae"},"schema_version":"1.0","source":{"id":"1801.10287","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1801.10287","created_at":"2026-05-18T00:24:41Z"},{"alias_kind":"arxiv_version","alias_value":"1801.10287v1","created_at":"2026-05-18T00:24:41Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1801.10287","created_at":"2026-05-18T00:24:41Z"},{"alias_kind":"pith_short_12","alias_value":"QKCTUFGMB47L","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"QKCTUFGMB47LUBOW","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"QKCTUFGM","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:fab05f42fabe88bc20bfd5b8db6b3c9934dfb0129e0197e1ddecb291a55678e4","target":"graph","created_at":"2026-05-18T00:24:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"In this paper, we consider a modified version of the control problem in a model free Markov decision process (MDP) setting with large state and action spaces. The control problem most commonly addressed in the contemporary literature is to find an optimal policy which maximizes the value function, i.e., the long run discounted reward of the MDP. The current settings also assume access to a generative model of the MDP with the hidden premise that observations of the system behaviour in the form of sample trajectories can be obtained with ease from the model. In this paper, we consider a modifie","authors_text":"Ajin George Joseph, Shalabh Bhatnagar","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-31T02:53:34Z","title":"An Incremental Off-policy Search in a Model-free Markov Decision Process Using a Single Sample Path"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1801.10287","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9a2904eb9e7ddc86a06fbf982ea269ddaeb975036a508f42695bd4a1d2d1fc7e","target":"record","created_at":"2026-05-18T00:24:41Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"282094354a8e70b111328dc543fcf64a627759cd97e6b0bc28e3a1cebf0aa271","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-01-31T02:53:34Z","title_canon_sha256":"2f512fc9779b0c93f4c8b467059b759813fb12aa6fa46031a440714da0fbf9ae"},"schema_version":"1.0","source":{"id":"1801.10287","kind":"arxiv","version":1}},"canonical_sha256":"82853a14cc0f3eba05d6df5e1c256fe95657a112d158571448904c6ceb7329d8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"82853a14cc0f3eba05d6df5e1c256fe95657a112d158571448904c6ceb7329d8","first_computed_at":"2026-05-18T00:24:41.207762Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:24:41.207762Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"3WJzogQSTDFR0fspvjTxhmyobTjy8k+glyPKeAA2Fe+nX/MbWGPlxKMXhLQSfVbxIpwGOKW1RK0YTt0UJ7LRCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:24:41.208567Z","signed_message":"canonical_sha256_bytes"},"source_id":"1801.10287","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9a2904eb9e7ddc86a06fbf982ea269ddaeb975036a508f42695bd4a1d2d1fc7e","sha256:fab05f42fabe88bc20bfd5b8db6b3c9934dfb0129e0197e1ddecb291a55678e4"],"state_sha256":"d977806c59f2ab94d9e324f68dbdc55014db754fe93c92c9af3258ce8239a317"}