{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:QC3QNDPXUC2LDSFYSZ7V3YS3US","short_pith_number":"pith:QC3QNDPX","schema_version":"1.0","canonical_sha256":"80b7068df7a0b4b1c8b8967f5de25ba4a673dcb428ab83d9284c230fe753bc78","source":{"kind":"arxiv","id":"1906.09114","version":2},"attestation_state":"computed","paper":{"title":"Near-optimal Bayesian Solution For Unknown Discrete Markov Decision Process","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aristide Tossou, Christos Dimitrakakis, Debabrota Basu","submitted_at":"2019-06-20T06:32:36Z","abstract_excerpt":"We tackle the problem of acting in an unknown finite and discrete Markov Decision Process (MDP) for which the expected shortest path from any state to any other state is bounded by a finite number $D$. An MDP consists of $S$ states and $A$ possible actions per state. Upon choosing an action $a_t$ at state $s_t$, one receives a real value reward $r_t$, then one transits to a next state $s_{t+1}$. The reward $r_t$ is generated from a fixed reward distribution depending only on $(s_t, a_t)$ and similarly, the next state $s_{t+1}$ is generated from a fixed transition distribution depending only on"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.09114","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-20T06:32:36Z","cross_cats_sorted":["cs.AI","cs.GT","stat.ML"],"title_canon_sha256":"ef7daea42c2be913150a0755e8577878c5b103a0faff2cd287295212e7c729dd","abstract_canon_sha256":"789f281accb678d5f8fe93e1d1782989999d6651116b89714ad5615c247beda1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:41:02.131359Z","signature_b64":"FnF366IpFvAsUIFBfkyXBT6UnlEKA6GfCCarB6GbfrWWhk8EtuT2oEZ6EaL7qaYSZARP25jY+MbbNngfXQudCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"80b7068df7a0b4b1c8b8967f5de25ba4a673dcb428ab83d9284c230fe753bc78","last_reissued_at":"2026-05-17T23:41:02.130724Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:41:02.130724Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Near-optimal Bayesian Solution For Unknown Discrete Markov Decision Process","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.GT","stat.ML"],"primary_cat":"cs.LG","authors_text":"Aristide Tossou, Christos Dimitrakakis, Debabrota Basu","submitted_at":"2019-06-20T06:32:36Z","abstract_excerpt":"We tackle the problem of acting in an unknown finite and discrete Markov Decision Process (MDP) for which the expected shortest path from any state to any other state is bounded by a finite number $D$. An MDP consists of $S$ states and $A$ possible actions per state. Upon choosing an action $a_t$ at state $s_t$, one receives a real value reward $r_t$, then one transits to a next state $s_{t+1}$. The reward $r_t$ is generated from a fixed reward distribution depending only on $(s_t, a_t)$ and similarly, the next state $s_{t+1}$ is generated from a fixed transition distribution depending only on"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.09114","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.09114","created_at":"2026-05-17T23:41:02.130829+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.09114v2","created_at":"2026-05-17T23:41:02.130829+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.09114","created_at":"2026-05-17T23:41:02.130829+00:00"},{"alias_kind":"pith_short_12","alias_value":"QC3QNDPXUC2L","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_16","alias_value":"QC3QNDPXUC2LDSFY","created_at":"2026-05-18T12:33:27.125529+00:00"},{"alias_kind":"pith_short_8","alias_value":"QC3QNDPX","created_at":"2026-05-18T12:33:27.125529+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US","json":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US.json","graph_json":"https://pith.science/api/pith-number/QC3QNDPXUC2LDSFYSZ7V3YS3US/graph.json","events_json":"https://pith.science/api/pith-number/QC3QNDPXUC2LDSFYSZ7V3YS3US/events.json","paper":"https://pith.science/paper/QC3QNDPX"},"agent_actions":{"view_html":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US","download_json":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US.json","view_paper":"https://pith.science/paper/QC3QNDPX","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.09114&json=true","fetch_graph":"https://pith.science/api/pith-number/QC3QNDPXUC2LDSFYSZ7V3YS3US/graph.json","fetch_events":"https://pith.science/api/pith-number/QC3QNDPXUC2LDSFYSZ7V3YS3US/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US/action/timestamp_anchor","attest_storage":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US/action/storage_attestation","attest_author":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US/action/author_attestation","sign_citation":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US/action/citation_signature","submit_replication":"https://pith.science/pith/QC3QNDPXUC2LDSFYSZ7V3YS3US/action/replication_record"}},"created_at":"2026-05-17T23:41:02.130829+00:00","updated_at":"2026-05-17T23:41:02.130829+00:00"}