{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:KQ5MJM46GKY3I36PY7VH5XB3JG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e77772099e10cc2a50e7af5808b01e4985cbe47cac8dc094b3ef45c310159fc3","cross_cats_sorted":["math.OC","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-02T16:00:50Z","title_canon_sha256":"1bd5611897d57ed158d3ab36b34162cadf3006caff5d75ce6e8275bd8ac08d6e"},"schema_version":"1.0","source":{"id":"2510.02149","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2510.02149","created_at":"2026-06-11T01:09:18Z"},{"alias_kind":"arxiv_version","alias_value":"2510.02149v2","created_at":"2026-06-11T01:09:18Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2510.02149","created_at":"2026-06-11T01:09:18Z"},{"alias_kind":"pith_short_12","alias_value":"KQ5MJM46GKY3","created_at":"2026-06-11T01:09:18Z"},{"alias_kind":"pith_short_16","alias_value":"KQ5MJM46GKY3I36P","created_at":"2026-06-11T01:09:18Z"},{"alias_kind":"pith_short_8","alias_value":"KQ5MJM46","created_at":"2026-06-11T01:09:18Z"}],"graph_snapshots":[{"event_id":"sha256:f474e00cf721132fe2a46508627be2ceb6b5b3bb113d10a3d894dd69e528fd77","target":"graph","created_at":"2026-06-11T01:09:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2510.02149/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce Action-Triggered Sporadically Traceable Markov Decision Processes (ATST-MDPs), a reinforcement learning framework for partial observability in which full state observations occur stochastically at each step, with probability determined by the chosen action. We derive Bellman equations tailored to this setting and establish the existence of an optimal policy. Exploiting the fact that sporadic observations reveal the full state, we provide an equivalent formulation in which agents commit to action-sequences between consecutive observations. Under the linear MDP assumption, we show t","authors_text":"Alexander Ryabchenko, Wenlong Mou","cross_cats":["math.OC","stat.ML"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-02T16:00:50Z","title":"Reinforcement Learning with Action-Triggered Observations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2510.02149","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:db93100cf1727398243be2f1495730086662c8a37395fa0dad6e297277dbc5cf","target":"record","created_at":"2026-06-11T01:09:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e77772099e10cc2a50e7af5808b01e4985cbe47cac8dc094b3ef45c310159fc3","cross_cats_sorted":["math.OC","stat.ML"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-10-02T16:00:50Z","title_canon_sha256":"1bd5611897d57ed158d3ab36b34162cadf3006caff5d75ce6e8275bd8ac08d6e"},"schema_version":"1.0","source":{"id":"2510.02149","kind":"arxiv","version":2}},"canonical_sha256":"543ac4b39e32b1b46fcfc7ea7edc3b49a3308c38c5271009e6ea0754f3d48d62","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"543ac4b39e32b1b46fcfc7ea7edc3b49a3308c38c5271009e6ea0754f3d48d62","first_computed_at":"2026-06-11T01:09:18.349840Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:09:18.349840Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"brpEY53R4sSJxd0yXaCzZHXkVtLhje2uQsrTEl/sxfhJPsmGx77cMSQEotlGxovhfd5opCr4dUjsjlka/SldCw==","signature_status":"signed_v1","signed_at":"2026-06-11T01:09:18.350676Z","signed_message":"canonical_sha256_bytes"},"source_id":"2510.02149","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:db93100cf1727398243be2f1495730086662c8a37395fa0dad6e297277dbc5cf","sha256:f474e00cf721132fe2a46508627be2ceb6b5b3bb113d10a3d894dd69e528fd77"],"state_sha256":"c62c511d7899ab641193a31ea12e0f3fa57ebbebe373276d71d6024355edbec0"}