{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ECKEJ2PIHRIQ5QOYUAT2LKXJVP","short_pith_number":"pith:ECKEJ2PI","canonical_record":{"source":{"id":"2605.24740","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:29:15Z","cross_cats_sorted":["cs.GT"],"title_canon_sha256":"977fc835d4b73d327a04ee2aa6e7d320702862433b8fe2d1f5829d739f39757e","abstract_canon_sha256":"a150da83cdb8edf54f9f4c49603c94357891bdc7214e4c5a389c7f0fd4faa021"},"schema_version":"1.0"},"canonical_sha256":"209444e9e83c510ec1d8a027a5aae9abf80ddc717231d4d15a660ba45036b76f","source":{"kind":"arxiv","id":"2605.24740","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24740","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24740v1","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24740","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"ECKEJ2PIHRIQ","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"ECKEJ2PIHRIQ5QOY","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"ECKEJ2PI","created_at":"2026-05-26T01:03:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ECKEJ2PIHRIQ5QOYUAT2LKXJVP","target":"record","payload":{"canonical_record":{"source":{"id":"2605.24740","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:29:15Z","cross_cats_sorted":["cs.GT"],"title_canon_sha256":"977fc835d4b73d327a04ee2aa6e7d320702862433b8fe2d1f5829d739f39757e","abstract_canon_sha256":"a150da83cdb8edf54f9f4c49603c94357891bdc7214e4c5a389c7f0fd4faa021"},"schema_version":"1.0"},"canonical_sha256":"209444e9e83c510ec1d8a027a5aae9abf80ddc717231d4d15a660ba45036b76f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:55.842401Z","signature_b64":"KmD7lX0u1ytYpkS2KA4uMu5tRXTtgXbP12kK8KfMSK7hY2pkRxIyqsKtTjsttbFSQQsZJIGSP4DfWcL5/4RSCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"209444e9e83c510ec1d8a027a5aae9abf80ddc717231d4d15a660ba45036b76f","last_reissued_at":"2026-05-26T01:03:55.841632Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:55.841632Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.24740","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"805913rZl041CYG2JLKOwiDsMI2EAFrMPF5TgRGy8ia5/DrnV0p+RIHUSsOV7cEb1FERvIbwfGkZhIOsB3cSAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T10:22:18.888913Z"},"content_sha256":"3d528d20b4ef6babdce3828f70302cef724238c620034c29429ebbdd41de0b24","schema_version":"1.0","event_id":"sha256:3d528d20b4ef6babdce3828f70302cef724238c620034c29429ebbdd41de0b24"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ECKEJ2PIHRIQ5QOYUAT2LKXJVP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning for Reachability: Guaranteeing Asymptotic Optimality","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.GT"],"primary_cat":"cs.LG","authors_text":"Amogh Palasamudram, Jakub Svoboda, Krishnendu Chatterjee, Suguman Bansal","submitted_at":"2026-05-23T21:29:15Z","abstract_excerpt":"Reinforcement learning (RL) for reachability specifications is fundamental in sequential decision-making, yet theoretical guarantees remain less explored. A recent work achieves asymptotic convergence to optimal policies. However, this approach provides limited insight into convergence dynamics. In this work, we present an alternative approach that provides deeper theoretical insights into convergence. Our approach builds on PAC learning with assumptions. PAC learning guarantees near-optimal policies with high confidence in finite time but requires knowing internal MDP parameters like minimum "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24740","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24740/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/KtNikLgCYfaAzX8+3zo+dmKOZwqAVgZyOSvbeV4ipi/JQ8BxHr5hrH5CWgeK82zD51elbrBmR49r1+OmfnODw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T10:22:18.889295Z"},"content_sha256":"c90b2a53b8fd72de7a4aa15ee854836957685a65b0a19500c02eb8f551205758","schema_version":"1.0","event_id":"sha256:c90b2a53b8fd72de7a4aa15ee854836957685a65b0a19500c02eb8f551205758"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/bundle.json","state_url":"https://pith.science/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T10:22:18Z","links":{"resolver":"https://pith.science/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP","bundle":"https://pith.science/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/bundle.json","state":"https://pith.science/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ECKEJ2PIHRIQ5QOYUAT2LKXJVP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ECKEJ2PIHRIQ5QOYUAT2LKXJVP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a150da83cdb8edf54f9f4c49603c94357891bdc7214e4c5a389c7f0fd4faa021","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:29:15Z","title_canon_sha256":"977fc835d4b73d327a04ee2aa6e7d320702862433b8fe2d1f5829d739f39757e"},"schema_version":"1.0","source":{"id":"2605.24740","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24740","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24740v1","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24740","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"ECKEJ2PIHRIQ","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"ECKEJ2PIHRIQ5QOY","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"ECKEJ2PI","created_at":"2026-05-26T01:03:55Z"}],"graph_snapshots":[{"event_id":"sha256:c90b2a53b8fd72de7a4aa15ee854836957685a65b0a19500c02eb8f551205758","target":"graph","created_at":"2026-05-26T01:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.24740/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning (RL) for reachability specifications is fundamental in sequential decision-making, yet theoretical guarantees remain less explored. A recent work achieves asymptotic convergence to optimal policies. However, this approach provides limited insight into convergence dynamics. In this work, we present an alternative approach that provides deeper theoretical insights into convergence. Our approach builds on PAC learning with assumptions. PAC learning guarantees near-optimal policies with high confidence in finite time but requires knowing internal MDP parameters like minimum ","authors_text":"Amogh Palasamudram, Jakub Svoboda, Krishnendu Chatterjee, Suguman Bansal","cross_cats":["cs.GT"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:29:15Z","title":"Reinforcement Learning for Reachability: Guaranteeing Asymptotic Optimality"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24740","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3d528d20b4ef6babdce3828f70302cef724238c620034c29429ebbdd41de0b24","target":"record","created_at":"2026-05-26T01:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a150da83cdb8edf54f9f4c49603c94357891bdc7214e4c5a389c7f0fd4faa021","cross_cats_sorted":["cs.GT"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:29:15Z","title_canon_sha256":"977fc835d4b73d327a04ee2aa6e7d320702862433b8fe2d1f5829d739f39757e"},"schema_version":"1.0","source":{"id":"2605.24740","kind":"arxiv","version":1}},"canonical_sha256":"209444e9e83c510ec1d8a027a5aae9abf80ddc717231d4d15a660ba45036b76f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"209444e9e83c510ec1d8a027a5aae9abf80ddc717231d4d15a660ba45036b76f","first_computed_at":"2026-05-26T01:03:55.841632Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:55.841632Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KmD7lX0u1ytYpkS2KA4uMu5tRXTtgXbP12kK8KfMSK7hY2pkRxIyqsKtTjsttbFSQQsZJIGSP4DfWcL5/4RSCQ==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:55.842401Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.24740","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3d528d20b4ef6babdce3828f70302cef724238c620034c29429ebbdd41de0b24","sha256:c90b2a53b8fd72de7a4aa15ee854836957685a65b0a19500c02eb8f551205758"],"state_sha256":"80747b845f2f55df8a4f7078bd22dcfd9c948be6ee62f8137e5f0e7092931fd5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"xcN0Kdyy9pFhbpBnl7US049kpR8znBN23Cwf+TzRiGWHDwF9fhiwEw1wdNEkYNrduK/4qRiBApzki7RWv74oAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T10:22:18.891364Z","bundle_sha256":"365bf184ee3f3a190352f29047900554d40bd6ddc48311d6de7e236127cb290e"}}