{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:6RBGQEMF3N4UWAO72VYINTQ2Q6","short_pith_number":"pith:6RBGQEMF","canonical_record":{"source":{"id":"2605.20854","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T07:44:43Z","cross_cats_sorted":[],"title_canon_sha256":"e3859cc0f5fcfe5bad5a30b0426431ad311f09809d44348f5a24bd3bc109d2f8","abstract_canon_sha256":"d5cd093a410717fd5159249a95754e3f2d59cffe0431ca92f2a0cf9c5c27d5d1"},"schema_version":"1.0"},"canonical_sha256":"f442681185db794b01dfd57086ce1a87826dd5d99e67370b3c3fac639a02b727","source":{"kind":"arxiv","id":"2605.20854","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20854","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20854v1","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20854","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_12","alias_value":"6RBGQEMF3N4U","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_16","alias_value":"6RBGQEMF3N4UWAO7","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_8","alias_value":"6RBGQEMF","created_at":"2026-05-21T01:05:24Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:6RBGQEMF3N4UWAO72VYINTQ2Q6","target":"record","payload":{"canonical_record":{"source":{"id":"2605.20854","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T07:44:43Z","cross_cats_sorted":[],"title_canon_sha256":"e3859cc0f5fcfe5bad5a30b0426431ad311f09809d44348f5a24bd3bc109d2f8","abstract_canon_sha256":"d5cd093a410717fd5159249a95754e3f2d59cffe0431ca92f2a0cf9c5c27d5d1"},"schema_version":"1.0"},"canonical_sha256":"f442681185db794b01dfd57086ce1a87826dd5d99e67370b3c3fac639a02b727","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-21T01:05:24.588142Z","signature_b64":"2SjqVH2AyQGenJoB54/6oDn0wL3penjePr9MmM+MoFg+cOQ9WlbvpXDbVWxRol4uNz/fOdQot6nisOZeNFq5AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f442681185db794b01dfd57086ce1a87826dd5d99e67370b3c3fac639a02b727","last_reissued_at":"2026-05-21T01:05:24.587624Z","signature_status":"signed_v1","first_computed_at":"2026-05-21T01:05:24.587624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.20854","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AgTlBz75H2LvOFRbz/uoW6CT7ml8F+EJPPmqZNJS/HSuWBaF1sYbv8oxOOWfBQX44VZbRMAE2nDv9QFwXFYGCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:50:00.896598Z"},"content_sha256":"eb2473c131cc6dad911bcd207d5d8c5d02c00c5adc1897846b38b49768e7d918","schema_version":"1.0","event_id":"sha256:eb2473c131cc6dad911bcd207d5d8c5d02c00c5adc1897846b38b49768e7d918"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:6RBGQEMF3N4UWAO72VYINTQ2Q6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Finite-Time Regret Analysis of Retry-Aware Bandits","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Bingkui Tong, Junpei Komiyama, Paavo Parmas, Soichiro Nishimori","submitted_at":"2026-05-20T07:44:43Z","abstract_excerpt":"We study a stochastic bandit algorithm motivated by retry-aware objectives that value the best outcome among multiple attempts, such as pass@$k$ and max@$k$. Given a posterior over arm values, ReMax chooses a sampling distribution that maximizes the posterior expected maximum reward over $M$ virtual draws. Although this objective was introduced in reinforcement learning as an exploration mechanism under uncertainty, its regret properties in bandit problems have remained unclear. For Gaussian rewards and the first nontrivial case $M=2$, we characterize the optimal ReMax distribution through an "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20854","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.20854/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-21T01:05:24Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"XXEgTWVKIwh+mjOIjTOUDdFAiMeaBBrCyh4ZPNNvfeFLL4UEdzTYYiLf61xuvTd1jm9qSHd5PU41m0XVcRRDDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T04:50:00.897091Z"},"content_sha256":"656b1bbc7515f0f5bf507db4c59fa36c847cc06ed3a934987ccfed2a973dded5","schema_version":"1.0","event_id":"sha256:656b1bbc7515f0f5bf507db4c59fa36c847cc06ed3a934987ccfed2a973dded5"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/bundle.json","state_url":"https://pith.science/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T04:50:00Z","links":{"resolver":"https://pith.science/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6","bundle":"https://pith.science/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/bundle.json","state":"https://pith.science/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/6RBGQEMF3N4UWAO72VYINTQ2Q6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:6RBGQEMF3N4UWAO72VYINTQ2Q6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"d5cd093a410717fd5159249a95754e3f2d59cffe0431ca92f2a0cf9c5c27d5d1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T07:44:43Z","title_canon_sha256":"e3859cc0f5fcfe5bad5a30b0426431ad311f09809d44348f5a24bd3bc109d2f8"},"schema_version":"1.0","source":{"id":"2605.20854","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.20854","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"arxiv_version","alias_value":"2605.20854v1","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.20854","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_12","alias_value":"6RBGQEMF3N4U","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_16","alias_value":"6RBGQEMF3N4UWAO7","created_at":"2026-05-21T01:05:24Z"},{"alias_kind":"pith_short_8","alias_value":"6RBGQEMF","created_at":"2026-05-21T01:05:24Z"}],"graph_snapshots":[{"event_id":"sha256:656b1bbc7515f0f5bf507db4c59fa36c847cc06ed3a934987ccfed2a973dded5","target":"graph","created_at":"2026-05-21T01:05:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.20854/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study a stochastic bandit algorithm motivated by retry-aware objectives that value the best outcome among multiple attempts, such as pass@$k$ and max@$k$. Given a posterior over arm values, ReMax chooses a sampling distribution that maximizes the posterior expected maximum reward over $M$ virtual draws. Although this objective was introduced in reinforcement learning as an exploration mechanism under uncertainty, its regret properties in bandit problems have remained unclear. For Gaussian rewards and the first nontrivial case $M=2$, we characterize the optimal ReMax distribution through an ","authors_text":"Bingkui Tong, Junpei Komiyama, Paavo Parmas, Soichiro Nishimori","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T07:44:43Z","title":"Finite-Time Regret Analysis of Retry-Aware Bandits"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.20854","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:eb2473c131cc6dad911bcd207d5d8c5d02c00c5adc1897846b38b49768e7d918","target":"record","created_at":"2026-05-21T01:05:24Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"d5cd093a410717fd5159249a95754e3f2d59cffe0431ca92f2a0cf9c5c27d5d1","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-20T07:44:43Z","title_canon_sha256":"e3859cc0f5fcfe5bad5a30b0426431ad311f09809d44348f5a24bd3bc109d2f8"},"schema_version":"1.0","source":{"id":"2605.20854","kind":"arxiv","version":1}},"canonical_sha256":"f442681185db794b01dfd57086ce1a87826dd5d99e67370b3c3fac639a02b727","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f442681185db794b01dfd57086ce1a87826dd5d99e67370b3c3fac639a02b727","first_computed_at":"2026-05-21T01:05:24.587624Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-21T01:05:24.587624Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2SjqVH2AyQGenJoB54/6oDn0wL3penjePr9MmM+MoFg+cOQ9WlbvpXDbVWxRol4uNz/fOdQot6nisOZeNFq5AA==","signature_status":"signed_v1","signed_at":"2026-05-21T01:05:24.588142Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.20854","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:eb2473c131cc6dad911bcd207d5d8c5d02c00c5adc1897846b38b49768e7d918","sha256:656b1bbc7515f0f5bf507db4c59fa36c847cc06ed3a934987ccfed2a973dded5"],"state_sha256":"5235f05e342fd1ad4eec35c846a674890725aabd739f9e0591539ff4c94dec25"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"959WoJvQ7S71kiK9SY2laVw4XIOnDX0aIqu3SGilRfLTqDK4sceINhVuLtgHCryo6Onx6crp23hc9TZUSVXKCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T04:50:00.900479Z","bundle_sha256":"359b988e8b442fdb981b67f18437f3dd5d86bf1e3874ccf1572ad99ccb8f53a7"}}