{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:TPV3T6STYXAPGBSNNTSOJYK5II","short_pith_number":"pith:TPV3T6ST","canonical_record":{"source":{"id":"2602.05139","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T23:49:39Z","cross_cats_sorted":[],"title_canon_sha256":"a180c79f32b277d763919d1273cd2a7c274884e809b5bbe7452f27ec343b7796","abstract_canon_sha256":"8b94704e2482b9827f8cec25ec963d7ef86705d1fc51e8b1705c0d736d4d4fda"},"schema_version":"1.0"},"canonical_sha256":"9bebb9fa53c5c0f3064d6ce4e4e15d421bc8f6253dcd241a9dfe4f15360cd0e4","source":{"kind":"arxiv","id":"2602.05139","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.05139","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"arxiv_version","alias_value":"2602.05139v3","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05139","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_12","alias_value":"TPV3T6STYXAP","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_16","alias_value":"TPV3T6STYXAPGBSN","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_8","alias_value":"TPV3T6ST","created_at":"2026-06-02T02:04:13Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:TPV3T6STYXAPGBSNNTSOJYK5II","target":"record","payload":{"canonical_record":{"source":{"id":"2602.05139","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T23:49:39Z","cross_cats_sorted":[],"title_canon_sha256":"a180c79f32b277d763919d1273cd2a7c274884e809b5bbe7452f27ec343b7796","abstract_canon_sha256":"8b94704e2482b9827f8cec25ec963d7ef86705d1fc51e8b1705c0d736d4d4fda"},"schema_version":"1.0"},"canonical_sha256":"9bebb9fa53c5c0f3064d6ce4e4e15d421bc8f6253dcd241a9dfe4f15360cd0e4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T02:04:13.988471Z","signature_b64":"1aGAFfDg1VKkYEhNMVcibZzYRjB6vGoV7vPc9G6/+MCo2Evuw6uERHeAso75RwGxGmkU3L/YU8ijnLsxELt5Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9bebb9fa53c5c0f3064d6ce4e4e15d421bc8f6253dcd241a9dfe4f15360cd0e4","last_reissued_at":"2026-06-02T02:04:13.987953Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T02:04:13.987953Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.05139","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iPotDCivtaYrcX5XqmjEbec564CG2Gs2KTfB6+9QrB0l8r8tafqECPFDvfQd1nxEIg9hJe/KNRBZtmEXbeZzAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:37:37.584234Z"},"content_sha256":"474bd82d4aca15eebd7afef9deddcd97177d06fba5d6862dc0bf868f60a309e2","schema_version":"1.0","event_id":"sha256:474bd82d4aca15eebd7afef9deddcd97177d06fba5d6862dc0bf868f60a309e2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:TPV3T6STYXAPGBSNNTSOJYK5II","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Adaptive Exploration for Latent-State Bandits","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Baoyi Shi, Congshan Zhang, Jikai Jin, Kenneth Hung, Sanath Kumar Krishnamurthy","submitted_at":"2026-02-04T23:49:39Z","abstract_excerpt":"We study bandits whose rewards depend on an unobserved Markov state that evolves independently of the learner's actions. The optimal arm can change even though the learner observes only past actions and rewards. We propose algorithms that feed LinUCB with two summaries of the hidden state: a lagged action-reward pair and, when available, a probe fingerprint formed from rewards of multiple arms. The adaptive variants refresh the fingerprint using residual, margin, and staleness tests. In synthetic stress tests over state count, transition rate, noise, and horizon, these methods reduce dynamic r"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.05139","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.05139/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-02T02:04:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kveSEFgHgwaHtwH+vNgCAEJbobNW0VPO8lpYLmmuesNAzGPEXwCeZyk367S+7WBUHkl5sy5tV8ob6WpBVg5nAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T01:37:37.584709Z"},"content_sha256":"61810eb50c927d2146e7ca7ebdf385b60bfbecee3257c85a8c7ab21462136491","schema_version":"1.0","event_id":"sha256:61810eb50c927d2146e7ca7ebdf385b60bfbecee3257c85a8c7ab21462136491"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/TPV3T6STYXAPGBSNNTSOJYK5II/bundle.json","state_url":"https://pith.science/pith/TPV3T6STYXAPGBSNNTSOJYK5II/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/TPV3T6STYXAPGBSNNTSOJYK5II/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T01:37:37Z","links":{"resolver":"https://pith.science/pith/TPV3T6STYXAPGBSNNTSOJYK5II","bundle":"https://pith.science/pith/TPV3T6STYXAPGBSNNTSOJYK5II/bundle.json","state":"https://pith.science/pith/TPV3T6STYXAPGBSNNTSOJYK5II/state.json","well_known_bundle":"https://pith.science/.well-known/pith/TPV3T6STYXAPGBSNNTSOJYK5II/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:TPV3T6STYXAPGBSNNTSOJYK5II","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8b94704e2482b9827f8cec25ec963d7ef86705d1fc51e8b1705c0d736d4d4fda","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T23:49:39Z","title_canon_sha256":"a180c79f32b277d763919d1273cd2a7c274884e809b5bbe7452f27ec343b7796"},"schema_version":"1.0","source":{"id":"2602.05139","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.05139","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"arxiv_version","alias_value":"2602.05139v3","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05139","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_12","alias_value":"TPV3T6STYXAP","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_16","alias_value":"TPV3T6STYXAPGBSN","created_at":"2026-06-02T02:04:13Z"},{"alias_kind":"pith_short_8","alias_value":"TPV3T6ST","created_at":"2026-06-02T02:04:13Z"}],"graph_snapshots":[{"event_id":"sha256:61810eb50c927d2146e7ca7ebdf385b60bfbecee3257c85a8c7ab21462136491","target":"graph","created_at":"2026-06-02T02:04:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2602.05139/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We study bandits whose rewards depend on an unobserved Markov state that evolves independently of the learner's actions. The optimal arm can change even though the learner observes only past actions and rewards. We propose algorithms that feed LinUCB with two summaries of the hidden state: a lagged action-reward pair and, when available, a probe fingerprint formed from rewards of multiple arms. The adaptive variants refresh the fingerprint using residual, margin, and staleness tests. In synthetic stress tests over state count, transition rate, noise, and horizon, these methods reduce dynamic r","authors_text":"Baoyi Shi, Congshan Zhang, Jikai Jin, Kenneth Hung, Sanath Kumar Krishnamurthy","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T23:49:39Z","title":"Adaptive Exploration for Latent-State Bandits"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.05139","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:474bd82d4aca15eebd7afef9deddcd97177d06fba5d6862dc0bf868f60a309e2","target":"record","created_at":"2026-06-02T02:04:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8b94704e2482b9827f8cec25ec963d7ef86705d1fc51e8b1705c0d736d4d4fda","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T23:49:39Z","title_canon_sha256":"a180c79f32b277d763919d1273cd2a7c274884e809b5bbe7452f27ec343b7796"},"schema_version":"1.0","source":{"id":"2602.05139","kind":"arxiv","version":3}},"canonical_sha256":"9bebb9fa53c5c0f3064d6ce4e4e15d421bc8f6253dcd241a9dfe4f15360cd0e4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"9bebb9fa53c5c0f3064d6ce4e4e15d421bc8f6253dcd241a9dfe4f15360cd0e4","first_computed_at":"2026-06-02T02:04:13.987953Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-02T02:04:13.987953Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1aGAFfDg1VKkYEhNMVcibZzYRjB6vGoV7vPc9G6/+MCo2Evuw6uERHeAso75RwGxGmkU3L/YU8ijnLsxELt5Cg==","signature_status":"signed_v1","signed_at":"2026-06-02T02:04:13.988471Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.05139","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:474bd82d4aca15eebd7afef9deddcd97177d06fba5d6862dc0bf868f60a309e2","sha256:61810eb50c927d2146e7ca7ebdf385b60bfbecee3257c85a8c7ab21462136491"],"state_sha256":"fa77cb5b2869bb816954c23191be992b94f52bf91a7ab2718c6531fbdab66dda"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"AXa2c+IW3gSXBPm+VY8culpkoq7qJ0ljOXuAGZFkRJw1OZYFXwf9HZUD2G9iFt8noqFTlkw9Nv1eGvgxsLwDAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T01:37:37.588223Z","bundle_sha256":"67aae8a4a797be4228756e3a322c7bb03b2507d3b72d9ad59d34c99e51fbe6d2"}}