{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:OHECTYOFPI65FRY7UJ65VKYB5N","short_pith_number":"pith:OHECTYOF","canonical_record":{"source":{"id":"2509.25582","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-29T23:07:32Z","cross_cats_sorted":[],"title_canon_sha256":"67d21d463292c5c9cb7a62094770741f92b5e948e7f6c8ef20b01dfa54f4650b","abstract_canon_sha256":"993fb22a5802f2ee7317c84110b3ef2d70d1f889da1f0e06be8868b79e1c3315"},"schema_version":"1.0"},"canonical_sha256":"71c829e1c57a3dd2c71fa27ddaab01eb63c76a0b52c19759007f40dccf75ceb8","source":{"kind":"arxiv","id":"2509.25582","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.25582","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2509.25582v3","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.25582","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"OHECTYOFPI65","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"OHECTYOFPI65FRY7","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"OHECTYOF","created_at":"2026-05-28T02:04:43Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:OHECTYOFPI65FRY7UJ65VKYB5N","target":"record","payload":{"canonical_record":{"source":{"id":"2509.25582","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-29T23:07:32Z","cross_cats_sorted":[],"title_canon_sha256":"67d21d463292c5c9cb7a62094770741f92b5e948e7f6c8ef20b01dfa54f4650b","abstract_canon_sha256":"993fb22a5802f2ee7317c84110b3ef2d70d1f889da1f0e06be8868b79e1c3315"},"schema_version":"1.0"},"canonical_sha256":"71c829e1c57a3dd2c71fa27ddaab01eb63c76a0b52c19759007f40dccf75ceb8","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T02:04:43.104879Z","signature_b64":"LX0XGYJw5wntZYq1sfQJ9Czt+eRGf1L8jaDF0a71ryaRVFs5PnU8RxLhcQQXBc1Ea6CzMECqHIEu91Q0eFnkBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"71c829e1c57a3dd2c71fa27ddaab01eb63c76a0b52c19759007f40dccf75ceb8","last_reissued_at":"2026-05-28T02:04:43.104298Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T02:04:43.104298Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.25582","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T02:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SFj2vRQ7h8IVKGElnlHMYDSkF/V5VCDt1Q3d1mQcM9gE+vrm84mnGeqMJpeo2RoDqRjaptQeDRLgyNvP45snAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T06:33:51.931621Z"},"content_sha256":"d4ef3831c389fa98527b171f5936e9b49bd0b802e3ffc4b2e7773615e5dfae1c","schema_version":"1.0","event_id":"sha256:d4ef3831c389fa98527b171f5936e9b49bd0b802e3ffc4b2e7773615e5dfae1c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:OHECTYOFPI65FRY7UJ65VKYB5N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Safe In-Context Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Alper Kamil Bozkurt, Amir Moeini, Lu Feng, Minjae Kwon, Rohan Chandra, Shangtong Zhang, Yuichi Motai","submitted_at":"2025-09-29T23:07:32Z","abstract_excerpt":"In-context reinforcement learning (ICRL) is an emerging RL paradigm where an agent, after pretraining, can adapt to out-of-distribution test tasks without any parameter updates, instead relying on an expanding context of interaction history. While ICRL has shown impressive generalization, safety during this adaptation process remains unexplored, limiting its applicability in real-world deployments where test-time behavior is expected to be safe. In this work, we propose SCARED: Safe Contextual Adaptive Reinforcement via Exact-penalty Dual, the first method that promotes safe adaptation of ICRL"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.25582","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.25582/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-28T02:04:43Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BFJfnhCbgn3d0zicoyyEvKhSQoe9UQnm52qKKq0rMfoIR73UxL6NfWDEnNcajGP+Lnk823DvCOOf6CHjWReiCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T06:33:51.932353Z"},"content_sha256":"ca886b2d89b1ec219873d8b7a7c0465622bad6076dbd45afa6247468284afb27","schema_version":"1.0","event_id":"sha256:ca886b2d89b1ec219873d8b7a7c0465622bad6076dbd45afa6247468284afb27"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OHECTYOFPI65FRY7UJ65VKYB5N/bundle.json","state_url":"https://pith.science/pith/OHECTYOFPI65FRY7UJ65VKYB5N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OHECTYOFPI65FRY7UJ65VKYB5N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T06:33:51Z","links":{"resolver":"https://pith.science/pith/OHECTYOFPI65FRY7UJ65VKYB5N","bundle":"https://pith.science/pith/OHECTYOFPI65FRY7UJ65VKYB5N/bundle.json","state":"https://pith.science/pith/OHECTYOFPI65FRY7UJ65VKYB5N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OHECTYOFPI65FRY7UJ65VKYB5N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:OHECTYOFPI65FRY7UJ65VKYB5N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"993fb22a5802f2ee7317c84110b3ef2d70d1f889da1f0e06be8868b79e1c3315","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-29T23:07:32Z","title_canon_sha256":"67d21d463292c5c9cb7a62094770741f92b5e948e7f6c8ef20b01dfa54f4650b"},"schema_version":"1.0","source":{"id":"2509.25582","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.25582","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"arxiv_version","alias_value":"2509.25582v3","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.25582","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_12","alias_value":"OHECTYOFPI65","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_16","alias_value":"OHECTYOFPI65FRY7","created_at":"2026-05-28T02:04:43Z"},{"alias_kind":"pith_short_8","alias_value":"OHECTYOF","created_at":"2026-05-28T02:04:43Z"}],"graph_snapshots":[{"event_id":"sha256:ca886b2d89b1ec219873d8b7a7c0465622bad6076dbd45afa6247468284afb27","target":"graph","created_at":"2026-05-28T02:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.25582/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"In-context reinforcement learning (ICRL) is an emerging RL paradigm where an agent, after pretraining, can adapt to out-of-distribution test tasks without any parameter updates, instead relying on an expanding context of interaction history. While ICRL has shown impressive generalization, safety during this adaptation process remains unexplored, limiting its applicability in real-world deployments where test-time behavior is expected to be safe. In this work, we propose SCARED: Safe Contextual Adaptive Reinforcement via Exact-penalty Dual, the first method that promotes safe adaptation of ICRL","authors_text":"Alper Kamil Bozkurt, Amir Moeini, Lu Feng, Minjae Kwon, Rohan Chandra, Shangtong Zhang, Yuichi Motai","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-29T23:07:32Z","title":"Safe In-Context Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.25582","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:d4ef3831c389fa98527b171f5936e9b49bd0b802e3ffc4b2e7773615e5dfae1c","target":"record","created_at":"2026-05-28T02:04:43Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"993fb22a5802f2ee7317c84110b3ef2d70d1f889da1f0e06be8868b79e1c3315","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-29T23:07:32Z","title_canon_sha256":"67d21d463292c5c9cb7a62094770741f92b5e948e7f6c8ef20b01dfa54f4650b"},"schema_version":"1.0","source":{"id":"2509.25582","kind":"arxiv","version":3}},"canonical_sha256":"71c829e1c57a3dd2c71fa27ddaab01eb63c76a0b52c19759007f40dccf75ceb8","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"71c829e1c57a3dd2c71fa27ddaab01eb63c76a0b52c19759007f40dccf75ceb8","first_computed_at":"2026-05-28T02:04:43.104298Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T02:04:43.104298Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"LX0XGYJw5wntZYq1sfQJ9Czt+eRGf1L8jaDF0a71ryaRVFs5PnU8RxLhcQQXBc1Ea6CzMECqHIEu91Q0eFnkBg==","signature_status":"signed_v1","signed_at":"2026-05-28T02:04:43.104879Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.25582","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:d4ef3831c389fa98527b171f5936e9b49bd0b802e3ffc4b2e7773615e5dfae1c","sha256:ca886b2d89b1ec219873d8b7a7c0465622bad6076dbd45afa6247468284afb27"],"state_sha256":"1b49fc7630c989fe16c5a6f49c275536e93028658b03fd73a59c9f819cc5cdf5"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SqhSHB1vQn2KuycGdcPQFAyu9JD1yE6LJ8BWnuEwdwhraWZ6a4Xa6QMK0zcoVBUhfjJHHpyw0bQT585BhU1ICA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T06:33:51.935757Z","bundle_sha256":"4478313c9f1bdf81cde1bd936654eca98641e190d0568377ff69b534b16568db"}}