{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:AQ652APB4V6JVBSCNNO4E7733X","short_pith_number":"pith:AQ652APB","canonical_record":{"source":{"id":"2502.17666","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-24T21:29:06Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a296c75510dde7aa45197634878e7d4dfe29f2063b72e96c359718652fdcca92","abstract_canon_sha256":"908786bb2ff3f93f1249e0fd206327d5182a0263f618c7637322f5fda01076c0"},"schema_version":"1.0"},"canonical_sha256":"043ddd01e1e57c9a86426b5dc27ffbddee0e8a2025a78b8ae8dc51945c73816f","source":{"kind":"arxiv","id":"2502.17666","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.17666","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"arxiv_version","alias_value":"2502.17666v4","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.17666","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_12","alias_value":"AQ652APB4V6J","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_16","alias_value":"AQ652APB4V6JVBSC","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_8","alias_value":"AQ652APB","created_at":"2026-05-27T01:04:47Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:AQ652APB4V6JVBSCNNO4E7733X","target":"record","payload":{"canonical_record":{"source":{"id":"2502.17666","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-24T21:29:06Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a296c75510dde7aa45197634878e7d4dfe29f2063b72e96c359718652fdcca92","abstract_canon_sha256":"908786bb2ff3f93f1249e0fd206327d5182a0263f618c7637322f5fda01076c0"},"schema_version":"1.0"},"canonical_sha256":"043ddd01e1e57c9a86426b5dc27ffbddee0e8a2025a78b8ae8dc51945c73816f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:04:47.981001Z","signature_b64":"AHwbsCEcpsMg90Ig16FBdm8ScHr4WnPgqMwzoLHkCkrQ9671mCyuMkBBxKAG/1HoUms0dESFzL7sf8SfaIupAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"043ddd01e1e57c9a86426b5dc27ffbddee0e8a2025a78b8ae8dc51945c73816f","last_reissued_at":"2026-05-27T01:04:47.980500Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:04:47.980500Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2502.17666","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:04:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"afSA0TlEF3m3JounlWjW4FQ/HP0l3ouzjb0j3CmjexKSF1S6skvGRcUgSsWpTcHRUsZJyewR0Niqq3fqTQC8Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T12:31:17.621624Z"},"content_sha256":"a6706a191ff69ea6f908d29322abc03952b2a11400935fe445a6acea639adde4","schema_version":"1.0","event_id":"sha256:a6706a191ff69ea6f908d29322abc03952b2a11400935fe445a6acea639adde4"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:AQ652APB4V6JVBSCNNO4E7733X","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Yes, Q-learning Helps Offline In-Context RL","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Albina Klepach, Alexander Derevyagin, Alexander Nikulin, Andrei Polubarov, Denis Tarasov, Igor Kiselev, Ilya Zisman, Nikita Lyubaykin, Vladislav Kurenkov","submitted_at":"2025-02-24T21:29:06Z","abstract_excerpt":"Existing offline in-context reinforcement learning (ICRL) methods have predominantly relied on supervised training objectives, which are known to have limitations in offline RL settings. In this study, we explore the integration of RL objectives within an offline ICRL framework. Through experiments on more than 150 GridWorld and MuJoCo environment-derived datasets, we demonstrate that optimizing RL objectives directly improves performance by approximately 30% on average compared to widely adopted Algorithm Distillation (AD), across various dataset coverages, structures, expertise levels, and e"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.17666","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2502.17666/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:04:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"fpxUCVAVkdxTcJEycIjCudpdQkjyNGlr3+eHb6jtFPvVn5YFEjyagm8jLZX0PV7rCT1rstnEdYHmZUP4nB8oDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T12:31:17.622007Z"},"content_sha256":"732dd20f3fe6473d95e4e585aa09e6819fe098c2043ef412265740f4744b17e2","schema_version":"1.0","event_id":"sha256:732dd20f3fe6473d95e4e585aa09e6819fe098c2043ef412265740f4744b17e2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/AQ652APB4V6JVBSCNNO4E7733X/bundle.json","state_url":"https://pith.science/pith/AQ652APB4V6JVBSCNNO4E7733X/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/AQ652APB4V6JVBSCNNO4E7733X/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T12:31:17Z","links":{"resolver":"https://pith.science/pith/AQ652APB4V6JVBSCNNO4E7733X","bundle":"https://pith.science/pith/AQ652APB4V6JVBSCNNO4E7733X/bundle.json","state":"https://pith.science/pith/AQ652APB4V6JVBSCNNO4E7733X/state.json","well_known_bundle":"https://pith.science/.well-known/pith/AQ652APB4V6JVBSCNNO4E7733X/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:AQ652APB4V6JVBSCNNO4E7733X","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"908786bb2ff3f93f1249e0fd206327d5182a0263f618c7637322f5fda01076c0","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-24T21:29:06Z","title_canon_sha256":"a296c75510dde7aa45197634878e7d4dfe29f2063b72e96c359718652fdcca92"},"schema_version":"1.0","source":{"id":"2502.17666","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.17666","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"arxiv_version","alias_value":"2502.17666v4","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.17666","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_12","alias_value":"AQ652APB4V6J","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_16","alias_value":"AQ652APB4V6JVBSC","created_at":"2026-05-27T01:04:47Z"},{"alias_kind":"pith_short_8","alias_value":"AQ652APB","created_at":"2026-05-27T01:04:47Z"}],"graph_snapshots":[{"event_id":"sha256:732dd20f3fe6473d95e4e585aa09e6819fe098c2043ef412265740f4744b17e2","target":"graph","created_at":"2026-05-27T01:04:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2502.17666/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Existing offline in-context reinforcement learning (ICRL) methods have predominantly relied on supervised training objectives, which are known to have limitations in offline RL settings. In this study, we explore the integration of RL objectives within an offline ICRL framework. Through experiments on more than 150 GridWorld and MuJoCo environment-derived datasets, we demonstrate that optimizing RL objectives directly improves performance by approximately 30% on average compared to widely adopted Algorithm Distillation (AD), across various dataset coverages, structures, expertise levels, and e","authors_text":"Albina Klepach, Alexander Derevyagin, Alexander Nikulin, Andrei Polubarov, Denis Tarasov, Igor Kiselev, Ilya Zisman, Nikita Lyubaykin, Vladislav Kurenkov","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-24T21:29:06Z","title":"Yes, Q-learning Helps Offline In-Context RL"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.17666","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a6706a191ff69ea6f908d29322abc03952b2a11400935fe445a6acea639adde4","target":"record","created_at":"2026-05-27T01:04:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"908786bb2ff3f93f1249e0fd206327d5182a0263f618c7637322f5fda01076c0","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-24T21:29:06Z","title_canon_sha256":"a296c75510dde7aa45197634878e7d4dfe29f2063b72e96c359718652fdcca92"},"schema_version":"1.0","source":{"id":"2502.17666","kind":"arxiv","version":4}},"canonical_sha256":"043ddd01e1e57c9a86426b5dc27ffbddee0e8a2025a78b8ae8dc51945c73816f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"043ddd01e1e57c9a86426b5dc27ffbddee0e8a2025a78b8ae8dc51945c73816f","first_computed_at":"2026-05-27T01:04:47.980500Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:04:47.980500Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"AHwbsCEcpsMg90Ig16FBdm8ScHr4WnPgqMwzoLHkCkrQ9671mCyuMkBBxKAG/1HoUms0dESFzL7sf8SfaIupAw==","signature_status":"signed_v1","signed_at":"2026-05-27T01:04:47.981001Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.17666","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a6706a191ff69ea6f908d29322abc03952b2a11400935fe445a6acea639adde4","sha256:732dd20f3fe6473d95e4e585aa09e6819fe098c2043ef412265740f4744b17e2"],"state_sha256":"39768c9f7ab41102f94b5d207448b7115849661177d7d850d9d1ba1eef694e47"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"25TQNlfP8EpmAl36BdTY1rWKAYCiztpz/3zI+DsY0wDQb6WB4PpL/UE0f4p8XLAdpnOYB0mxyaOwek7VLDTfBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T12:31:17.624236Z","bundle_sha256":"89277da96541a6016daa0a3989c4c678f339950d084ae8b5dff8dbcca90bfda0"}}