{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:4UOT2D5WDPTVLZ4UUOCQ2QC72H","short_pith_number":"pith:4UOT2D5W","canonical_record":{"source":{"id":"2605.23565","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T12:31:18Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b70436077de6a93bede760d05a9bef9f8cf9287dd7ce7506cedbae18b0da844e","abstract_canon_sha256":"73eb856b0fd92370714c9009271eeb02e849c91dcaa80c04a68a7395d225d4ab"},"schema_version":"1.0"},"canonical_sha256":"e51d3d0fb61be755e794a3850d405fd1f34ebeda460a0b56034ab20ab36e1532","source":{"kind":"arxiv","id":"2605.23565","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23565","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23565v1","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23565","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_12","alias_value":"4UOT2D5WDPTV","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_16","alias_value":"4UOT2D5WDPTVLZ4U","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_8","alias_value":"4UOT2D5W","created_at":"2026-05-25T02:02:19Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:4UOT2D5WDPTVLZ4UUOCQ2QC72H","target":"record","payload":{"canonical_record":{"source":{"id":"2605.23565","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T12:31:18Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"b70436077de6a93bede760d05a9bef9f8cf9287dd7ce7506cedbae18b0da844e","abstract_canon_sha256":"73eb856b0fd92370714c9009271eeb02e849c91dcaa80c04a68a7395d225d4ab"},"schema_version":"1.0"},"canonical_sha256":"e51d3d0fb61be755e794a3850d405fd1f34ebeda460a0b56034ab20ab36e1532","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:02:19.721438Z","signature_b64":"/b4Yx/Gxe290jRxXT3i+B3qGKrA2flycudail16uWD/7X4wzX/IkofIHPMufkW/aOplbchm3HhPb7b2saiSMAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e51d3d0fb61be755e794a3850d405fd1f34ebeda460a0b56034ab20ab36e1532","last_reissued_at":"2026-05-25T02:02:19.720618Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:02:19.720618Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.23565","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"jcYXfTTGQc7ua8VLRwQkmvCUG49ob8qrUaB5PUKondLvdP3R6I7keECnjfERpjlf405mvD8jCRFl+eztC8aPAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:23:27.590853Z"},"content_sha256":"5ecd00e6d15fce6e765d51c4abaa3938c03f394300e328c0a197f010cd5513e1","schema_version":"1.0","event_id":"sha256:5ecd00e6d15fce6e765d51c4abaa3938c03f394300e328c0a197f010cd5513e1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:4UOT2D5WDPTVLZ4UUOCQ2QC72H","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Understanding Goal Generalisation in Sequential Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Edward James Young, Jason Ross Brown","submitted_at":"2026-05-22T12:31:18Z","abstract_excerpt":"Reinforcement learning agents often exhibit unintended goal-directed behaviour outside their training distribution, but we currently lack a principled understanding of how such agents will generalise to novel environments based on their training history. We address this gap for agents trained sequentially on one or more tasks. We study over 100 sequential training pipelines, evaluating behaviour across over 250 out-of-distribution environments. We find that salient features drive generalisation, and that goals learnt early in training can persist and influence those acquired later. To explain "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23565","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.23565/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:02:19Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"W/FUgU0u6zMjumFsjSBMwnPnXfRWK+7akWnNvU3bsL4IUN+EKpC7+dRIT6VfPNzrJ3DldF/7VCIJFDl1s+8QBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T17:23:27.591555Z"},"content_sha256":"5e28c0761d8b2b02482fa0eee60afa3b036488cd1e2793d26df515b01ba01372","schema_version":"1.0","event_id":"sha256:5e28c0761d8b2b02482fa0eee60afa3b036488cd1e2793d26df515b01ba01372"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/bundle.json","state_url":"https://pith.science/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T17:23:27Z","links":{"resolver":"https://pith.science/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H","bundle":"https://pith.science/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/bundle.json","state":"https://pith.science/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4UOT2D5WDPTVLZ4UUOCQ2QC72H/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:4UOT2D5WDPTVLZ4UUOCQ2QC72H","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"73eb856b0fd92370714c9009271eeb02e849c91dcaa80c04a68a7395d225d4ab","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T12:31:18Z","title_canon_sha256":"b70436077de6a93bede760d05a9bef9f8cf9287dd7ce7506cedbae18b0da844e"},"schema_version":"1.0","source":{"id":"2605.23565","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.23565","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"arxiv_version","alias_value":"2605.23565v1","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.23565","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_12","alias_value":"4UOT2D5WDPTV","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_16","alias_value":"4UOT2D5WDPTVLZ4U","created_at":"2026-05-25T02:02:19Z"},{"alias_kind":"pith_short_8","alias_value":"4UOT2D5W","created_at":"2026-05-25T02:02:19Z"}],"graph_snapshots":[{"event_id":"sha256:5e28c0761d8b2b02482fa0eee60afa3b036488cd1e2793d26df515b01ba01372","target":"graph","created_at":"2026-05-25T02:02:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.23565/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Reinforcement learning agents often exhibit unintended goal-directed behaviour outside their training distribution, but we currently lack a principled understanding of how such agents will generalise to novel environments based on their training history. We address this gap for agents trained sequentially on one or more tasks. We study over 100 sequential training pipelines, evaluating behaviour across over 250 out-of-distribution environments. We find that salient features drive generalisation, and that goals learnt early in training can persist and influence those acquired later. To explain ","authors_text":"Edward James Young, Jason Ross Brown","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T12:31:18Z","title":"Understanding Goal Generalisation in Sequential Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.23565","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5ecd00e6d15fce6e765d51c4abaa3938c03f394300e328c0a197f010cd5513e1","target":"record","created_at":"2026-05-25T02:02:19Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"73eb856b0fd92370714c9009271eeb02e849c91dcaa80c04a68a7395d225d4ab","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-22T12:31:18Z","title_canon_sha256":"b70436077de6a93bede760d05a9bef9f8cf9287dd7ce7506cedbae18b0da844e"},"schema_version":"1.0","source":{"id":"2605.23565","kind":"arxiv","version":1}},"canonical_sha256":"e51d3d0fb61be755e794a3850d405fd1f34ebeda460a0b56034ab20ab36e1532","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e51d3d0fb61be755e794a3850d405fd1f34ebeda460a0b56034ab20ab36e1532","first_computed_at":"2026-05-25T02:02:19.720618Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:02:19.720618Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"/b4Yx/Gxe290jRxXT3i+B3qGKrA2flycudail16uWD/7X4wzX/IkofIHPMufkW/aOplbchm3HhPb7b2saiSMAQ==","signature_status":"signed_v1","signed_at":"2026-05-25T02:02:19.721438Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.23565","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5ecd00e6d15fce6e765d51c4abaa3938c03f394300e328c0a197f010cd5513e1","sha256:5e28c0761d8b2b02482fa0eee60afa3b036488cd1e2793d26df515b01ba01372"],"state_sha256":"b4f8fea295c9274e0cf38e88aa3f75a6ff35cfe6701138a63d097ed0614b7443"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"D73uiimi6p/C21qlOWILMUMfVMnbzci3m8ZDpkJMXEL7NLv29L/zzzCDeTYN7Im30YlkZ1OU84C/WlTsAtD4DA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T17:23:27.595155Z","bundle_sha256":"461e7f168f113397e8feb02a7ede8190ca69e22fdc23710fa314083b28e16744"}}