{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:HSNX6NBKJLECVX6TB3VGWKIMIX","short_pith_number":"pith:HSNX6NBK","canonical_record":{"source":{"id":"1803.07131","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-19T19:35:44Z","cross_cats_sorted":[],"title_canon_sha256":"1069f2c4af5c3e0aa55d07e584505aea8e0fe7d068fe6c46cea4915d8ceeecf5","abstract_canon_sha256":"8cd6320342ba96eefb3da1ab20d1fab92fa2df912233ead848e00dd65f558cc8"},"schema_version":"1.0"},"canonical_sha256":"3c9b7f342a4ac82adfd30eea6b290c45cbcf2380e053ef0d22d650498dd71006","source":{"kind":"arxiv","id":"1803.07131","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.07131","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"arxiv_version","alias_value":"1803.07131v2","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.07131","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"pith_short_12","alias_value":"HSNX6NBKJLEC","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"HSNX6NBKJLECVX6T","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"HSNX6NBK","created_at":"2026-05-18T12:32:28Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:HSNX6NBKJLECVX6TB3VGWKIMIX","target":"record","payload":{"canonical_record":{"source":{"id":"1803.07131","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-19T19:35:44Z","cross_cats_sorted":[],"title_canon_sha256":"1069f2c4af5c3e0aa55d07e584505aea8e0fe7d068fe6c46cea4915d8ceeecf5","abstract_canon_sha256":"8cd6320342ba96eefb3da1ab20d1fab92fa2df912233ead848e00dd65f558cc8"},"schema_version":"1.0"},"canonical_sha256":"3c9b7f342a4ac82adfd30eea6b290c45cbcf2380e053ef0d22d650498dd71006","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:13:50.628332Z","signature_b64":"e+BFxI29HqjObYesbN/Y9WRMoFjzbQ0DLwvvmMZzqJHgfVFK028kMvbHCHdT5LfdIWltoEN1l/MVpb/SVDkCAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"3c9b7f342a4ac82adfd30eea6b290c45cbcf2380e053ef0d22d650498dd71006","last_reissued_at":"2026-05-18T00:13:50.627602Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:13:50.627602Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.07131","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"BrXM4zXKSCo8bTF18QZ9mgF4S+Q1N+UPmBbR9Oz2fmZIqFaT4zlVAAq1QJy9lMvmH1mefLzj50K1gEpSZmD4Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T02:23:29.763594Z"},"content_sha256":"f8851ceb9ec919cfdff42aa7ee0078214c1fa622f0d53da5d42cd239e94d4890","schema_version":"1.0","event_id":"sha256:f8851ceb9ec919cfdff42aa7ee0078214c1fa622f0d53da5d42cd239e94d4890"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:HSNX6NBKJLECVX6TB3VGWKIMIX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Automated Curriculum Learning by Rewarding Temporally Rare Events","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Niels Justesen, Sebastian Risi","submitted_at":"2018-03-19T19:35:44Z","abstract_excerpt":"Reward shaping allows reinforcement learning (RL) agents to accelerate learning by receiving additional reward signals. However, these signals can be difficult to design manually, especially for complex RL tasks. We propose a simple and general approach that determines the reward of pre-defined events by their rarity alone. Here events become less rewarding as they are experienced more often, which encourages the agent to continually explore new types of events as it learns. The adaptiveness of this reward function results in a form of automated curriculum learning that does not have to be spe"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.07131","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:13:50Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"dMxOSNp+PK3pUtbh1hhyKnDdPwFpEFuCM27128Ztj/hhQkHccUjveZSVwGCzY140IkBdOaiPhQ46pCQuAUZ+Ag==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T02:23:29.764360Z"},"content_sha256":"997bf4be3ab3817749397df3ff6960f1344390d8e3d8e350dafe8cb858aa48d8","schema_version":"1.0","event_id":"sha256:997bf4be3ab3817749397df3ff6960f1344390d8e3d8e350dafe8cb858aa48d8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/bundle.json","state_url":"https://pith.science/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T02:23:29Z","links":{"resolver":"https://pith.science/pith/HSNX6NBKJLECVX6TB3VGWKIMIX","bundle":"https://pith.science/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/bundle.json","state":"https://pith.science/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/HSNX6NBKJLECVX6TB3VGWKIMIX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:HSNX6NBKJLECVX6TB3VGWKIMIX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8cd6320342ba96eefb3da1ab20d1fab92fa2df912233ead848e00dd65f558cc8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-19T19:35:44Z","title_canon_sha256":"1069f2c4af5c3e0aa55d07e584505aea8e0fe7d068fe6c46cea4915d8ceeecf5"},"schema_version":"1.0","source":{"id":"1803.07131","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.07131","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"arxiv_version","alias_value":"1803.07131v2","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.07131","created_at":"2026-05-18T00:13:50Z"},{"alias_kind":"pith_short_12","alias_value":"HSNX6NBKJLEC","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_16","alias_value":"HSNX6NBKJLECVX6T","created_at":"2026-05-18T12:32:28Z"},{"alias_kind":"pith_short_8","alias_value":"HSNX6NBK","created_at":"2026-05-18T12:32:28Z"}],"graph_snapshots":[{"event_id":"sha256:997bf4be3ab3817749397df3ff6960f1344390d8e3d8e350dafe8cb858aa48d8","target":"graph","created_at":"2026-05-18T00:13:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Reward shaping allows reinforcement learning (RL) agents to accelerate learning by receiving additional reward signals. However, these signals can be difficult to design manually, especially for complex RL tasks. We propose a simple and general approach that determines the reward of pre-defined events by their rarity alone. Here events become less rewarding as they are experienced more often, which encourages the agent to continually explore new types of events as it learns. The adaptiveness of this reward function results in a form of automated curriculum learning that does not have to be spe","authors_text":"Niels Justesen, Sebastian Risi","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-19T19:35:44Z","title":"Automated Curriculum Learning by Rewarding Temporally Rare Events"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.07131","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f8851ceb9ec919cfdff42aa7ee0078214c1fa622f0d53da5d42cd239e94d4890","target":"record","created_at":"2026-05-18T00:13:50Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8cd6320342ba96eefb3da1ab20d1fab92fa2df912233ead848e00dd65f558cc8","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2018-03-19T19:35:44Z","title_canon_sha256":"1069f2c4af5c3e0aa55d07e584505aea8e0fe7d068fe6c46cea4915d8ceeecf5"},"schema_version":"1.0","source":{"id":"1803.07131","kind":"arxiv","version":2}},"canonical_sha256":"3c9b7f342a4ac82adfd30eea6b290c45cbcf2380e053ef0d22d650498dd71006","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"3c9b7f342a4ac82adfd30eea6b290c45cbcf2380e053ef0d22d650498dd71006","first_computed_at":"2026-05-18T00:13:50.627602Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:13:50.627602Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"e+BFxI29HqjObYesbN/Y9WRMoFjzbQ0DLwvvmMZzqJHgfVFK028kMvbHCHdT5LfdIWltoEN1l/MVpb/SVDkCAA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:13:50.628332Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.07131","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f8851ceb9ec919cfdff42aa7ee0078214c1fa622f0d53da5d42cd239e94d4890","sha256:997bf4be3ab3817749397df3ff6960f1344390d8e3d8e350dafe8cb858aa48d8"],"state_sha256":"6ba7be6285e6178a4019b99917bcce7e1d29b1c40d61aafe7cf12e929ee5ab1e"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OqOZJ19DRRj3wMoZu7ZHHnrRRHtLfbh3nv1JldfRzEFZ/cBIlaif2bPNaCHjK5tj0LCWiKJ5DedNS/8bSYbyCw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T02:23:29.770122Z","bundle_sha256":"a80f5cf51f5515da23b4e82b7aba0a0c3c8849f08a8306f36f432734f4176cb6"}}