{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:QOMF3NLUHEGDHLWEAYSZ44G75N","short_pith_number":"pith:QOMF3NLU","canonical_record":{"source":{"id":"1809.06719","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-16T17:07:33Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"67ce0e70e4e1246a95147a262b92e9cce94ee8333c58fd3a17dd6b812fc866ae","abstract_canon_sha256":"5bf17f941ea456f7aa22a69b93e22d6ca7774b8a437228fbb5f90a6d3056a0d1"},"schema_version":"1.0"},"canonical_sha256":"83985db574390c33aec406259e70dfeb76f1c955c6b017f723d1c4b633aa72a2","source":{"kind":"arxiv","id":"1809.06719","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.06719","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"arxiv_version","alias_value":"1809.06719v2","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.06719","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"pith_short_12","alias_value":"QOMF3NLUHEGD","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"QOMF3NLUHEGDHLWE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"QOMF3NLU","created_at":"2026-05-18T12:32:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:QOMF3NLUHEGDHLWEAYSZ44G75N","target":"record","payload":{"canonical_record":{"source":{"id":"1809.06719","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-16T17:07:33Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"67ce0e70e4e1246a95147a262b92e9cce94ee8333c58fd3a17dd6b812fc866ae","abstract_canon_sha256":"5bf17f941ea456f7aa22a69b93e22d6ca7774b8a437228fbb5f90a6d3056a0d1"},"schema_version":"1.0"},"canonical_sha256":"83985db574390c33aec406259e70dfeb76f1c955c6b017f723d1c4b633aa72a2","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:01:37.716280Z","signature_b64":"yfAaBfrSqptg7i2dZr8h/PJy+L7YxJt1l2Ilke6vXdf29X659+IAVYGIbkVW+Slbj8zal8voMBafYlLm3JCdBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"83985db574390c33aec406259e70dfeb76f1c955c6b017f723d1c4b633aa72a2","last_reissued_at":"2026-05-18T00:01:37.715771Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:01:37.715771Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1809.06719","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"vgIdh1MoeGW446iJwtEULBm+YyDgrRED0vFk+8VOX2ZYmAO0XWg5Y69BWny4UMovl2mxnQ9059EhTNShLgpSBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T02:04:19.595827Z"},"content_sha256":"f37658eb439ec152f8012e9ed87ba2ca7c991a5cee777d609d569991e046108b","schema_version":"1.0","event_id":"sha256:f37658eb439ec152f8012e9ed87ba2ca7c991a5cee777d609d569991e046108b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:QOMF3NLUHEGDHLWEAYSZ44G75N","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Improvements on Hindsight Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Ameet Deshpande, Ashutosh Jha, Balaraman Ravindran, Srikanth Sarma","submitted_at":"2018-09-16T17:07:33Z","abstract_excerpt":"Sparse reward problems are one of the biggest challenges in Reinforcement Learning. Goal-directed tasks are one such sparse reward problems where a reward signal is received only when the goal is reached. One promising way to train an agent to perform goal-directed tasks is to use Hindsight Learning approaches. In these approaches, even when an agent fails to reach the desired goal, the agent learns to reach the goal it achieved instead. Doing this over multiple trajectories while generalizing the policy learned from the achieved goals, the agent learns a goal conditioned policy to reach any g"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.06719","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:01:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gOLpkXxhBz357ddovzyFMNiLbl5XQ+a2U2Mp2ijTGW7ytnT9ZQs+YTui7I8UTry3HdjgbL12YxCMcLeDluxdBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-08T02:04:19.596489Z"},"content_sha256":"04003c0af0a721ef008a5191c8902955cde265529c5fe9d8d72ff7b6a06ed0f4","schema_version":"1.0","event_id":"sha256:04003c0af0a721ef008a5191c8902955cde265529c5fe9d8d72ff7b6a06ed0f4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/bundle.json","state_url":"https://pith.science/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-08T02:04:19Z","links":{"resolver":"https://pith.science/pith/QOMF3NLUHEGDHLWEAYSZ44G75N","bundle":"https://pith.science/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/bundle.json","state":"https://pith.science/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QOMF3NLUHEGDHLWEAYSZ44G75N/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:QOMF3NLUHEGDHLWEAYSZ44G75N","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5bf17f941ea456f7aa22a69b93e22d6ca7774b8a437228fbb5f90a6d3056a0d1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-16T17:07:33Z","title_canon_sha256":"67ce0e70e4e1246a95147a262b92e9cce94ee8333c58fd3a17dd6b812fc866ae"},"schema_version":"1.0","source":{"id":"1809.06719","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1809.06719","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"arxiv_version","alias_value":"1809.06719v2","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1809.06719","created_at":"2026-05-18T00:01:37Z"},{"alias_kind":"pith_short_12","alias_value":"QOMF3NLUHEGD","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_16","alias_value":"QOMF3NLUHEGDHLWE","created_at":"2026-05-18T12:32:46Z"},{"alias_kind":"pith_short_8","alias_value":"QOMF3NLU","created_at":"2026-05-18T12:32:46Z"}],"graph_snapshots":[{"event_id":"sha256:04003c0af0a721ef008a5191c8902955cde265529c5fe9d8d72ff7b6a06ed0f4","target":"graph","created_at":"2026-05-18T00:01:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Sparse reward problems are one of the biggest challenges in Reinforcement Learning. Goal-directed tasks are one such sparse reward problems where a reward signal is received only when the goal is reached. One promising way to train an agent to perform goal-directed tasks is to use Hindsight Learning approaches. In these approaches, even when an agent fails to reach the desired goal, the agent learns to reach the goal it achieved instead. Doing this over multiple trajectories while generalizing the policy learned from the achieved goals, the agent learns a goal conditioned policy to reach any g","authors_text":"Ameet Deshpande, Ashutosh Jha, Balaraman Ravindran, Srikanth Sarma","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-16T17:07:33Z","title":"Improvements on Hindsight Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1809.06719","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f37658eb439ec152f8012e9ed87ba2ca7c991a5cee777d609d569991e046108b","target":"record","created_at":"2026-05-18T00:01:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5bf17f941ea456f7aa22a69b93e22d6ca7774b8a437228fbb5f90a6d3056a0d1","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-09-16T17:07:33Z","title_canon_sha256":"67ce0e70e4e1246a95147a262b92e9cce94ee8333c58fd3a17dd6b812fc866ae"},"schema_version":"1.0","source":{"id":"1809.06719","kind":"arxiv","version":2}},"canonical_sha256":"83985db574390c33aec406259e70dfeb76f1c955c6b017f723d1c4b633aa72a2","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"83985db574390c33aec406259e70dfeb76f1c955c6b017f723d1c4b633aa72a2","first_computed_at":"2026-05-18T00:01:37.715771Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:01:37.715771Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"yfAaBfrSqptg7i2dZr8h/PJy+L7YxJt1l2Ilke6vXdf29X659+IAVYGIbkVW+Slbj8zal8voMBafYlLm3JCdBg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:01:37.716280Z","signed_message":"canonical_sha256_bytes"},"source_id":"1809.06719","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f37658eb439ec152f8012e9ed87ba2ca7c991a5cee777d609d569991e046108b","sha256:04003c0af0a721ef008a5191c8902955cde265529c5fe9d8d72ff7b6a06ed0f4"],"state_sha256":"3b1498f0b3d63e1c6abe0c714d8877698218d44791d2dfe5caaceb585fa9e52f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Zbo/t53mcDfK3iG1hJDQ8Snlguz/55PthnW8x+hi4Oin2Ji8hgRNL60kfwBoOQiE1TqNzoOfJCoTEqo9rcEtAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-08T02:04:19.600816Z","bundle_sha256":"92212985244caa51bade41d39e76a5edb8f940db78b45725b058d3cc52d4dc0e"}}