{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:VARUQFMFWBOO54BRT5MMGQWFSX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"4cdeff5c1b5f17421a1cd4c652036d56d8df76ac2dc40a0eb2c9f36fde34e01d","cross_cats_sorted":["cs.AI","cs.NE","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T18:43:56Z","title_canon_sha256":"b89bfe6c2cc563d42fe66b7df7e36e0e7b4acca9be365d72c394233bfc609f53"},"schema_version":"1.0","source":{"id":"1902.04546","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.04546","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"arxiv_version","alias_value":"1902.04546v1","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.04546","created_at":"2026-05-17T23:54:12Z"},{"alias_kind":"pith_short_12","alias_value":"VARUQFMFWBOO","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_16","alias_value":"VARUQFMFWBOO54BR","created_at":"2026-05-18T12:33:30Z"},{"alias_kind":"pith_short_8","alias_value":"VARUQFMF","created_at":"2026-05-18T12:33:30Z"}],"graph_snapshots":[{"event_id":"sha256:020158d5000c4378fb3c7e8f7a5a371aafcc5862e971226ccc32569c8708ac01","target":"graph","created_at":"2026-05-17T23:54:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Sparse reward is one of the most challenging problems in reinforcement learning (RL). Hindsight Experience Replay (HER) attempts to address this issue by converting a failed experience to a successful one by relabeling the goals. Despite its effectiveness, HER has limited applicability because it lacks a compact and universal goal representation. We present Augmenting experienCe via TeacheR's adviCE (ACTRCE), an efficient reinforcement learning technique that extends the HER framework using natural language as the goal representation. We first analyze the differences among goal representation,","authors_text":"Harris Chan, Jamie Kiros, Jimmy Ba, Sanja Fidler, Yuhuai Wu","cross_cats":["cs.AI","cs.NE","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T18:43:56Z","title":"ACTRCE: Augmenting Experience via Teacher's Advice For Multi-Goal Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.04546","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e39723a195a2478f83b69d164b94590ded7339900e62ae1fa625b55cea4c360d","target":"record","created_at":"2026-05-17T23:54:12Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"4cdeff5c1b5f17421a1cd4c652036d56d8df76ac2dc40a0eb2c9f36fde34e01d","cross_cats_sorted":["cs.AI","cs.NE","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-12T18:43:56Z","title_canon_sha256":"b89bfe6c2cc563d42fe66b7df7e36e0e7b4acca9be365d72c394233bfc609f53"},"schema_version":"1.0","source":{"id":"1902.04546","kind":"arxiv","version":1}},"canonical_sha256":"a823481585b05ceef0319f58c342c595eca541e9ce8a3669cbb268c05aee94db","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a823481585b05ceef0319f58c342c595eca541e9ce8a3669cbb268c05aee94db","first_computed_at":"2026-05-17T23:54:12.000832Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:54:12.000832Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"2j1c0v+LNh62t3BJ3qJSo/C3TAWgwcWkvqO64BFuHN05znRv5M5lcwh6RxYIr+A7uOaksqTgjFYT5ckTdk6CBQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:54:12.001283Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.04546","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e39723a195a2478f83b69d164b94590ded7339900e62ae1fa625b55cea4c360d","sha256:020158d5000c4378fb3c7e8f7a5a371aafcc5862e971226ccc32569c8708ac01"],"state_sha256":"fdec67f1d7fa7cb01774750602ab8e1e640b4de381f768c7250061fa9659bfa7"}