{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:GKQXAZTVXEJYHPZZKKQABR6RTB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"26a2b89c602765607a109a0dd9c505e4cc1a5aa0842db8bdec2c48e207fd8e85","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-27T04:33:08Z","title_canon_sha256":"4ae62450e6125d277e961bacb7b8a52f83a3cdc9ef926749154efab2952f5400"},"schema_version":"1.0","source":{"id":"1803.10227","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.10227","created_at":"2026-05-18T00:19:56Z"},{"alias_kind":"arxiv_version","alias_value":"1803.10227v1","created_at":"2026-05-18T00:19:56Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.10227","created_at":"2026-05-18T00:19:56Z"},{"alias_kind":"pith_short_12","alias_value":"GKQXAZTVXEJY","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"GKQXAZTVXEJYHPZZ","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"GKQXAZTV","created_at":"2026-05-18T12:32:25Z"}],"graph_snapshots":[{"event_id":"sha256:07365812326b590610eb1e4c47048efb1b0318c01e116a81f3817e04ea8a5c0d","target":"graph","created_at":"2026-05-18T00:19:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Goals for reinforcement learning problems are typically defined through hand-specified rewards. To design such problems, developers of learning algorithms must inherently be aware of what the task goals are, yet we often require agents to discover them on their own without any supervision beyond these sparse rewards. While much of the power of reinforcement learning derives from the concept that agents can learn with little guidance, this requirement greatly burdens the training process. If we relax this one restriction and endow the agent with knowledge of the reward function, and in particul","authors_text":"Ashley D. Edwards, James C. Davidson, Laura Downs","cross_cats":["cs.AI","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-27T04:33:08Z","title":"Forward-Backward Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.10227","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:33b4bfa776f8525044d29dea3ffed4c74d0c6a539075af613f31c4157f76aee7","target":"record","created_at":"2026-05-18T00:19:56Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"26a2b89c602765607a109a0dd9c505e4cc1a5aa0842db8bdec2c48e207fd8e85","cross_cats_sorted":["cs.AI","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2018-03-27T04:33:08Z","title_canon_sha256":"4ae62450e6125d277e961bacb7b8a52f83a3cdc9ef926749154efab2952f5400"},"schema_version":"1.0","source":{"id":"1803.10227","kind":"arxiv","version":1}},"canonical_sha256":"32a1706675b91383bf3952a000c7d19847325718ceb22860943ec18b5cbe87ca","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"32a1706675b91383bf3952a000c7d19847325718ceb22860943ec18b5cbe87ca","first_computed_at":"2026-05-18T00:19:56.047294Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:19:56.047294Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"O4rjRyhJPVgH5QEZT2CnZsNG+L7ue9b1Mg/ckZt7tv5B9L+67b8/Ndw+Ty1e+kghpculTGPL+9l1tc4BT1guCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:19:56.047876Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.10227","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:33b4bfa776f8525044d29dea3ffed4c74d0c6a539075af613f31c4157f76aee7","sha256:07365812326b590610eb1e4c47048efb1b0318c01e116a81f3817e04ea8a5c0d"],"state_sha256":"e7c50c198a025ea8c69afc2c906dfd355f748145a9c84ee41aaacd0612453d35"}