{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:DYXHDMMA2WDGUNODEGQGN4MRZX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"98515e01c0be789d4cadb4530b14ba43bfd3d41028c24aff188bf8f150a57125","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T04:58:40Z","title_canon_sha256":"8fd31ed859ee9d5d0bc81c49f25b1e3c8519d5d590d65ee2ea67634d54870851"},"schema_version":"1.0","source":{"id":"1902.00183","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1902.00183","created_at":"2026-05-17T23:46:09Z"},{"alias_kind":"arxiv_version","alias_value":"1902.00183v2","created_at":"2026-05-17T23:46:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1902.00183","created_at":"2026-05-17T23:46:09Z"},{"alias_kind":"pith_short_12","alias_value":"DYXHDMMA2WDG","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_16","alias_value":"DYXHDMMA2WDGUNOD","created_at":"2026-05-18T12:33:15Z"},{"alias_kind":"pith_short_8","alias_value":"DYXHDMMA","created_at":"2026-05-18T12:33:15Z"}],"graph_snapshots":[{"event_id":"sha256:a428ea96ee56f25ea16bd1db4dc9846bced39d53e7e31ca431b6b9f40d8d1c04","target":"graph","created_at":"2026-05-17T23:46:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Most model-free reinforcement learning methods leverage state representations (embeddings) for generalization, but either ignore structure in the space of actions or assume the structure is provided a priori. We show how a policy can be decomposed into a component that acts in a low-dimensional space of action representations and a component that transforms these representations into actual actions. These representations improve generalization over large, finite action sets by allowing the agent to infer the outcomes of actions similar to actions already taken. We provide an algorithm to both ","authors_text":"Georgios Theocharous, James Kostas, Philip S. Thomas, Scott Jordan, Yash Chandak","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T04:58:40Z","title":"Learning Action Representations for Reinforcement Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1902.00183","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:88e4282ebed77f5344cc631d32e06e1c0f6d08833a98285483a5a2502359bb1c","target":"record","created_at":"2026-05-17T23:46:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"98515e01c0be789d4cadb4530b14ba43bfd3d41028c24aff188bf8f150a57125","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-02-01T04:58:40Z","title_canon_sha256":"8fd31ed859ee9d5d0bc81c49f25b1e3c8519d5d590d65ee2ea67634d54870851"},"schema_version":"1.0","source":{"id":"1902.00183","kind":"arxiv","version":2}},"canonical_sha256":"1e2e71b180d5866a35c321a066f191cde31306f5e88c0dd94d205a0592eeee4f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"1e2e71b180d5866a35c321a066f191cde31306f5e88c0dd94d205a0592eeee4f","first_computed_at":"2026-05-17T23:46:09.189655Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:46:09.189655Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1MwtyCkt+taivUsLTywpeDFPLMcnAj/etjgvc89baPr/PEZMrZTbc5oZsD5sRgXXJvC5tMWTwSHxlgEXziKaDQ==","signature_status":"signed_v1","signed_at":"2026-05-17T23:46:09.190302Z","signed_message":"canonical_sha256_bytes"},"source_id":"1902.00183","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:88e4282ebed77f5344cc631d32e06e1c0f6d08833a98285483a5a2502359bb1c","sha256:a428ea96ee56f25ea16bd1db4dc9846bced39d53e7e31ca431b6b9f40d8d1c04"],"state_sha256":"12c1dc398a2c1ae9c389115d754fb8a365f22cc44f88ecfbd49afd5538b743d0"}