{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:2ENPI6WZTPRUYWF655QTOTTHMA","short_pith_number":"pith:2ENPI6WZ","schema_version":"1.0","canonical_sha256":"d11af47ad99be34c58beef61374e67602b6b3479c4858bd0ed780a02d6e36220","source":{"kind":"arxiv","id":"1906.09310","version":1},"attestation_state":"computed","paper":{"title":"A Study of State Aliasing in Structured Prediction with RNNs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Adam Trischler, Layla El Asri","submitted_at":"2019-06-21T20:16:52Z","abstract_excerpt":"End-to-end reinforcement learning agents learn a state representation and a policy at the same time. Recurrent neural networks (RNNs) have been trained successfully as reinforcement learning agents in settings like dialogue that require structured prediction. In this paper, we investigate the representations learned by RNN-based agents when trained with both policy gradient and value-based methods. We show through extensive experiments and analysis that, when trained with policy gradient, recurrent neural networks often fail to learn a state representation that leads to an optimal policy in se"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1906.09310","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-21T20:16:52Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"6a02f07d0ca80d6dbfe35ff4dd16a99519a612b2a568e469dc91804fed2be5c6","abstract_canon_sha256":"50ad18e53b7802f44aa8746fe6e781579facfcc63254a82336d8419601211540"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:42:38.556413Z","signature_b64":"holqFitFUE2yhuerJvInaxWHqsL3GXmDqIUnFyUCDXQUs5uFXKl12VjWFocnyWdxgJop5U4vMxwV4ToNBGyKAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d11af47ad99be34c58beef61374e67602b6b3479c4858bd0ed780a02d6e36220","last_reissued_at":"2026-05-17T23:42:38.555658Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:42:38.555658Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Study of State Aliasing in Structured Prediction with RNNs","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Adam Trischler, Layla El Asri","submitted_at":"2019-06-21T20:16:52Z","abstract_excerpt":"End-to-end reinforcement learning agents learn a state representation and a policy at the same time. Recurrent neural networks (RNNs) have been trained successfully as reinforcement learning agents in settings like dialogue that require structured prediction. In this paper, we investigate the representations learned by RNN-based agents when trained with both policy gradient and value-based methods. We show through extensive experiments and analysis that, when trained with policy gradient, recurrent neural networks often fail to learn a state representation that leads to an optimal policy in se"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.09310","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1906.09310","created_at":"2026-05-17T23:42:38.555793+00:00"},{"alias_kind":"arxiv_version","alias_value":"1906.09310v1","created_at":"2026-05-17T23:42:38.555793+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.09310","created_at":"2026-05-17T23:42:38.555793+00:00"},{"alias_kind":"pith_short_12","alias_value":"2ENPI6WZTPRU","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_16","alias_value":"2ENPI6WZTPRUYWF6","created_at":"2026-05-18T12:33:07.085635+00:00"},{"alias_kind":"pith_short_8","alias_value":"2ENPI6WZ","created_at":"2026-05-18T12:33:07.085635+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA","json":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA.json","graph_json":"https://pith.science/api/pith-number/2ENPI6WZTPRUYWF655QTOTTHMA/graph.json","events_json":"https://pith.science/api/pith-number/2ENPI6WZTPRUYWF655QTOTTHMA/events.json","paper":"https://pith.science/paper/2ENPI6WZ"},"agent_actions":{"view_html":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA","download_json":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA.json","view_paper":"https://pith.science/paper/2ENPI6WZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1906.09310&json=true","fetch_graph":"https://pith.science/api/pith-number/2ENPI6WZTPRUYWF655QTOTTHMA/graph.json","fetch_events":"https://pith.science/api/pith-number/2ENPI6WZTPRUYWF655QTOTTHMA/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA/action/storage_attestation","attest_author":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA/action/author_attestation","sign_citation":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA/action/citation_signature","submit_replication":"https://pith.science/pith/2ENPI6WZTPRUYWF655QTOTTHMA/action/replication_record"}},"created_at":"2026-05-17T23:42:38.555793+00:00","updated_at":"2026-05-17T23:42:38.555793+00:00"}