{"work":{"id":"c3b0bfa7-6764-45f1-a40d-45baaee9d22c","openalex_id":"https://openalex.org/W2064675550","doi":"10.1162/neco.1997.9.8.1735","arxiv_id":"gov/9377276","raw_key":null,"title":"Hochreiter and J","authors":[{"given":"Sepp","family":"Hochreiter","sequence":"first","affiliation":[{"name":"Fakultät für Informatik, Technische Universität München, 80290 München, Germany"}]},{"given":"Jürgen","family":"Schmidhuber","sequence":"additional","affiliation":[{"name":"IDSIA, Corso Elvezia 36, 6900 Lugano, Switzerland"}]}],"authors_text":"S","year":1997,"venue":"Neural Computation","abstract":null,"external_url":"https://doi.org/10.1162/neco.1997.9.8.1735","cited_by_count":80814,"metadata_source":"doi_reference","metadata_fetched_at":"2026-06-29T09:23:15.570020+00:00","pith_arxiv_id":null,"created_at":"2026-05-08T19:29:04.783454+00:00","updated_at":"2026-06-29T09:23:15.570020+00:00","title_quality_ok":false,"display_title":"Long short -term memory","render_title":"Long short -term memory"},"hub":{"state":{"work_id":"c3b0bfa7-6764-45f1-a40d-45baaee9d22c","tier":"super_hub","tier_reason":"100+ Pith inbound or 10,000+ external citations","pith_inbound_count":126,"external_cited_by_count":80814,"distinct_field_count":26,"first_pith_cited_at":"2017-10-10T17:42:04+00:00","last_pith_cited_at":"2026-06-26T16:13:48+00:00","author_build_status":"needed","summary_status":"needed","contexts_status":"needed","graph_status":"needed","ask_index_status":"needed","reader_status":"not_needed","recognition_status":"not_needed","updated_at":"2026-06-29T13:38:55.684932+00:00","tier_text":"super_hub"},"tier":"super_hub","role_counts":[{"context_role":"background","n":15},{"context_role":"baseline","n":2},{"context_role":"method","n":2}],"polarity_counts":[{"context_polarity":"background","n":14},{"context_polarity":"baseline","n":2},{"context_polarity":"use_method","n":2},{"context_polarity":"support","n":1}],"runs":{"ask_index":{"job_type":"ask_index","status":"succeeded","result":{"title":"Long short-term memory","claims":[],"why_cited":"Pith tracks Long short-term memory because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T02:45:14.696808+00:00"},"author_expand":{"job_type":"author_expand","status":"succeeded","result":{"authors_linked":[{"id":"7522a990-8179-46b3-82e3-c564e1fa045c","orcid":null,"display_name":"Sepp Hochreiter"},{"id":"4743b052-e37b-470b-bd4b-54868c1d59a0","orcid":null,"display_name":"Jürgen Schmidhuber"}]},"error":null,"updated_at":"2026-05-14T02:45:15.464692+00:00"},"context_extract":{"job_type":"context_extract","status":"succeeded","result":{"enqueued_papers":25},"error":null,"updated_at":"2026-05-14T02:45:19.967571+00:00"},"graph_features":{"job_type":"graph_features","status":"succeeded","result":{"co_cited":[{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":7},{"title":"Gradient-based learning applied to document recognition.Proceedings of the IEEE, 86(11):2278–2324","work_id":"0a3595ca-57f9-43f8-8e2f-aface7154b99","shared_citers":6},{"title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","work_id":"4ee75248-1199-492c-a52f-6661e0f4adff","shared_citers":6},{"title":"An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling","work_id":"04430f4e-b270-479c-9dcd-bee723164789","shared_citers":4},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":4},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":4},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":4},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":4},{"title":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":4},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":4},{"title":"Xgboost: A scalable tree boosting system","work_id":"1c2af073-b162-4cd0-a5cd-d8aade23b9b5","shared_citers":4},{"title":"arXiv preprint arXiv:1905.00537 , year=","work_id":"54fdcd2d-ade5-4d5e-9b37-8d75abcbaae2","shared_citers":3},{"title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","work_id":"ed240a10-5b19-406c-baa5-30803f465785","shared_citers":3},{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","work_id":"d1cf6693-a082-403c-ada9-dac7b96341f9","shared_citers":3},{"title":"Deep learning","work_id":"f959cefa-9092-49df-9fb5-a4e6654500f1","shared_citers":3},{"title":"doi:10.18653/v1/N19-1246 , editor =","work_id":"56ac41e4-5078-4307-aa88-20a9d4e90afc","shared_citers":3},{"title":"doi:10.18653/v1/P19-1472 , editor =","work_id":"11bfc949-547c-40f3-a86d-953eb9b2154c","shared_citers":3},{"title":"Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling","work_id":"c7f2f5a9-ae4b-48db-aff0-24b9d0528995","shared_citers":3},{"title":"Know What You Don","work_id":"9fa70caa-364b-4753-b402-0f2e3ac53239","shared_citers":3},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":3},{"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","shared_citers":3},{"title":"Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya","work_id":"37c05e13-4a24-44f8-a1c4-da1bbe7223aa","shared_citers":3},{"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","shared_citers":3},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":3}],"time_series":[{"n":1,"year":2017},{"n":1,"year":2021},{"n":1,"year":2022},{"n":1,"year":2023},{"n":1,"year":2024},{"n":44,"year":2026}],"dependency_candidates":[]},"error":null,"updated_at":"2026-05-14T02:45:20.028089+00:00"},"identity_refresh":{"job_type":"identity_refresh","status":"succeeded","result":{"items":[{"title":"Qwen3 Technical Report","outcome":"unchanged","work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e","resolver":"local_arxiv","confidence":0.98,"old_work_id":"25a4e30c-1232-48e7-9925-02fa12ba7c9e"}],"counts":{"fixed":0,"merged":0,"unchanged":1,"quarantined":0,"needs_external_resolution":0},"errors":[],"attempted":1},"error":null,"updated_at":"2026-05-14T02:45:26.553880+00:00"},"role_polarity":{"job_type":"role_polarity","status":"succeeded","result":{"title":"Long short-term memory","claims":[],"why_cited":"Pith tracks Long short-term memory because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T02:45:14.693607+00:00"},"summary_claims":{"job_type":"summary_claims","status":"succeeded","result":{"title":"Long short-term memory","claims":[],"why_cited":"Pith tracks Long short-term memory because it crossed a citation-hub threshold.","role_counts":[]},"error":null,"updated_at":"2026-05-14T02:45:14.684344+00:00"}},"summary":{"title":"Long short-term memory","claims":[],"why_cited":"Pith tracks Long short-term memory because it crossed a citation-hub threshold.","role_counts":[]},"graph":{"co_cited":[{"title":"Adam: A Method for Stochastic Optimization","work_id":"1910796d-9b52-4683-bf5c-de9632c1028b","shared_citers":7},{"title":"Gradient-based learning applied to document recognition.Proceedings of the IEEE, 86(11):2278–2324","work_id":"0a3595ca-57f9-43f8-8e2f-aface7154b99","shared_citers":6},{"title":"Mamba: Linear-Time Sequence Modeling with Selective State Spaces","work_id":"4ee75248-1199-492c-a52f-6661e0f4adff","shared_citers":6},{"title":"An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling","work_id":"04430f4e-b270-479c-9dcd-bee723164789","shared_citers":4},{"title":"Attention Is All You Need","work_id":"baafb5a2-5272-43bc-932f-09fa9ffe5316","shared_citers":4},{"title":"Decoupled Weight Decay Regularization","work_id":"07ef7360-d385-4033-83f7-8384a6325204","shared_citers":4},{"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","shared_citers":4},{"title":"Scaling Laws for Neural Language Models","work_id":"b7dd8749-9c45-4977-ab9b-64478dce1ae8","shared_citers":4},{"title":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge","work_id":"28ea1282-d657-4c61-a83c-f1249be6d6b1","shared_citers":4},{"title":"Training Verifiers to Solve Math Word Problems","work_id":"acab1aa8-b4d6-40e0-a3ee-25341701dca2","shared_citers":4},{"title":"Xgboost: A scalable tree boosting system","work_id":"1c2af073-b162-4cd0-a5cd-d8aade23b9b5","shared_citers":4},{"title":"arXiv preprint arXiv:1905.00537 , year=","work_id":"54fdcd2d-ade5-4d5e-9b37-8d75abcbaae2","shared_citers":3},{"title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","work_id":"ed240a10-5b19-406c-baa5-30803f465785","shared_citers":3},{"title":"Chain-of-Thought Prompting Elicits Reasoning in Large Language Models","work_id":"d1cf6693-a082-403c-ada9-dac7b96341f9","shared_citers":3},{"title":"Deep learning","work_id":"f959cefa-9092-49df-9fb5-a4e6654500f1","shared_citers":3},{"title":"doi:10.18653/v1/N19-1246 , editor =","work_id":"56ac41e4-5078-4307-aa88-20a9d4e90afc","shared_citers":3},{"title":"doi:10.18653/v1/P19-1472 , editor =","work_id":"11bfc949-547c-40f3-a86d-953eb9b2154c","shared_citers":3},{"title":"Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling","work_id":"c7f2f5a9-ae4b-48db-aff0-24b9d0528995","shared_citers":3},{"title":"Know What You Don","work_id":"9fa70caa-364b-4753-b402-0f2e3ac53239","shared_citers":3},{"title":"Language Models are Few-Shot Learners","work_id":"214732c0-2edd-44a0-af9e-28184a2b8279","shared_citers":3},{"title":"Measuring Massive Multitask Language Understanding","work_id":"e87ec49a-544b-4ec8-8991-75298c64ff5e","shared_citers":3},{"title":"Nikita Kitaev, Lukasz Kaiser, and Anselm Levskaya","work_id":"37c05e13-4a24-44f8-a1c4-da1bbe7223aa","shared_citers":3},{"title":"On the Opportunities and Risks of Foundation Models","work_id":"a18039e9-928d-47c9-a836-32656a71bf71","shared_citers":3},{"title":"Program Synthesis with Large Language Models","work_id":"fd241a05-03b9-4de2-9588-9d77ce176125","shared_citers":3}],"time_series":[{"n":1,"year":2017},{"n":1,"year":2021},{"n":1,"year":2022},{"n":1,"year":2023},{"n":1,"year":2024},{"n":44,"year":2026}],"dependency_candidates":[]},"authors":[{"id":"4743b052-e37b-470b-bd4b-54868c1d59a0","orcid":null,"display_name":"Jürgen Schmidhuber","source":"manual","import_confidence":0.72},{"id":"7522a990-8179-46b3-82e3-c564e1fa045c","orcid":null,"display_name":"Sepp Hochreiter","source":"manual","import_confidence":0.72}]}}