{"work":{"id":"de105b1d-cc2e-4cfe-a5e1-df54a3d8ab75","openalex_id":null,"doi":"10.1145/3600006.3613165","arxiv_id":null,"raw_key":"raw:b7bdaddc8d858d1585f2e3ae","title":"Efficient Memory Management for Large Language Model Serving with PagedAttention","authors":[{"ORCID":"https://orcid.org/0009-0008-8870-4892","given":"Woosuk","family":"Kwon","sequence":"first","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0009-0004-1534-9106","given":"Zhuohan","family":"Li","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0009-0007-3787-0316","given":"Siyuan","family":"Zhuang","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0000-0002-1883-2126","given":"Ying","family":"Sheng","sequence":"additional","affiliation":[{"name":"UC Berkeley and Stanford University, Berkeley, USA"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0000-0002-6611-4612","given":"Lianmin","family":"Zheng","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0000-0002-9298-6254","given":"Cody Hao","family":"Yu","sequence":"additional","affiliation":[{"name":"Independent Researcher, Berkeley, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0000-0003-2921-956X","given":"Joseph","family":"Gonzalez","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0009-0003-8392-3977","given":"Hao","family":"Zhang","sequence":"additional","affiliation":[{"name":"UC San Diego, La Jolla, United States of America"}],"authenticated-orcid":false},{"ORCID":"https://orcid.org/0000-0002-5373-0088","given":"Ion","family":"Stoica","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, United States of America"}],"authenticated-orcid":false}],"authors_text":"Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu, Joseph Gonzalez, Hao Zhang, Ion Stoica","year":2023,"venue":"Proceedings of the 29th Symposium on Operating Systems Principles","abstract":null,"external_url":"https://doi.org/10.1145/3600006.3613165","cited_by_count":1063,"metadata_source":"crossref","metadata_fetched_at":"2026-05-19T03:52:27.108376+00:00","pith_arxiv_id":null,"created_at":"2026-05-15T03:19:51.630679+00:00","updated_at":"2026-05-19T03:52:27.108376+00:00","title_quality_ok":true,"display_title":null,"render_title":"Efficient Memory Management for Large Language Model Serving with PagedAttention"},"hub":{"state":{"tier_text":"hub","tier":"hub","tier_reason":"10+ Pith inbound or 1,000+ external citations","pith_inbound_count":1,"external_cited_by_count":1063},"tier":"hub","role_counts":[{"context_role":"background","n":1}],"polarity_counts":[{"context_polarity":"background","n":1}],"runs":{},"summary":{},"graph":{},"authors":[]}}