{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:2IAX2M2YAOXPWGYGGJ7HTWNENH","short_pith_number":"pith:2IAX2M2Y","schema_version":"1.0","canonical_sha256":"d2017d335803aefb1b06327e79d9a469c63a3b73f3198514e11f059dc54d6f80","source":{"kind":"arxiv","id":"2508.15601","version":2},"attestation_state":"computed","paper":{"title":"LMDeploy Accelerates Mixed-Precision LLM Inference with TurboMind","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.PF"],"primary_cat":"cs.DC","authors_text":"Fangcheng Fu, Guoliang He, Han Lv, Kai Chen, Li Zhang, Ningsheng Ma, Qian Yao, Xin Chen, Youhe Jiang","submitted_at":"2025-08-21T14:24:52Z","abstract_excerpt":"Mixed-precision inference techniques reduce the memory and computational demands of Large Language Models (LLMs) by applying hybrid precision formats to model weights, activations, and KV caches. However, existing systems struggle to (i) automatically generalize across diverse hardware architectures and precision formats, often requiring fragmented, hand-tuned kernels, and (ii) fully exploit available memory and compute resources, often causing performance bottlenecks. To address these problems, we propose TurboMind, a generalizable and efficient mixed-precision LLM inference engine of LMDeplo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2508.15601","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2025-08-21T14:24:52Z","cross_cats_sorted":["cs.PF"],"title_canon_sha256":"fd99dfd641caad1553c5a2ee174022657fcbf25b44fd3db30d84aade1e20d960","abstract_canon_sha256":"4465555fe84f2caea1ca592ed5630d52b5ac8f41b12886434ea538c50e80f226"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:01:35.253760Z","signature_b64":"DGG3q6j+MhK5B/UATLPmWo2hyz1oIuD/XtmipTZX99R70QjDS9P2TVgqwpyblQt2DzM+fe+EFy5NCPWMMck4DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d2017d335803aefb1b06327e79d9a469c63a3b73f3198514e11f059dc54d6f80","last_reissued_at":"2026-05-20T00:01:35.252971Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:01:35.252971Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"LMDeploy Accelerates Mixed-Precision LLM Inference with TurboMind","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.PF"],"primary_cat":"cs.DC","authors_text":"Fangcheng Fu, Guoliang He, Han Lv, Kai Chen, Li Zhang, Ningsheng Ma, Qian Yao, Xin Chen, Youhe Jiang","submitted_at":"2025-08-21T14:24:52Z","abstract_excerpt":"Mixed-precision inference techniques reduce the memory and computational demands of Large Language Models (LLMs) by applying hybrid precision formats to model weights, activations, and KV caches. However, existing systems struggle to (i) automatically generalize across diverse hardware architectures and precision formats, often requiring fragmented, hand-tuned kernels, and (ii) fully exploit available memory and compute resources, often causing performance bottlenecks. To address these problems, we propose TurboMind, a generalizable and efficient mixed-precision LLM inference engine of LMDeplo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.15601","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.15601/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2508.15601","created_at":"2026-05-20T00:01:35.253089+00:00"},{"alias_kind":"arxiv_version","alias_value":"2508.15601v2","created_at":"2026-05-20T00:01:35.253089+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.15601","created_at":"2026-05-20T00:01:35.253089+00:00"},{"alias_kind":"pith_short_12","alias_value":"2IAX2M2YAOXP","created_at":"2026-05-20T00:01:35.253089+00:00"},{"alias_kind":"pith_short_16","alias_value":"2IAX2M2YAOXPWGYG","created_at":"2026-05-20T00:01:35.253089+00:00"},{"alias_kind":"pith_short_8","alias_value":"2IAX2M2Y","created_at":"2026-05-20T00:01:35.253089+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2605.22100","citing_title":"MPDocBench-Parse: Benchmarking Practical Multi-page Document Parsing","ref_index":68,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19537","citing_title":"The Silent Hyperparameter: Quantifying the Impact of Inference Backends on LLM Reproducibility","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19537","citing_title":"The Silent Hyperparameter: Quantifying the Impact of Inference Backends on LLM Reproducibility","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16637","citing_title":"HexAGenT: Efficient Agentic LLM Serving via Workflow- and Heterogeneity-Aware Scheduling","ref_index":53,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13915","citing_title":"Multi-Scale Dequant: Eliminating Dequantization Bottleneck via Activation Decomposition for Efficient LLM Inference","ref_index":23,"is_internal_anchor":false},{"citing_arxiv_id":"2605.13667","citing_title":"SceneGraphVLM: Dynamic Scene Graph Generation from Video with Vision-Language Models","ref_index":47,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07144","citing_title":"Autopoiesis: A Self-Evolving System Paradigm for LLM Serving Under Runtime Dynamics","ref_index":44,"is_internal_anchor":false},{"citing_arxiv_id":"2605.07569","citing_title":"HexiSeq: Accommodating Long Context Training of LLMs over Heterogeneous Hardware","ref_index":60,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH","json":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH.json","graph_json":"https://pith.science/api/pith-number/2IAX2M2YAOXPWGYGGJ7HTWNENH/graph.json","events_json":"https://pith.science/api/pith-number/2IAX2M2YAOXPWGYGGJ7HTWNENH/events.json","paper":"https://pith.science/paper/2IAX2M2Y"},"agent_actions":{"view_html":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH","download_json":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH.json","view_paper":"https://pith.science/paper/2IAX2M2Y","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2508.15601&json=true","fetch_graph":"https://pith.science/api/pith-number/2IAX2M2YAOXPWGYGGJ7HTWNENH/graph.json","fetch_events":"https://pith.science/api/pith-number/2IAX2M2YAOXPWGYGGJ7HTWNENH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH/action/storage_attestation","attest_author":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH/action/author_attestation","sign_citation":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH/action/citation_signature","submit_replication":"https://pith.science/pith/2IAX2M2YAOXPWGYGGJ7HTWNENH/action/replication_record"}},"created_at":"2026-05-20T00:01:35.253089+00:00","updated_at":"2026-05-20T00:01:35.253089+00:00"}