{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:3RK74S3F4KBRUGYJCHTCCAJSBU","short_pith_number":"pith:3RK74S3F","schema_version":"1.0","canonical_sha256":"dc55fe4b65e2831a1b0911e62101320d173da6db9bf7cdce041041b7e04c6b88","source":{"kind":"arxiv","id":"2506.06295","version":2},"attestation_state":"computed","paper":{"title":"dLLM-Cache: Accelerating Diffusion Large Language Models with Adaptive Caching","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chang Zou, Junjie Chen, Linfeng Zhang, Qingyuan Wei, Shaobo Wang, Yaojie Zhang, Yichen Zhu, Yicun Yang, Zhiyuan Liu","submitted_at":"2025-05-17T15:50:46Z","abstract_excerpt":"Autoregressive Models (ARMs) have long dominated the landscape of Large Language Models. Recently, a new paradigm has emerged in the form of diffusion-based Large Language Models (dLLMs), which generate text by iteratively denoising masked segments. This approach has shown significant advantages and potential. However, dLLMs suffer from high inference latency. Traditional ARM acceleration techniques, such as Key-Value caching, are incompatible with dLLMs due to their bidirectional attention mechanism. To address this specific challenge, our work begins with a key observation that dLLM inferenc"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2506.06295","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2025-05-17T15:50:46Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"a618bc940865c09f38e7739934e4d32b4ba06a9eb26cc600715e553d07655c02","abstract_canon_sha256":"178844e2c2f013e15fdefc0363396d536d0dd3fc707a9ab2ebfbb994c61dda48"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T02:05:41.324170Z","signature_b64":"U8xVOFmJtqyzHSJ8FRVl39tlmZGjRTCCMjFEeliyYE77V/0qxM5sRPcQ9Pwg+3ayhO1ANdC6IIuDnAM9G2JDBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"dc55fe4b65e2831a1b0911e62101320d173da6db9bf7cdce041041b7e04c6b88","last_reissued_at":"2026-06-03T02:05:41.323653Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T02:05:41.323653Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"dLLM-Cache: Accelerating Diffusion Large Language Models with Adaptive Caching","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Chang Zou, Junjie Chen, Linfeng Zhang, Qingyuan Wei, Shaobo Wang, Yaojie Zhang, Yichen Zhu, Yicun Yang, Zhiyuan Liu","submitted_at":"2025-05-17T15:50:46Z","abstract_excerpt":"Autoregressive Models (ARMs) have long dominated the landscape of Large Language Models. Recently, a new paradigm has emerged in the form of diffusion-based Large Language Models (dLLMs), which generate text by iteratively denoising masked segments. This approach has shown significant advantages and potential. However, dLLMs suffer from high inference latency. Traditional ARM acceleration techniques, such as Key-Value caching, are incompatible with dLLMs due to their bidirectional attention mechanism. To address this specific challenge, our work begins with a key observation that dLLM inferenc"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2506.06295","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2506.06295/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2506.06295","created_at":"2026-06-03T02:05:41.323719+00:00"},{"alias_kind":"arxiv_version","alias_value":"2506.06295v2","created_at":"2026-06-03T02:05:41.323719+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2506.06295","created_at":"2026-06-03T02:05:41.323719+00:00"},{"alias_kind":"pith_short_12","alias_value":"3RK74S3F4KBR","created_at":"2026-06-03T02:05:41.323719+00:00"},{"alias_kind":"pith_short_16","alias_value":"3RK74S3F4KBRUGYJ","created_at":"2026-06-03T02:05:41.323719+00:00"},{"alias_kind":"pith_short_8","alias_value":"3RK74S3F","created_at":"2026-06-03T02:05:41.323719+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":19,"internal_anchor_count":19,"sample":[{"citing_arxiv_id":"2603.20216","citing_title":"Locally Coherent Parallel Decoding in Diffusion Language Models","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.09450","citing_title":"ECHO: Efficient Chest X-ray Report Generation with One-step Block Diffusion","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20813","citing_title":"PulseCol: Periodically Refreshed Column-Sparse Attention for Accelerating Diffusion Language Models","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18165","citing_title":"Elastic-dLLM: Position Preserving Context Compression and Augmentation of Diffusion LLMs","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19470","citing_title":"Drifting Objectives for Refining Discrete Diffusion Language Models","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20022","citing_title":"FlexDraft: Flexible Speculative Decoding via Attention Tuning and Bonus-Guided Calibration","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16941","citing_title":"Roll Out and Roll Back: Diffusion LLMs are Their Own Efficiency Teachers","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08302","citing_title":"DMax: Aggressive Parallel Decoding for dLLMs","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2508.19982","citing_title":"Diffusion Language Models Know the Answer Before Decoding","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2512.14067","citing_title":"Efficient-DLM: From Autoregressive to Diffusion Language Models, and Beyond in Speed","ref_index":41,"is_internal_anchor":true},{"citing_arxiv_id":"2603.07475","citing_title":"A Comparative analysis of Layer-wise Representational Capacity in AR and Diffusion LLMs","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.13382","citing_title":"BlockVLA: Accelerating Autoregressive VLA via Block Diffusion Finetuning","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09536","citing_title":"TAD: Temporal-Aware Trajectory Self-Distillation for Fast and Accurate Diffusion LLM","ref_index":44,"is_internal_anchor":true},{"citing_arxiv_id":"2605.00161","citing_title":"Consistent Diffusion Language Models","ref_index":19,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18995","citing_title":"$R^2$-dLLM: Accelerating Diffusion Large Language Models via Spatio-Temporal Redundancy Reduction","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2604.09450","citing_title":"ECHO: Efficient Chest X-ray Report Generation with One-step Block Diffusion","ref_index":32,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08302","citing_title":"DMax: Aggressive Parallel Decoding for dLLMs","ref_index":49,"is_internal_anchor":true},{"citing_arxiv_id":"2604.15750","citing_title":"DepCap: Adaptive Block-Wise Parallel Decoding for Efficient Diffusion LM Inference","ref_index":22,"is_internal_anchor":true},{"citing_arxiv_id":"2604.18471","citing_title":"NI Sampling: Accelerating Discrete Diffusion Sampling by Token Order Optimization","ref_index":24,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU","json":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU.json","graph_json":"https://pith.science/api/pith-number/3RK74S3F4KBRUGYJCHTCCAJSBU/graph.json","events_json":"https://pith.science/api/pith-number/3RK74S3F4KBRUGYJCHTCCAJSBU/events.json","paper":"https://pith.science/paper/3RK74S3F"},"agent_actions":{"view_html":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU","download_json":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU.json","view_paper":"https://pith.science/paper/3RK74S3F","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2506.06295&json=true","fetch_graph":"https://pith.science/api/pith-number/3RK74S3F4KBRUGYJCHTCCAJSBU/graph.json","fetch_events":"https://pith.science/api/pith-number/3RK74S3F4KBRUGYJCHTCCAJSBU/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU/action/timestamp_anchor","attest_storage":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU/action/storage_attestation","attest_author":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU/action/author_attestation","sign_citation":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU/action/citation_signature","submit_replication":"https://pith.science/pith/3RK74S3F4KBRUGYJCHTCCAJSBU/action/replication_record"}},"created_at":"2026-06-03T02:05:41.323719+00:00","updated_at":"2026-06-03T02:05:41.323719+00:00"}