{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:GX4VJYB6NTWWRSZHB3D2IJX3BR","short_pith_number":"pith:GX4VJYB6","schema_version":"1.0","canonical_sha256":"35f954e03e6ced68cb270ec7a426fb0c59f0903864928926c5b107f0badcd252","source":{"kind":"arxiv","id":"2410.17891","version":3},"attestation_state":"computed","paper":{"title":"Scaling Diffusion Language Models via Adaptation from Autoregressive Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chenxin An, Hao Peng, Jiacheng Ye, Jiawei Han, Lingpeng Kong, Lin Zheng, Mukai Li, Peilin Zhao, Shansan Gong, Shivam Agarwal, Wei Bi, Yizhe Zhang","submitted_at":"2024-10-23T14:04:22Z","abstract_excerpt":"Diffusion Language Models (DLMs) have emerged as a promising new paradigm for text generative modeling, potentially addressing limitations of autoregressive (AR) models. However, current DLMs have been studied at a smaller scale compared to their AR counterparts and lack fair comparison on language modeling benchmarks. Additionally, training diffusion models from scratch at scale remains challenging. Given the prevalence of open-source AR language models, we propose adapting these models to build text diffusion models. We demonstrate connections between AR and diffusion modeling objectives and"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2410.17891","kind":"arxiv","version":3},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2024-10-23T14:04:22Z","cross_cats_sorted":[],"title_canon_sha256":"0cc86a6e95746ea64005aacb09cc092c1083a7c7f4a4f4096242afa4aef901f4","abstract_canon_sha256":"f616183ef9a7c8dc88ab325f2931e286ce47d207d60ddb9a4a01072d2a5b562d"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T19:53:53.670822Z","signature_b64":"tP0diijiBpvlWRRgOBFUB9Qi8H7JkXKFnVC+L7Q13mJJ0gQp++9eYpFk1BbcdKS+MzUVa5GalrsCFceEOSzuCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"35f954e03e6ced68cb270ec7a426fb0c59f0903864928926c5b107f0badcd252","last_reissued_at":"2026-05-20T19:53:53.668724Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T19:53:53.668724Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Scaling Diffusion Language Models via Adaptation from Autoregressive Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Chenxin An, Hao Peng, Jiacheng Ye, Jiawei Han, Lingpeng Kong, Lin Zheng, Mukai Li, Peilin Zhao, Shansan Gong, Shivam Agarwal, Wei Bi, Yizhe Zhang","submitted_at":"2024-10-23T14:04:22Z","abstract_excerpt":"Diffusion Language Models (DLMs) have emerged as a promising new paradigm for text generative modeling, potentially addressing limitations of autoregressive (AR) models. However, current DLMs have been studied at a smaller scale compared to their AR counterparts and lack fair comparison on language modeling benchmarks. Additionally, training diffusion models from scratch at scale remains challenging. Given the prevalence of open-source AR language models, we propose adapting these models to build text diffusion models. We demonstrate connections between AR and diffusion modeling objectives and"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2410.17891","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2410.17891/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2410.17891","created_at":"2026-05-20T19:53:53.668812+00:00"},{"alias_kind":"arxiv_version","alias_value":"2410.17891v3","created_at":"2026-05-20T19:53:53.668812+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2410.17891","created_at":"2026-05-20T19:53:53.668812+00:00"},{"alias_kind":"pith_short_12","alias_value":"GX4VJYB6NTWW","created_at":"2026-05-20T19:53:53.668812+00:00"},{"alias_kind":"pith_short_16","alias_value":"GX4VJYB6NTWWRSZH","created_at":"2026-05-20T19:53:53.668812+00:00"},{"alias_kind":"pith_short_8","alias_value":"GX4VJYB6","created_at":"2026-05-20T19:53:53.668812+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":23,"internal_anchor_count":23,"sample":[{"citing_arxiv_id":"2511.18801","citing_title":"PartDiffuser: Part-wise 3D Mesh Generation via Discrete Diffusion","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20813","citing_title":"PulseCol: Periodically Refreshed Column-Sparse Attention for Accelerating Diffusion Language Models","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.15676","citing_title":"Dynamic Chunking for Diffusion Language Models","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17174","citing_title":"Beyond Execution: Static-Analysis Rewards and Hint-Conditioned Diffusion RL for Code Generation","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2605.20179","citing_title":"TIDE: Efficient and Lossless MoE Diffusion LLM Inference with I/O-aware Expert Offload","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14531","citing_title":"Language Generation as Optimal Control: Closed-Loop Diffusion in Latent Control Space","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2508.19982","citing_title":"Diffusion Language Models Know the Answer Before Decoding","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2510.03206","citing_title":"Coevolutionary Continuous Discrete Diffusion: Make Your Diffusion Language Model a Latent Reasoner","ref_index":15,"is_internal_anchor":true},{"citing_arxiv_id":"2510.18165","citing_title":"Saber: An Efficient Sampling with Adaptive Acceleration and Backtracking Enhanced Remasking for Diffusion Language Model","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2511.14148","citing_title":"AsyncVLA: Asynchronous Flow Matching for Vision-Language-Action Models","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2505.22618","citing_title":"Fast-dLLM: Training-free Acceleration of Diffusion LLM by Enabling KV Cache and Parallel Decoding","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14531","citing_title":"Language Generation as Optimal Control: Closed-Loop Diffusion in Latent Control Space","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12522","citing_title":"Differences in Text Generated by Diffusion and Autoregressive Language Models","ref_index":11,"is_internal_anchor":true},{"citing_arxiv_id":"2605.09302","citing_title":"Discrete Langevin-Inspired Posterior Sampling","ref_index":6,"is_internal_anchor":true},{"citing_arxiv_id":"2604.23235","citing_title":"Measuring Temporal Linguistic Emergence in Diffusion Language Models","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"2605.06548","citing_title":"Continuous Latent Diffusion Language Model","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01373","citing_title":"Focus on the Core: Empowering Diffusion Large Language Models by Self-Contrast","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2604.11748","citing_title":"LangFlow: Continuous Diffusion Rivals Discrete in Language Modeling","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2604.08964","citing_title":"Breaking Block Boundaries: Anchor-based History-stable Decoding for Diffusion Large Language Models","ref_index":3,"is_internal_anchor":true},{"citing_arxiv_id":"2605.07193","citing_title":"Coupling Models for One-Step Discrete Generation","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2502.09992","citing_title":"Large Language Diffusion Models","ref_index":73,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16514","citing_title":"BARD: Bridging AutoRegressive and Diffusion Vision-Language Models Via Highly Efficient Progressive Block Merging and Stage-Wise Distillation","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.04291","citing_title":"Leveraging Pretrained Language Models as Energy Functions for Glauber Dynamics Text Diffusion","ref_index":36,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR","json":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR.json","graph_json":"https://pith.science/api/pith-number/GX4VJYB6NTWWRSZHB3D2IJX3BR/graph.json","events_json":"https://pith.science/api/pith-number/GX4VJYB6NTWWRSZHB3D2IJX3BR/events.json","paper":"https://pith.science/paper/GX4VJYB6"},"agent_actions":{"view_html":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR","download_json":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR.json","view_paper":"https://pith.science/paper/GX4VJYB6","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2410.17891&json=true","fetch_graph":"https://pith.science/api/pith-number/GX4VJYB6NTWWRSZHB3D2IJX3BR/graph.json","fetch_events":"https://pith.science/api/pith-number/GX4VJYB6NTWWRSZHB3D2IJX3BR/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR/action/timestamp_anchor","attest_storage":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR/action/storage_attestation","attest_author":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR/action/author_attestation","sign_citation":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR/action/citation_signature","submit_replication":"https://pith.science/pith/GX4VJYB6NTWWRSZHB3D2IJX3BR/action/replication_record"}},"created_at":"2026-05-20T19:53:53.668812+00:00","updated_at":"2026-05-20T19:53:53.668812+00:00"}