{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:F4JIFFKUB327TU2XITLUJGKXLE","short_pith_number":"pith:F4JIFFKU","schema_version":"1.0","canonical_sha256":"2f128295540ef5f9d35744d7449957592839c7d431bff461bb36a16f9aa02b56","source":{"kind":"arxiv","id":"2605.12522","version":1},"attestation_state":"computed","paper":{"title":"Differences in Text Generated by Diffusion and Autoregressive Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"Diffusion language models generate text with higher semantic coherence and diversity than autoregressive models due to bidirectional context in training, while lower entropy stems from their decoding algorithms.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Chengwei Liang, Jingzhao Zhang, Meiqi Gu, Minrui Luo, Tianxing He, Xingyan Chen, Zeyang Zhang","submitted_at":"2026-04-04T17:30:35Z","abstract_excerpt":"Diffusion language models (DLMs) are promising alternatives to autoregressive language models (ARMs), yet the intrinsic differences in their generated text remain underexplored. We first find empirically that off-the-shelf DLMs exhibit lower $n$-gram entropy, higher semantic coherence, and higher semantic diversity. To understand the cause, we conduct controlled experiments that decouple the effects of training objectives and decoding algorithms. Results suggest that the DLM training objective contributes to the increases in semantic coherence and semantic diversity, but has a minor influence "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.12522","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-04T17:30:35Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d214cd220beeda91852a4ea93e3a7195d797345c2a044b3aa19e0ff6c26949ac","abstract_canon_sha256":"6aa56592c21a371401bacc3f41678d5bfb82eb89e857097159b95715d7521f4b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:10:02.855483Z","signature_b64":"31j8Sh52XME45cP8DN50IYRQ7aeUfF3Qsofp11CdpL2PD52JIVKLlE++a1sKWEC9cab57kqSYwPAa8vO/uK0Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2f128295540ef5f9d35744d7449957592839c7d431bff461bb36a16f9aa02b56","last_reissued_at":"2026-05-18T03:10:02.854950Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:10:02.854950Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Differences in Text Generated by Diffusion and Autoregressive Language Models","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"Diffusion language models generate text with higher semantic coherence and diversity than autoregressive models due to bidirectional context in training, while lower entropy stems from their decoding algorithms.","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Chengwei Liang, Jingzhao Zhang, Meiqi Gu, Minrui Luo, Tianxing He, Xingyan Chen, Zeyang Zhang","submitted_at":"2026-04-04T17:30:35Z","abstract_excerpt":"Diffusion language models (DLMs) are promising alternatives to autoregressive language models (ARMs), yet the intrinsic differences in their generated text remain underexplored. We first find empirically that off-the-shelf DLMs exhibit lower $n$-gram entropy, higher semantic coherence, and higher semantic diversity. To understand the cause, we conduct controlled experiments that decouple the effects of training objectives and decoding algorithms. Results suggest that the DLM training objective contributes to the increases in semantic coherence and semantic diversity, but has a minor influence "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Results suggest that the DLM training objective contributes to the increases in semantic coherence and semantic diversity, but has a minor influence on entropy. These differences are primarily driven by the bidirectional context; the reduction in entropy stems from DLMs' decoding algorithms, particularly confidence-based remasking strategies.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the controlled experiments can cleanly decouple training-objective effects from decoding-algorithm effects without confounding factors from implementation choices or data selection.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"DLMs exhibit lower n-gram entropy, higher semantic coherence, and higher semantic diversity than ARMs, primarily due to bidirectional context and remasking decoding strategies.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Diffusion language models generate text with higher semantic coherence and diversity than autoregressive models due to bidirectional context in training, while lower entropy stems from their decoding algorithms.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"87e13b3bacf483c8698a6cb823f4ac5e2e453373ff4f3a65fb5c89402d5eb9c6"},"source":{"id":"2605.12522","kind":"arxiv","version":1},"verdict":{"id":"b04183a9-7db3-4db6-a511-8ff5bb54f97f","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-14T20:55:29.405292Z","strongest_claim":"Results suggest that the DLM training objective contributes to the increases in semantic coherence and semantic diversity, but has a minor influence on entropy. These differences are primarily driven by the bidirectional context; the reduction in entropy stems from DLMs' decoding algorithms, particularly confidence-based remasking strategies.","one_line_summary":"DLMs exhibit lower n-gram entropy, higher semantic coherence, and higher semantic diversity than ARMs, primarily due to bidirectional context and remasking decoding strategies.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the controlled experiments can cleanly decouple training-objective effects from decoding-algorithm effects without confounding factors from implementation choices or data selection.","pith_extraction_headline":"Diffusion language models generate text with higher semantic coherence and diversity than autoregressive models due to bidirectional context in training, while lower entropy stems from their decoding algorithms."},"references":{"count":41,"sample":[{"doi":"","year":null,"title":"Phi-4-Mini Technical Report: Compact yet Powerful Multimodal Language Models via Mixture-of-LoRAs","work_id":"83956045-536a-41ff-af02-b80e2a614eab","ref_index":1,"cited_arxiv_id":"2503.01743","is_internal_anchor":true},{"doi":"","year":null,"title":"Block Diffusion: Interpolating Between Autoregressive and Diffusion Language Models","work_id":"b34ab928-6ffb-4028-b13c-395a8924d76b","ref_index":2,"cited_arxiv_id":"2503.09573","is_internal_anchor":true},{"doi":"","year":null,"title":"Accelerated sampling from masked diffusion models via entropy bounded unmasking.arXiv preprint arXiv:2505.24857","work_id":"87f3b449-ac69-40f1-a330-34d31ccfba8e","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"LLaDA2.0: Scaling Up Diffusion Language Models to 100B","work_id":"a1b1080d-0a91-44a4-8f70-2bf3e7a27e0b","ref_index":4,"cited_arxiv_id":"2512.15745","is_internal_anchor":true},{"doi":"","year":null,"title":"M3-Embedding: Multi-Linguality, Multi-Functionality, Multi-Granularity Text Embeddings Through Self-Knowledge Distillation","work_id":"a9435752-4e49-42bd-95b4-0fec975633c8","ref_index":5,"cited_arxiv_id":"2402.03216","is_internal_anchor":true}],"resolved_work":41,"snapshot_sha256":"4c4af1cc4372d63c77f1c45eb0787f9e14ea58864aab64d391c235b731dc79f5","internal_anchors":18},"formal_canon":{"evidence_count":2,"snapshot_sha256":"34d1f4283181bc0a9d52443080b5ad75f1386c8bc520823bb88eae7d85aff84a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.12522","created_at":"2026-05-18T03:10:02.855037+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.12522v1","created_at":"2026-05-18T03:10:02.855037+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12522","created_at":"2026-05-18T03:10:02.855037+00:00"},{"alias_kind":"pith_short_12","alias_value":"F4JIFFKUB327","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"F4JIFFKUB327TU2X","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"F4JIFFKU","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE","json":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE.json","graph_json":"https://pith.science/api/pith-number/F4JIFFKUB327TU2XITLUJGKXLE/graph.json","events_json":"https://pith.science/api/pith-number/F4JIFFKUB327TU2XITLUJGKXLE/events.json","paper":"https://pith.science/paper/F4JIFFKU"},"agent_actions":{"view_html":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE","download_json":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE.json","view_paper":"https://pith.science/paper/F4JIFFKU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.12522&json=true","fetch_graph":"https://pith.science/api/pith-number/F4JIFFKUB327TU2XITLUJGKXLE/graph.json","fetch_events":"https://pith.science/api/pith-number/F4JIFFKUB327TU2XITLUJGKXLE/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE/action/timestamp_anchor","attest_storage":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE/action/storage_attestation","attest_author":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE/action/author_attestation","sign_citation":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE/action/citation_signature","submit_replication":"https://pith.science/pith/F4JIFFKUB327TU2XITLUJGKXLE/action/replication_record"}},"created_at":"2026-05-18T03:10:02.855037+00:00","updated_at":"2026-05-18T03:10:02.855037+00:00"}