{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:UYYAV7RWZ5RP5ECFXZYZ7CVP6U","short_pith_number":"pith:UYYAV7RW","schema_version":"1.0","canonical_sha256":"a6300afe36cf62fe9045be719f8aaff5358796878de954c72ac4ffe4d481f627","source":{"kind":"arxiv","id":"2512.09106","version":4},"attestation_state":"computed","paper":{"title":"Learning Unmasking Policies for Diffusion Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jason Ramapuram, Jo\\~ao Monteiro, Louis B\\'ethune, Marco Cuturi, Metod Jazbec, Michael Kirchhof, Pierre Ablin, Theo X. Olausson, Victor Turrisi","submitted_at":"2025-12-09T20:44:33Z","abstract_excerpt":"Diffusion (Large) Language Models (dLLMs) now match the downstream performance of their autoregressive counterparts on many tasks, while holding the promise of being more efficient during inference. One critical design aspect of dLLMs is the sampling procedure that selects which tokens to unmask at each diffusion step. Indeed, recent work has found that heuristic strategies such as confidence thresholding improve both sample quality and token throughput compared to random unmasking. However, such heuristics have downsides: they require manual tuning, and we observe that their performance degra"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2512.09106","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-09T20:44:33Z","cross_cats_sorted":[],"title_canon_sha256":"1d71f40c26ce3d0789ece2fb08ebca1cc765fe60a87477d8ee26e0d3089acbb9","abstract_canon_sha256":"fc1466f2029b9507635d7d08c88c9e7d9a81356ed724eb64519b6ba9983f6b9b"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T00:05:03.419798Z","signature_b64":"3I1i/P4eXLOrrOsrG1klhWbV2WUFOsE/eTZEzgXQD001CPex7+GPH9g6zP4/yW9YR8wlXxfiD8XVlr2ZeZhlCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a6300afe36cf62fe9045be719f8aaff5358796878de954c72ac4ffe4d481f627","last_reissued_at":"2026-06-03T00:05:03.419226Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T00:05:03.419226Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Learning Unmasking Policies for Diffusion Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jason Ramapuram, Jo\\~ao Monteiro, Louis B\\'ethune, Marco Cuturi, Metod Jazbec, Michael Kirchhof, Pierre Ablin, Theo X. Olausson, Victor Turrisi","submitted_at":"2025-12-09T20:44:33Z","abstract_excerpt":"Diffusion (Large) Language Models (dLLMs) now match the downstream performance of their autoregressive counterparts on many tasks, while holding the promise of being more efficient during inference. One critical design aspect of dLLMs is the sampling procedure that selects which tokens to unmask at each diffusion step. Indeed, recent work has found that heuristic strategies such as confidence thresholding improve both sample quality and token throughput compared to random unmasking. However, such heuristics have downsides: they require manual tuning, and we observe that their performance degra"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.09106","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.09106/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2512.09106","created_at":"2026-06-03T00:05:03.419300+00:00"},{"alias_kind":"arxiv_version","alias_value":"2512.09106v4","created_at":"2026-06-03T00:05:03.419300+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.09106","created_at":"2026-06-03T00:05:03.419300+00:00"},{"alias_kind":"pith_short_12","alias_value":"UYYAV7RWZ5RP","created_at":"2026-06-03T00:05:03.419300+00:00"},{"alias_kind":"pith_short_16","alias_value":"UYYAV7RWZ5RP5ECF","created_at":"2026-06-03T00:05:03.419300+00:00"},{"alias_kind":"pith_short_8","alias_value":"UYYAV7RW","created_at":"2026-06-03T00:05:03.419300+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2604.02560","citing_title":"Dependency-Guided Parallel Decoding in Discrete Diffusion Language Models","ref_index":9,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U","json":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U.json","graph_json":"https://pith.science/api/pith-number/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/graph.json","events_json":"https://pith.science/api/pith-number/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/events.json","paper":"https://pith.science/paper/UYYAV7RW"},"agent_actions":{"view_html":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U","download_json":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U.json","view_paper":"https://pith.science/paper/UYYAV7RW","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2512.09106&json=true","fetch_graph":"https://pith.science/api/pith-number/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/graph.json","fetch_events":"https://pith.science/api/pith-number/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/action/storage_attestation","attest_author":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/action/author_attestation","sign_citation":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/action/citation_signature","submit_replication":"https://pith.science/pith/UYYAV7RWZ5RP5ECFXZYZ7CVP6U/action/replication_record"}},"created_at":"2026-06-03T00:05:03.419300+00:00","updated_at":"2026-06-03T00:05:03.419300+00:00"}