{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:5PMV3IA4NFT74Z43AQVPNYR67J","short_pith_number":"pith:5PMV3IA4","schema_version":"1.0","canonical_sha256":"ebd95da01c6967fe679b042af6e23efa62a051f42dc22e5860000525e99df42b","source":{"kind":"arxiv","id":"2604.02028","version":2},"attestation_state":"computed","paper":{"title":"Why Gaussian Diffusion Models Fail on Discrete Data and How to Prevent It?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alexander Shabalin, Dmitry Vetrov, Ildus Sadrtdinov, Simon Elistratov, Viacheslav Meshchaninov","submitted_at":"2026-04-02T13:35:02Z","abstract_excerpt":"Diffusion models have become a standard approach for generative modeling in continuous domains, yet their application to discrete data remains challenging. We investigate why Gaussian diffusion models with the DDPM solver struggle to sample from discrete distributions that are represented as a mixture of delta-distributions in the continuous space. Using a toy Random Hierarchy Model, we identify a critical sampling interval in which the density of noisified data becomes multimodal. In this regime, DDPM occasionally enters low-density regions between modes producing out-of-distribution inputs f"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2604.02028","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-04-02T13:35:02Z","cross_cats_sorted":[],"title_canon_sha256":"4f7f40725949da3e6fa597d787549a78a3b28b8f6e3693adac38a00ce18ef872","abstract_canon_sha256":"2eeb17c8bf010fd788e0e2482a2d98a7b001b80a353b338053446926f36259e5"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:08.452369Z","signature_b64":"97MDTEKF2EApfGTIzJgtPb2wPFoCmHRf8ikCDKP75Zef+WIZ00BO3ZvOKAfF2uwcSVSU8Hxyl0N59F5H2xqtDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ebd95da01c6967fe679b042af6e23efa62a051f42dc22e5860000525e99df42b","last_reissued_at":"2026-05-28T01:04:08.451726Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:08.451726Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Why Gaussian Diffusion Models Fail on Discrete Data and How to Prevent It?","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Alexander Shabalin, Dmitry Vetrov, Ildus Sadrtdinov, Simon Elistratov, Viacheslav Meshchaninov","submitted_at":"2026-04-02T13:35:02Z","abstract_excerpt":"Diffusion models have become a standard approach for generative modeling in continuous domains, yet their application to discrete data remains challenging. We investigate why Gaussian diffusion models with the DDPM solver struggle to sample from discrete distributions that are represented as a mixture of delta-distributions in the continuous space. Using a toy Random Hierarchy Model, we identify a critical sampling interval in which the density of noisified data becomes multimodal. In this regime, DDPM occasionally enters low-density regions between modes producing out-of-distribution inputs f"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.02028","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.02028/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2604.02028","created_at":"2026-05-28T01:04:08.451797+00:00"},{"alias_kind":"arxiv_version","alias_value":"2604.02028v2","created_at":"2026-05-28T01:04:08.451797+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.02028","created_at":"2026-05-28T01:04:08.451797+00:00"},{"alias_kind":"pith_short_12","alias_value":"5PMV3IA4NFT7","created_at":"2026-05-28T01:04:08.451797+00:00"},{"alias_kind":"pith_short_16","alias_value":"5PMV3IA4NFT74Z43","created_at":"2026-05-28T01:04:08.451797+00:00"},{"alias_kind":"pith_short_8","alias_value":"5PMV3IA4","created_at":"2026-05-28T01:04:08.451797+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.10938","citing_title":"ELF: Embedded Language Flows","ref_index":60,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J","json":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J.json","graph_json":"https://pith.science/api/pith-number/5PMV3IA4NFT74Z43AQVPNYR67J/graph.json","events_json":"https://pith.science/api/pith-number/5PMV3IA4NFT74Z43AQVPNYR67J/events.json","paper":"https://pith.science/paper/5PMV3IA4"},"agent_actions":{"view_html":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J","download_json":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J.json","view_paper":"https://pith.science/paper/5PMV3IA4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2604.02028&json=true","fetch_graph":"https://pith.science/api/pith-number/5PMV3IA4NFT74Z43AQVPNYR67J/graph.json","fetch_events":"https://pith.science/api/pith-number/5PMV3IA4NFT74Z43AQVPNYR67J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J/action/storage_attestation","attest_author":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J/action/author_attestation","sign_citation":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J/action/citation_signature","submit_replication":"https://pith.science/pith/5PMV3IA4NFT74Z43AQVPNYR67J/action/replication_record"}},"created_at":"2026-05-28T01:04:08.451797+00:00","updated_at":"2026-05-28T01:04:08.451797+00:00"}