{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:KL2TUAL7MQORU3LTIQHPSU7GKN","short_pith_number":"pith:KL2TUAL7","canonical_record":{"source":{"id":"2602.05000","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T19:37:14Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"5851463b43d9893bc27c49b78d4a6e2bba924a915535379ef76ace1f7ad43e3b","abstract_canon_sha256":"db6b7b6772cf9aef1b71b9f426c1ebd3c57562aeb90da44aacb6dc94a5dd5fe2"},"schema_version":"1.0"},"canonical_sha256":"52f53a017f641d1a6d73440ef953e65344d4522a3dfb2baf1bd39d4f26451846","source":{"kind":"arxiv","id":"2602.05000","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.05000","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"arxiv_version","alias_value":"2602.05000v2","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05000","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"pith_short_12","alias_value":"KL2TUAL7MQOR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"KL2TUAL7MQORU3LT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"KL2TUAL7","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:KL2TUAL7MQORU3LTIQHPSU7GKN","target":"record","payload":{"canonical_record":{"source":{"id":"2602.05000","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T19:37:14Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"5851463b43d9893bc27c49b78d4a6e2bba924a915535379ef76ace1f7ad43e3b","abstract_canon_sha256":"db6b7b6772cf9aef1b71b9f426c1ebd3c57562aeb90da44aacb6dc94a5dd5fe2"},"schema_version":"1.0"},"canonical_sha256":"52f53a017f641d1a6d73440ef953e65344d4522a3dfb2baf1bd39d4f26451846","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:09:23.885853Z","signature_b64":"4vO8r6R4ocpGnIHeuA2Y2x4ELrxNl1ppApcXNgv6V88IvEWewrnhZWwloZpUmlFy+toxTdZ3nJy+qoesyCcuBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"52f53a017f641d1a6d73440ef953e65344d4522a3dfb2baf1bd39d4f26451846","last_reissued_at":"2026-05-18T03:09:23.885118Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:09:23.885118Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2602.05000","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"eOcbtsLwzNzFEqJJO7UFuLWb9FLU1R7QyfwIS0FgqJjK+LLEr8HNkc55/9QfByJTBO7Dsnn7enRCh1BxnR6JAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:26:07.628914Z"},"content_sha256":"e2f1cdd31c86b82440ed91a66999a75615c20cc5f53864d1dc7a1c66c698aeb1","schema_version":"1.0","event_id":"sha256:e2f1cdd31c86b82440ed91a66999a75615c20cc5f53864d1dc7a1c66c698aeb1"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:KL2TUAL7MQORU3LTIQHPSU7GKN","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Entropy Aware Reward Guidance for Diffusion Language Model Alignment","license":"http://creativecommons.org/licenses/by/4.0/","headline":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models.","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.LG","authors_text":"Atula Tejaswi, Constantine Caramanis, Litu Rout, Sanjay Shakkottai, Sujay Sanghavi","submitted_at":"2026-02-04T19:37:14Z","abstract_excerpt":"Reward guidance, also known as posterior sampling, is a popular method for test-time adaptation and post-training in continuous diffusion models. In this paper, we study reward guidance for discrete diffusion language models; now, one cannot differentiate through the natural outputs of the model because they are discrete tokens. We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. We demonstrate that EntRGi maintains both reward model reliability and optimization accuracy, while existing approaches sacrifice one for the other.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the entropy threshold and interpolation schedule can be chosen so the method simultaneously preserves reward model reliability and optimization accuracy without introducing new biases or instability in the discrete sampling process.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"EntRGi uses predictive entropy to dynamically switch between relaxed and hard tokens for reward guidance in discrete diffusion LMs, yielding consistent gains over prior methods in adaptation and RGRL post-training.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3a516afe6cc637343362ff0ebb4952cb251e0e5ac6c91afb125f125884826f08"},"source":{"id":"2602.05000","kind":"arxiv","version":2},"verdict":{"id":"8ea1b2d1-413a-4505-b99e-7ef2812c37b2","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T07:06:24.698873Z","strongest_claim":"We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. We demonstrate that EntRGi maintains both reward model reliability and optimization accuracy, while existing approaches sacrifice one for the other.","one_line_summary":"EntRGi uses predictive entropy to dynamically switch between relaxed and hard tokens for reward guidance in discrete diffusion LMs, yielding consistent gains over prior methods in adaptation and RGRL post-training.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the entropy threshold and interpolation schedule can be chosen so the method simultaneously preserves reward model reliability and optimization accuracy without introducing new biases or instability in the discrete sampling process.","pith_extraction_headline":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models."},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"79ee309c9e388b6258924cef12f44f49999621aa52dcb21f7164004f1b597641"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8ea1b2d1-413a-4505-b99e-7ef2812c37b2"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:09:23Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9bwEcT6BcMiWhf3BRo6zeoKv0siJ35UrPqs5kT/EQwiH6hpKVpeKP+ASMte4UTC0RGmAV9f/SWFK89cxYTX5Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-25T19:26:07.629519Z"},"content_sha256":"d5a7e52309726b3c55770bfb9ec40234bc70c769fb23296a787f45a98eae4635","schema_version":"1.0","event_id":"sha256:d5a7e52309726b3c55770bfb9ec40234bc70c769fb23296a787f45a98eae4635"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/bundle.json","state_url":"https://pith.science/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-25T19:26:07Z","links":{"resolver":"https://pith.science/pith/KL2TUAL7MQORU3LTIQHPSU7GKN","bundle":"https://pith.science/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/bundle.json","state":"https://pith.science/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KL2TUAL7MQORU3LTIQHPSU7GKN/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:KL2TUAL7MQORU3LTIQHPSU7GKN","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"db6b7b6772cf9aef1b71b9f426c1ebd3c57562aeb90da44aacb6dc94a5dd5fe2","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T19:37:14Z","title_canon_sha256":"5851463b43d9893bc27c49b78d4a6e2bba924a915535379ef76ace1f7ad43e3b"},"schema_version":"1.0","source":{"id":"2602.05000","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2602.05000","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"arxiv_version","alias_value":"2602.05000v2","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.05000","created_at":"2026-05-18T03:09:23Z"},{"alias_kind":"pith_short_12","alias_value":"KL2TUAL7MQOR","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"KL2TUAL7MQORU3LT","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"KL2TUAL7","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:d5a7e52309726b3c55770bfb9ec40234bc70c769fb23296a787f45a98eae4635","target":"graph","created_at":"2026-05-18T03:09:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. We demonstrate that EntRGi maintains both reward model reliability and optimization accuracy, while existing approaches sacrifice one for the other."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the entropy threshold and interpolation schedule can be chosen so the method simultaneously preserves reward model reliability and optimization accuracy without introducing new biases or instability in the discrete sampling process."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"EntRGi uses predictive entropy to dynamically switch between relaxed and hard tokens for reward guidance in discrete diffusion LMs, yielding consistent gains over prior methods in adaptation and RGRL post-training."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models."}],"snapshot_sha256":"3a516afe6cc637343362ff0ebb4952cb251e0e5ac6c91afb125f125884826f08"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"79ee309c9e388b6258924cef12f44f49999621aa52dcb21f7164004f1b597641"},"paper":{"abstract_excerpt":"Reward guidance, also known as posterior sampling, is a popular method for test-time adaptation and post-training in continuous diffusion models. In this paper, we study reward guidance for discrete diffusion language models; now, one cannot differentiate through the natural outputs of the model because they are discrete tokens. We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. ","authors_text":"Atula Tejaswi, Constantine Caramanis, Litu Rout, Sanjay Shakkottai, Sujay Sanghavi","cross_cats":["cs.AI","cs.CL"],"headline":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T19:37:14Z","title":"Entropy Aware Reward Guidance for Diffusion Language Model Alignment"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.05000","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-16T07:06:24.698873Z","id":"8ea1b2d1-413a-4505-b99e-7ef2812c37b2","model_set":{"reader":"grok-4.3"},"one_line_summary":"EntRGi uses predictive entropy to dynamically switch between relaxed and hard tokens for reward guidance in discrete diffusion LMs, yielding consistent gains over prior methods in adaptation and RGRL post-training.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"EntRGi uses predictive entropy to interpolate between continuous relaxations and hard tokens, enabling reward guidance for discrete diffusion language models.","strongest_claim":"We introduce a novel mechanism called EntRGi (Entropy aware Reward Guidance) to address this issue. EntRGi dynamically interpolates between continuous token relaxations and sampled hard tokens, on a token-by-token basis, using the diffusion model's predictive entropy. We demonstrate that EntRGi maintains both reward model reliability and optimization accuracy, while existing approaches sacrifice one for the other.","weakest_assumption":"That the entropy threshold and interpolation schedule can be chosen so the method simultaneously preserves reward model reliability and optimization accuracy without introducing new biases or instability in the discrete sampling process."}},"verdict_id":"8ea1b2d1-413a-4505-b99e-7ef2812c37b2"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e2f1cdd31c86b82440ed91a66999a75615c20cc5f53864d1dc7a1c66c698aeb1","target":"record","created_at":"2026-05-18T03:09:23Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"db6b7b6772cf9aef1b71b9f426c1ebd3c57562aeb90da44aacb6dc94a5dd5fe2","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-02-04T19:37:14Z","title_canon_sha256":"5851463b43d9893bc27c49b78d4a6e2bba924a915535379ef76ace1f7ad43e3b"},"schema_version":"1.0","source":{"id":"2602.05000","kind":"arxiv","version":2}},"canonical_sha256":"52f53a017f641d1a6d73440ef953e65344d4522a3dfb2baf1bd39d4f26451846","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"52f53a017f641d1a6d73440ef953e65344d4522a3dfb2baf1bd39d4f26451846","first_computed_at":"2026-05-18T03:09:23.885118Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:23.885118Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"4vO8r6R4ocpGnIHeuA2Y2x4ELrxNl1ppApcXNgv6V88IvEWewrnhZWwloZpUmlFy+toxTdZ3nJy+qoesyCcuBg==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:23.885853Z","signed_message":"canonical_sha256_bytes"},"source_id":"2602.05000","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e2f1cdd31c86b82440ed91a66999a75615c20cc5f53864d1dc7a1c66c698aeb1","sha256:d5a7e52309726b3c55770bfb9ec40234bc70c769fb23296a787f45a98eae4635"],"state_sha256":"269c5bc57400c0b7c9289ad3946bf4efc8faf32cd3cd6c893ce419fe4eb87c15"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"g8vOgbv2sgBhit4WRlKGcxjS9a3lBtfTKQs/+NH8Aca86AVBCZCjiyjtceqzN8NM4MPuax33pnIshd22Ppo7CQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-25T19:26:07.633225Z","bundle_sha256":"33277ca8544870d2f216690493e705d6471c22c1d54b6d43e9944dfd88950c49"}}