{"paper":{"title":"Gradient Preconditioning for Efficient and Reliable Reward-Guided Generation","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.LG","authors_text":"Jisung Hwang, Minhyuk Sung","submitted_at":"2026-02-09T13:43:38Z","abstract_excerpt":"We propose a gradient preconditioning method that makes reward-guided generation with one-step generative models both efficient and reliable. Test-time noise optimization can unlock substantially better reward-guided generations from pretrained generative models, but it is prone to reward hacking that degrades quality and is often too slow for practical use. We precondition reward gradients by projecting them onto a carefully designed white Gaussian noise feasible set, a compact spectral set with blockwise norm constraints that tightly captures the statistics and spatial uncorrelatedness of wh"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.08646","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.08646/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"}