{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:EMM5433EMWX2CK5J2LRUQBNP5C","short_pith_number":"pith:EMM5433E","schema_version":"1.0","canonical_sha256":"2319de6f6465afa12ba9d2e34805afe8a914b8486729819ab9317e3fd325dd79","source":{"kind":"arxiv","id":"2501.09732","version":1},"attestation_state":"computed","paper":{"title":"Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haolin Jia, Hexiang Hu, Mingda Zhang, Nanye Ma, Saining Xie, Shangyuan Tong, Tommi Jaakkola, Xuan Yang, Xuhui Jia, Yandong Li, Yu-Chuan Su","submitted_at":"2025-01-16T18:30:37Z","abstract_excerpt":"Generative models have made significant impacts across various domains, largely due to their ability to scale during training by increasing data, computational resources, and model size, a phenomenon characterized by the scaling laws. Recent research has begun to explore inference-time scaling behavior in Large Language Models (LLMs), revealing how performance can further improve with additional computation during inference. Unlike LLMs, diffusion models inherently possess the flexibility to adjust inference-time computation via the number of denoising steps, although the performance gains typ"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2501.09732","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-01-16T18:30:37Z","cross_cats_sorted":[],"title_canon_sha256":"1c146319a74c0bc978bc1355230a114ca890d539132010d335377612abcca90d","abstract_canon_sha256":"c8e219f8b6c2a8a9ce9171be2984f846ec95bb5e274d888c6b5045d700d8718e"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T11:39:04.901611Z","signature_b64":"qvB9zi2umcZzm4R51aVZxQoSXBqskSxbbUAssBdfChW6lLc5uEWUCkmfbW5ksfwW/YBxCK826+ooRBMIrg66Bg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2319de6f6465afa12ba9d2e34805afe8a914b8486729819ab9317e3fd325dd79","last_reissued_at":"2026-05-20T11:39:04.899993Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T11:39:04.899993Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Haolin Jia, Hexiang Hu, Mingda Zhang, Nanye Ma, Saining Xie, Shangyuan Tong, Tommi Jaakkola, Xuan Yang, Xuhui Jia, Yandong Li, Yu-Chuan Su","submitted_at":"2025-01-16T18:30:37Z","abstract_excerpt":"Generative models have made significant impacts across various domains, largely due to their ability to scale during training by increasing data, computational resources, and model size, a phenomenon characterized by the scaling laws. Recent research has begun to explore inference-time scaling behavior in Large Language Models (LLMs), revealing how performance can further improve with additional computation during inference. Unlike LLMs, diffusion models inherently possess the flexibility to adjust inference-time computation via the number of denoising steps, although the performance gains typ"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2501.09732","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2501.09732/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2501.09732","created_at":"2026-05-20T11:39:04.900059+00:00"},{"alias_kind":"arxiv_version","alias_value":"2501.09732v1","created_at":"2026-05-20T11:39:04.900059+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2501.09732","created_at":"2026-05-20T11:39:04.900059+00:00"},{"alias_kind":"pith_short_12","alias_value":"EMM5433EMWX2","created_at":"2026-05-20T11:39:04.900059+00:00"},{"alias_kind":"pith_short_16","alias_value":"EMM5433EMWX2CK5J","created_at":"2026-05-20T11:39:04.900059+00:00"},{"alias_kind":"pith_short_8","alias_value":"EMM5433E","created_at":"2026-05-20T11:39:04.900059+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":24,"internal_anchor_count":24,"sample":[{"citing_arxiv_id":"2603.13708","citing_title":"RSEdit: Text-Guided Image Editing for Remote Sensing","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19385","citing_title":"LatentBox: Storing AI-Generated Images at Scale via a Latent-First Design","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2501.09038","citing_title":"Do generative video models understand physical principles?","ref_index":71,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18233","citing_title":"Enhancing Train-Free Infinite-Frame Generation for Consistent Long Videos","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.19385","citing_title":"LatentBox: Storing AI-Generated Images at Scale via a Latent-First Design","ref_index":37,"is_internal_anchor":true},{"citing_arxiv_id":"2507.08390","citing_title":"Inference-Time Scaling of Diffusion Language Models via Trajectory Refinement","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2507.12549","citing_title":"The Serial Scaling Hypothesis","ref_index":66,"is_internal_anchor":true},{"citing_arxiv_id":"2510.20206","citing_title":"RAPO++: Cross-Stage Prompt Optimization for Text-to-Video Generation via Data Alignment and Test-Time Scaling","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2512.19433","citing_title":"dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2512.23532","citing_title":"Iterative Inference-time Scaling with Adaptive Frequency Steering for Image Super-Resolution","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"2601.00090","citing_title":"It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2504.20690","citing_title":"In-Context Edit: Enabling Instructional Image Editing with In-Context Generation in Large Scale Diffusion Transformer","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2603.06165","citing_title":"Reflective Flow Sampling Enhancement","ref_index":56,"is_internal_anchor":true},{"citing_arxiv_id":"2605.14136","citing_title":"TeDiO: Temporal Diagonal Optimization for Training-Free Coherent Video Diffusion","ref_index":34,"is_internal_anchor":true},{"citing_arxiv_id":"2603.21743","citing_title":"CellFluxRL: Biologically-Constrained Virtual Cell Modeling via Reinforcement Learning","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2604.03885","citing_title":"PhaseFlow4D: Physically Constrained 4D Beam Reconstruction via Feedback-Guided Latent Diffusion","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.05922","citing_title":"Think, then Score: Decoupled Reasoning and Scoring for Video Reward Modeling","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.11722","citing_title":"EPIC: Efficient Predicate-Guided Inference-Time Control for Compositional Text-to-Image Generation","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06779","citing_title":"VASR: Variance-Aware Systematic Resampling for Reward-Guided Diffusion","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2604.23540","citing_title":"Oracle Noise: Faster Semantic Spherical Alignment for Interpretable Latent Optimization","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07427","citing_title":"Personalizing Text-to-Image Generation to Individual Taste","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06779","citing_title":"VASR: Variance-Aware Systematic Resampling for Reward-Guided Diffusion","ref_index":30,"is_internal_anchor":true},{"citing_arxiv_id":"2604.16648","citing_title":"FRIGID: Scaling Diffusion-Based Molecular Generation from Mass Spectra at Training and Inference Time","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"2604.19730","citing_title":"FASTER: Value-Guided Sampling for Fast RL","ref_index":4,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C","json":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C.json","graph_json":"https://pith.science/api/pith-number/EMM5433EMWX2CK5J2LRUQBNP5C/graph.json","events_json":"https://pith.science/api/pith-number/EMM5433EMWX2CK5J2LRUQBNP5C/events.json","paper":"https://pith.science/paper/EMM5433E"},"agent_actions":{"view_html":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C","download_json":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C.json","view_paper":"https://pith.science/paper/EMM5433E","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2501.09732&json=true","fetch_graph":"https://pith.science/api/pith-number/EMM5433EMWX2CK5J2LRUQBNP5C/graph.json","fetch_events":"https://pith.science/api/pith-number/EMM5433EMWX2CK5J2LRUQBNP5C/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C/action/timestamp_anchor","attest_storage":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C/action/storage_attestation","attest_author":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C/action/author_attestation","sign_citation":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C/action/citation_signature","submit_replication":"https://pith.science/pith/EMM5433EMWX2CK5J2LRUQBNP5C/action/replication_record"}},"created_at":"2026-05-20T11:39:04.900059+00:00","updated_at":"2026-05-20T11:39:04.900059+00:00"}