{"total":452,"items":[{"citing_arxiv_id":"2606.26566","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Adversarial Diffusion Across Modalities: A Fusion Survey of Attacks, Defenses, and Evaluation for Text, Vision, and Vision-Language Models","primary_cat":"cs.CR","submitted_at":"2026-06-25T03:32:12+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A narrative survey that catalogs fifty papers on diffusion-based adversarial techniques across text, vision, and vision-language models, proposes a six-class taxonomy of diffusion roles plus a unified five-dimension evaluation framework, and releases a companion catalog.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.11308","ref_index":126,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"pop-cosmos: Disentangling galaxy properties from observables using data-driven approaches","primary_cat":"astro-ph.GA","submitted_at":"2026-06-09T18:00:20+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A beta-VAE analysis of pop-cosmos models finds that five latent dimensions capture the rest-frame optical SED, corresponding to stellar mass, recent star formation, dust, and two gas ionization states.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03926","ref_index":36,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DiffUNet^2: Bidirectional Prediction, Probabilistic Generation and Collaborative Visual Discovery for Scientific Data","primary_cat":"cs.HC","submitted_at":"2026-06-02T17:15:01+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"DiffUNet^2 is a bidirectional conditional diffusion model integrated with visual tools for probabilistic exploration of scientific time series across five evaluated datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.03393","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Flicker-DDPM: Accelerating Denoising Diffusion via 1/f Colored Noise Injection","primary_cat":"cs.LG","submitted_at":"2026-06-02T09:36:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Flicker-DDPM accelerates DDPM sampling by injecting 1/f colored noise matched to image spectra, achieving similar quality with 3.33 times fewer steps on CIFAR-10.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.02448","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Diffusion-Based Heart Sound Generation: Evaluation with Physiological Signal Metrics, Classifiers, and Expert Listening","primary_cat":"eess.SP","submitted_at":"2026-06-01T16:19:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"A diffusion model generates synthetic phonocardiogram clips that retain some normal/abnormal discriminative structure (82.8% classifier accuracy) but show reduced envelope periodicity and increased burstiness relative to real clips from the PhysioNet 2016 dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01493","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Splatshot: 3D Face Avatar Generation from a Single Unconstrained Photo","primary_cat":"cs.CV","submitted_at":"2026-05-31T23:19:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"SplatShot is a training-free method that inserts per-step 3DGS refitting and photometric feedback into diffusion denoising to enforce multi-view consistency for single-photo 3D face avatars.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01151","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Lagrangian Perturbation Diffusion Steering: Latent Reinforcement Learning for Generative Policies","primary_cat":"cs.LG","submitted_at":"2026-05-31T10:40:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"LP-DS improves generative policies for imitation and RL by optimizing latent noise perturbations with a constrained Lagrangian objective, showing up to 25% better returns on manipulation and locomotion tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.01048","ref_index":54,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation","primary_cat":"cs.CV","submitted_at":"2026-05-31T06:38:18+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DRDD decouples diffusion into independent noise and residual stages to preserve domain harmonization and enable unified data-efficient I2I translation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.31204","ref_index":127,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Probabilistic Precipitation Nowcasting with Rectified Flow Transformers","primary_cat":"cs.CV","submitted_at":"2026-05-29T12:11:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FREUD applies rectified flow transformers with frame-wise encoding and a unified decoder to achieve state-of-the-art probabilistic precipitation nowcasting on the SEVIR benchmark.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30876","ref_index":46,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"dMoE: dLLMs with Learnable Block Experts","primary_cat":"cs.CL","submitted_at":"2026-05-29T06:03:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"dMoE aggregates token expert distributions to block level in dLLMs, cutting unique experts from 69.5 to 14.6, memory by 76-80%, and latency by 1.14-1.66x while retaining 99.11% performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30753","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Efficient Diffusion LLMs via Temporal-Spatial Parallel Decoding and Confidence Extrapolation","primary_cat":"cs.CL","submitted_at":"2026-05-29T02:29:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Introduces TSPD with a trajectory-feature controller and training-free CE to reduce denoising steps in dLLMs while aiming to preserve quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30614","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Audio Pirates: Black-box Audio Watermark Removal via Diffusion Priors","primary_cat":"cs.CR","submitted_at":"2026-05-28T22:07:32+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DiffErase removes black-box audio watermarks via diffusion priors by adding intermediate noise and regenerating with a pretrained model, preserving quality across audio domains.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30512","ref_index":30,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PhyDrawGen: Physically Grounded Diagram Generation from Natural Language","primary_cat":"cs.AI","submitted_at":"2026-05-28T19:49:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"PhyDrawGen is a neuro-symbolic pipeline that extracts typed scene graphs via LLM, converts them to physically constrained PSLGs via deterministic solver, and refines via fine-tuned Qwen-VL, claiming superior performance over GPT-5-image and Gemini models on 1,449 physics problems.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30332","ref_index":56,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Colored Noise Diffusion Sampling","primary_cat":"cs.CV","submitted_at":"2026-05-28T17:58:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CNS is a plug-and-play stochastic sampler for diffusion models that uses timestep- and frequency-dependent colored noise to allocate energy to unresolved bands, producing lower FID scores than standard ODE/SDE baselines on ImageNet-256.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30230","ref_index":36,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation","primary_cat":"cs.CV","submitted_at":"2026-05-28T17:00:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A fine-tuning-free framework combines pretrained Stable Diffusion with IP-Adapter plus three parameter-free modules to achieve improved lip synchronization and visual quality in talking face generation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.30056","ref_index":12,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Sample-Efficient Diffusion-based Reinforcement Learning with Critic Guidance","primary_cat":"cs.RO","submitted_at":"2026-05-28T15:07:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CGPO integrates training-free critic guidance into diffusion denoising to produce high-Q actions as regression targets, yielding SOTA results on MuJoCo locomotion and successful Franka arm grasping.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29920","ref_index":8,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Midpoint Generative Models","primary_cat":"cs.LG","submitted_at":"2026-05-28T13:31:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Midpoint Generative Models define a midpoint divergence from flow matching symmetry and derive its variational form as a tractable objective for training competitive one-step generators.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.29335","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking FID Through the Geometry of the Reference Dataset","primary_cat":"cs.CV","submitted_at":"2026-05-28T04:10:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"FID improves with better samples only on concentrated reference datasets but can worsen on dispersed ones, as shown by density and effective rank in a controlled study across six datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.28900","ref_index":11,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Spectral Guidance for Flexible and Efficient Control of Diffusion Models","primary_cat":"cs.LG","submitted_at":"2026-05-27T15:11:09+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Spectral Guidance learns singular functions via self-supervised objective to project guidance signals onto diffusion sampling trajectories, enabling stable control without retraining or backpropagation and improving CIFAR-10 accuracy by 37 points with 4x faster sampling.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.28394","ref_index":27,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Sketch2Motion: Text-driven 2D Sketch to 3D Animation via Diffusion-guided Skeleton Optimization","primary_cat":"cs.CV","submitted_at":"2026-05-27T12:32:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Sketch2Motion is a diffusion-guided skeleton optimization framework that generates text-driven 3D animations from 2D sketches for biped, quadruped, and other articulated characters.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.28167","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DebFilter: Eradicating Biases Stashed in Value","primary_cat":"cs.CV","submitted_at":"2026-05-27T08:49:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"DebFilter mitigates biases in text-to-image diffusion models by applying a fixed offset to the guidance embedding slice in cross-attention during inference.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27975","ref_index":2,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Continual Learning in Modern Hopfield Networks with an Application to Diffusion Models","primary_cat":"cs.LG","submitted_at":"2026-05-27T05:12:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Modern Hopfield energy identifies high-energy samples as more prone to intrinsic forgetting in continual learning, with effective energy-based replay validated in diffusion models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27947","ref_index":33,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"SANTS: A State-Adaptive Scheduler for World Action Models","primary_cat":"cs.RO","submitted_at":"2026-05-27T04:40:48+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"SANTS adaptively chooses denoising depth in video-based robot action diffusion policies using a state-dependent stopping hazard and noise ratio, trained via downstream action reward to reduce latency.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.27919","ref_index":6,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Frequency-Guided Action Diffusion via Sub-Frequency Manifold Traversal","primary_cat":"cs.RO","submitted_at":"2026-05-27T03:45:25+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"FGO guides diffusion policy generation via expanding spectral bands on sub-frequency manifolds to improve action smoothness on 15 robotic manipulation tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23856","ref_index":83,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Point Tracking Improves World Action Models","primary_cat":"cs.RO","submitted_at":"2026-05-22T17:08:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"MODERATE","novelty_score":7.0,"formal_verification":"none","one_line_summary":"JOPAT jointly models pixels, point tracks, and actions in a diffusion transformer and reports gains over pixel-only baselines on long-horizon robot tasks with occlusion and off-screen motion.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23522","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Precise: SDE-Consistent Stochastic Sampling for RL Post-Training of Flow-Matching Models","primary_cat":"cs.LG","submitted_at":"2026-05-22T11:37:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Precise is a new SDE-consistent stochastic sampler that balances exploration and stability for RL post-training of flow-matching models via a novel posterior-mean approximation.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23445","ref_index":9,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DFSAttn: Dynamic Fine-grained Sparse Attention for Efficient Video Generation","primary_cat":"cs.CV","submitted_at":"2026-05-22T09:58:23+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DFSAttn is a training-free framework for dynamic fine-grained sparse attention in video DiTs that achieves up to 2.1x speedup while preserving generation quality via Hilbert reordering, hierarchical scoring, and adaptive caching.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23381","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation","primary_cat":"cs.CV","submitted_at":"2026-05-22T08:50:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"VDE accelerates rectified flow models like Flux by 3.22x with LPIPS of 0.069 via velocity decomposition into parallel/orthogonal components plus periodic full-pass anchoring.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23365","ref_index":21,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Score-Based One-step MeanFlow Policy Optimization","primary_cat":"cs.LG","submitted_at":"2026-05-22T08:28:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"SOM is an actor-critic algorithm that constructs the target velocity field for one-step MeanFlow policies directly from the Q-function via score estimation and probability flow ODE, achieving claimed SOTA on locomotion tasks with reduced training and inference time.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23282","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Discontinuous Galerkin Neural Operator for Pathology Defocus Deblurring","primary_cat":"eess.IV","submitted_at":"2026-05-22T06:50:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DGNO parameterizes integral kernels with discontinuous Galerkin elements for heterogeneous defocus deblurring in pathology images and reports superior performance over prior methods.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.23226","ref_index":37,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"MASQ: Accelerating Masked Diffusion via Stage-Wise Multi-Precision Quantization","primary_cat":"cs.AR","submitted_at":"2026-05-22T04:37:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"MASQ claims up to 16.06x speedup and 4.18x energy gain over A100 for masked diffusion via stage-wise multi-precision quantization and specialized hardware units while preserving quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22990","ref_index":32,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Drift-React: One-step Generation of Reaction Pathways via SE(3) Drifting Fields","primary_cat":"physics.chem-ph","submitted_at":"2026-05-21T19:40:45+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Drift-React produces full minimum energy pathways for reactions in a single step via SE(3) drifting fields, matching TS accuracy of iterative models with orders-of-magnitude speedup on Transition1x and Halo8 datasets.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22723","ref_index":10,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"The Value of Covariance Matching in Gaussian DDPMs and the Lanczos Sampler","primary_cat":"cs.LG","submitted_at":"2026-05-21T16:57:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Full covariance matching in Gaussian DDPMs yields O(1/T^2) path KL error and is enabled by the training-free Lanczos Gaussian sampler using Jacobian-vector products.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.22050","ref_index":34,"ref_count":2,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Broken Memories: Detecting and Mitigating Memorization in Diffusion Models with Degraded Generations","primary_cat":"cs.CV","submitted_at":"2026-05-21T06:36:59+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21573","ref_index":60,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Lens: Rethinking Training Efficiency for Foundational Text-to-Image Models","primary_cat":"cs.CV","submitted_at":"2026-05-20T17:59:58+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Lens is a 3.8B-parameter text-to-image model that reaches competitive or superior performance to >6B-parameter systems using 19.3% of the training compute of Z-Image through a densely captioned 800M dataset, multi-resolution batching, semantic VAE, strong language encoder, RL fine-tuning, and 4-step","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21466","ref_index":68,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"StreamGVE: Training-Free Video Editing via Few-Step Streaming Video Generation","primary_cat":"cs.CV","submitted_at":"2026-05-20T17:52:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"StreamGVE enables high-quality training-free video editing by converting the task to noise-to-data streaming generation with dual-branch fast sampling, self-attention bridges, cross-attention grounding, source-oriented guidance, and visual prompting.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21460","ref_index":40,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"HITL-D: Human In The Loop Diffusion Assisted Shared Control","primary_cat":"cs.RO","submitted_at":"2026-05-20T17:49:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"HITL-D combines diffusion policies with human input for shared robotic control, reducing required joystick axes and improving speed and workload in manipulation tasks per a 12-participant study.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21190","ref_index":16,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Semantic Granularity Navigation in Image Editing","primary_cat":"cs.CV","submitted_at":"2026-05-20T13:53:13+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.21123","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Linear-DPO: Linear Direct Preference Optimization for Diffusion and Flow-Matching Generative Models","primary_cat":"cs.CV","submitted_at":"2026-05-20T12:54:51+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Linear-DPO replaces sigmoid utility with linear utility and adds EMA reference to improve preference alignment in diffusion and flow-matching text-to-image models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20955","ref_index":64,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DrawMotion: Generating 3D Human Motions by Freehand Drawing","primary_cat":"cs.CV","submitted_at":"2026-05-20T09:43:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"DrawMotion is a diffusion-based framework that fuses text and hand-drawn stickman conditions via a Multi-Condition Module and training-free guidance to generate 3D human motions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20872","ref_index":65,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"CAdam: Context-Adaptive Moment Estimation for 3D Gaussian Densification in Generative Distillation","primary_cat":"cs.LG","submitted_at":"2026-05-20T08:08:39+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CAdam reinterprets densification in generative 3DGS as signal verification via gradient-moment interference, quantile context, and SNR gating to achieve large reductions in primitive count with comparable quality.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20856","ref_index":39,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"DISC: Decoupling Instruction from State-Conditioned Control via Policy Generation","primary_cat":"cs.RO","submitted_at":"2026-05-20T07:45:50+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A hypernetwork generates complete task-specific visuomotor policy parameters from instructions alone to structurally eliminate observation leakage in language-conditioned robotic control.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20780","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Learning to Think in Physics: Breaking Shortcut Learning in Scientific Diffusion via Representation Alignment","primary_cat":"cs.LG","submitted_at":"2026-05-20T06:22:44+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"REPA-P aligns intermediate representations in diffusion models with physical states using first-principles PDE residuals to accelerate convergence and boost out-of-distribution robustness on PDE tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20777","ref_index":31,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AttriStory: Fine-grained Attribute Realization for Visual Storytelling with Diffusion Models","primary_cat":"cs.CV","submitted_at":"2026-05-20T06:17:53+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"AttriStory adds a benchmark and AttriLoss-based latent optimization to improve faithful rendering of fine-grained attributes such as clothing color and texture in diffusion-model visual storytelling.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20708","ref_index":48,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Rethinking Cross-Layer Information Routing in Diffusion Transformers","primary_cat":"cs.CV","submitted_at":"2026-05-20T05:07:15+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20640","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Pareto-Enhanced Portrait Generation: Vision-Aligned Text Supervision for Alignment, Realism, and Aesthetics","primary_cat":"cs.CV","submitted_at":"2026-05-20T02:55:28+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A feature supervision approach using SigLIP 2 extracts multi-granularity vision-aligned text representations to supervise MM-DiT image branches, pushing the Pareto frontier for portrait generation across alignment, realism, and aesthetics.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19771","ref_index":37,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Beyond Imitation: Learning Safe End-to-End Autonomous Driving from Hard Negatives","primary_cat":"cs.RO","submitted_at":"2026-05-19T12:41:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"BeyondDrive augments imitation learning with synthesized safety-critical negative trajectories and a repulsive loss to improve safety in autonomous driving, reporting 89.7 PDMS on NAVSIMv1 and generalization to other models.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19739","ref_index":17,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"FlowErase-RL: Rethinking Concept Erasure as Reward Optimization in Flow Matching Models","primary_cat":"cs.CV","submitted_at":"2026-05-19T12:10:09+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19411","ref_index":137,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"BrepForge: Factorized B-rep Synthesis via Wireframe Composition and Boundary-Conditioned Surface Instantiation","primary_cat":"cs.GR","submitted_at":"2026-05-19T06:04:26+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"BrepForge factorizes B-rep synthesis into face-aware autoregressive wireframe composition followed by boundary-conditioned surface instantiation using learning-free geometric priors.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.19366","ref_index":138,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Accurate, Efficient, and Explainable Deep Learning Approaches for Environmental Science Problems","primary_cat":"cs.LG","submitted_at":"2026-05-19T04:58:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"The work introduces WaLeF/FIDLAr for flood forecasting, CoDiCast for probabilistic weather, and Hypercube-RAG for explainable environmental QA, claiming superior accuracy, efficiency, and interpretability over baselines.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}