From bd74b84cc5879d1f981591573fa9f76174c15ad5 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 25 Jun 2024 18:30:59 -0400
Subject: [PATCH 1/2] Revert "Remove the redundant init_timestep parameter that
 was being passed around. It is simply the first element of the timesteps
 array."

This reverts commit fa40061eca2d3166996db7c1bb85c0b6a3d2b9a3.
---
 invokeai/app/invocations/denoise_latents.py         |  6 ++++--
 .../tiled_multi_diffusion_denoise_latents.py        |  3 ++-
 .../backend/stable_diffusion/diffusers_pipeline.py  | 13 +++++++++----
 .../stable_diffusion/multi_diffusion_pipeline.py    |  9 +++++----
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/invokeai/app/invocations/denoise_latents.py b/invokeai/app/invocations/denoise_latents.py
index 67c7f2abca..fd901298f7 100644
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -625,6 +625,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         t_start_idx *= scheduler.order
         t_end_idx *= scheduler.order
 
+        init_timestep = timesteps[t_start_idx : t_start_idx + 1]
         timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
 
         scheduler_step_kwargs: Dict[str, Any] = {}
@@ -647,7 +648,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         if isinstance(scheduler, TCDScheduler):
             scheduler_step_kwargs.update({"eta": 1.0})
 
-        return timesteps, scheduler_step_kwargs
+        return timesteps, init_timestep, scheduler_step_kwargs
 
     def prep_inpaint_mask(
         self, context: InvocationContext, latents: torch.Tensor
@@ -813,7 +814,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                 dtype=unet.dtype,
             )
 
-            timesteps, scheduler_step_kwargs = self.init_scheduler(
+            timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
                 scheduler,
                 device=unet.device,
                 steps=self.steps,
@@ -825,6 +826,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
             result_latents = pipeline.latents_from_embeddings(
                 latents=latents,
                 timesteps=timesteps,
+                init_timestep=init_timestep,
                 noise=noise,
                 seed=seed,
                 mask=mask,
diff --git a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
index de4b5ac696..2566fd2551 100644
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -252,7 +252,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 )
             )
 
-            timesteps, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
+            timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
                 scheduler,
                 device=unet.device,
                 steps=self.steps,
@@ -269,6 +269,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 scheduler_step_kwargs=scheduler_step_kwargs,
                 noise=noise,
                 timesteps=timesteps,
+                init_timestep=init_timestep,
                 callback=step_callback,
             )
 
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index c25ccf4d2a..cf34dac007 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -273,6 +273,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         noise: Optional[torch.Tensor],
         seed: int,
         timesteps: torch.Tensor,
+        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
         control_data: list[ControlNetData] | None = None,
         ip_adapter_data: Optional[list[IPAdapterData]] = None,
@@ -298,6 +299,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
                 same noise used earlier in the pipeline. This should really be handled in a clearer way.
             timesteps: The timestep schedule for the denoising process.
+            init_timestep: The first timestep in the schedule.
+                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
+                case, and remove this duplicate param.
             callback: A callback function that is called to report progress during the denoising process.
             control_data: ControlNet data.
             ip_adapter_data: IP-Adapter data.
@@ -312,17 +316,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 SD UNet model.
             is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
         """
-        if timesteps.shape[0] == 0:
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
             return latents
 
         orig_latents = latents.clone()
 
+        batch_size = latents.shape[0]
+        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
         if noise is not None:
-            # batched_init_timestep should have shape (batch_size, 1).
-            batched_init_timestep = timesteps[0:1].expand(batch_size)
-
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
diff --git a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
index 8036ca3e01..2c9d39c8cc 100644
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -44,20 +44,21 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
         scheduler_step_kwargs: dict[str, Any],
         noise: Optional[torch.Tensor],
         timesteps: torch.Tensor,
+        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
     ) -> torch.Tensor:
         self._check_regional_prompting(multi_diffusion_conditioning)
 
-        if timesteps.shape[0] == 0:
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
             return latents
 
         batch_size, _, latent_height, latent_width = latents.shape
+        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
        if noise is not None:
-            # batched_init_timestep should have shape (batch_size, 1).
-            batched_init_timestep = timesteps[0:1].expand(batch_size)
-
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers

From 9a3b8c6fcb25407513e0512af9f5cdff62262ae0 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 25 Jun 2024 18:38:13 -0400
Subject: [PATCH 2/2] Fix handling of init_timestep in
 StableDiffusionGeneratorPipeline and improve its documentation.

---
 invokeai/backend/stable_diffusion/diffusers_pipeline.py  | 9 +++------
 .../backend/stable_diffusion/multi_diffusion_pipeline.py | 4 +---
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index cf34dac007..ee464f73e1 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -299,9 +299,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
                 same noise used earlier in the pipeline. This should really be handled in a clearer way.
             timesteps: The timestep schedule for the denoising process.
-            init_timestep: The first timestep in the schedule.
-                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
-                case, and remove this duplicate param.
+            init_timestep: The first timestep in the schedule. This is used to determine the initial noise level, so
+                should be populated if you want noise applied *even* if timesteps is empty.
             callback: A callback function that is called to report progress during the denoising process.
             control_data: ControlNet data.
             ip_adapter_data: IP-Adapter data.
@@ -316,9 +315,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 SD UNet model.
             is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
         """
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where denoising_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if init_timestep.shape[0] == 0:
             return latents
 
         orig_latents = latents.clone()
diff --git a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
index 2c9d39c8cc..0ddcfdd380 100644
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -49,9 +49,7 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
     ) -> torch.Tensor:
         self._check_regional_prompting(multi_diffusion_conditioning)
 
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where denoising_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if init_timestep.shape[0] == 0:
             return latents
 
         batch_size, _, latent_height, latent_width = latents.shape
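
For reference, the behaviour these two patches restore can be illustrated outside of InvokeAI with a short, self-contained sketch. This is not InvokeAI code: it uses diffusers' DDPMScheduler purely as a stand-in for whichever scheduler is configured, and the latent shapes are illustrative.

import torch
from diffusers import DDPMScheduler  # stand-in; any scheduler with add_noise() behaves similarly

scheduler = DDPMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(30)

# Pretend denoising_start == denoising_end: the denoising window is empty, but we still
# know which timestep the input latents should be noised to.
init_timestep = scheduler.timesteps[0:1]  # first timestep of the schedule, shape (1,)
timesteps = scheduler.timesteps[0:0]      # empty window, shape (0,)

latents = torch.randn(2, 4, 64, 64)  # a batch of 2 latent images
noise = torch.randn_like(latents)

# Mirrors the restored logic: the initial noising is gated on init_timestep, not on timesteps.
if init_timestep.shape[0] > 0:
    batched_init_timestep = init_timestep.expand(latents.shape[0])
    latents = scheduler.add_noise(latents, noise, batched_init_timestep)

# Under the pre-revert check (`if timesteps.shape[0] == 0: return latents`) this noising
# would have been skipped entirely; with the check on `init_timestep.shape[0]` it still
# runs even though there is nothing left to denoise.
print(timesteps.shape[0], init_timestep.shape[0])  # 0 1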