From fa40061eca2d3166996db7c1bb85c0b6a3d2b9a3 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Wed, 19 Jun 2024 11:56:13 -0400
Subject: [PATCH] Remove the redundant init_timestep parameter that was being
 passed around. It is simply the first element of the timesteps array.

---
 invokeai/app/invocations/denoise_latents.py          |  6 ++----
 .../tiled_multi_diffusion_denoise_latents.py         |  3 +--
 .../backend/stable_diffusion/diffusers_pipeline.py   | 13 ++++---------
 .../stable_diffusion/multi_diffusion_pipeline.py     |  9 ++++-----
 4 files changed, 11 insertions(+), 20 deletions(-)
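Note: this refactor relies on the first schedule element being recoverable from `timesteps` itself,
so callers no longer pass it separately. A minimal illustrative sketch of that equivalence (example
values only, not part of the diff):

    import torch

    timesteps = torch.tensor([999, 749, 499, 249])  # example 1-D schedule as built by init_scheduler()
    init_timestep = timesteps[0:1]                  # what the removed parameter used to carry; shape (1,)
    batch_size = 2
    # The pipelines now derive the batched initial timestep internally:
    batched_init_timestep = timesteps[0:1].expand(batch_size)  # shape (batch_size,)
    assert torch.equal(batched_init_timestep, init_timestep.expand(batch_size))
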
diff --git a/invokeai/app/invocations/denoise_latents.py b/invokeai/app/invocations/denoise_latents.py
index a0eb789270..9970949ef6 100644
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -625,7 +625,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
         t_start_idx *= scheduler.order
         t_end_idx *= scheduler.order
 
-        init_timestep = timesteps[t_start_idx : t_start_idx + 1]
         timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
 
         scheduler_step_kwargs: Dict[str, Any] = {}
@@ -648,7 +647,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         if isinstance(scheduler, TCDScheduler):
             scheduler_step_kwargs.update({"eta": 1.0})
 
-        return timesteps, init_timestep, scheduler_step_kwargs
+        return timesteps, scheduler_step_kwargs
 
     def prep_inpaint_mask(
         self, context: InvocationContext, latents: torch.Tensor
@@ -814,7 +813,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
             dtype=unet.dtype,
         )
 
-        timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
+        timesteps, scheduler_step_kwargs = self.init_scheduler(
             scheduler,
             device=unet.device,
             steps=self.steps,
@@ -826,7 +825,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
             result_latents = pipeline.latents_from_embeddings(
                 latents=latents,
                 timesteps=timesteps,
-                init_timestep=init_timestep,
                 noise=noise,
                 seed=seed,
                 mask=mask,
diff --git a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
index 4812cd34a8..53b8ed7fef 100644
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -234,7 +234,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 )
             )
 
-            timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
+            timesteps, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
                 scheduler,
                 device=unet.device,
                 steps=self.steps,
@@ -250,7 +250,6 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 scheduler_step_kwargs=scheduler_step_kwargs,
                 noise=noise,
                 timesteps=timesteps,
-                init_timestep=init_timestep,
                 # TODO(ryand): Add proper callback.
                 callback=lambda x: None,
             )
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 4d5ef3a52b..fdcff7bd0d 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -273,7 +273,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         noise: Optional[torch.Tensor],
         seed: int,
         timesteps: torch.Tensor,
-        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
         control_data: list[ControlNetData] | None = None,
         ip_adapter_data: Optional[list[IPAdapterData]] = None,
@@ -299,9 +298,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate
                 the same noise used earlier in the pipeline. This should really be handled in a clearer way.
             timesteps: The timestep schedule for the denoising process.
-            init_timestep: The first timestep in the schedule.
-                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
-                case, and remove this duplicate param.
             callback: A callback function that is called to report progress during the denoising process.
             control_data: ControlNet data.
             ip_adapter_data: IP-Adapter data.
@@ -316,18 +312,17 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 SD UNet model.
             is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
         """
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where densoisings_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if timesteps.shape[0] == 0:
             return latents
 
         orig_latents = latents.clone()
 
-        batch_size = latents.shape[0]
-        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
         if noise is not None:
+            batch_size = latents.shape[0]
+            batched_init_timestep = timesteps[0:1].expand(batch_size)
+
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma  # it's like in t2l according to diffusers
diff --git a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
index 2f945cfeca..ca68233c27 100644
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -34,19 +34,18 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
         scheduler_step_kwargs: dict[str, Any],
         noise: Optional[torch.Tensor],
         timesteps: torch.Tensor,
-        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
     ) -> torch.Tensor:
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where densoisings_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if timesteps.shape[0] == 0:
             return latents
 
         batch_size, _, latent_height, latent_width = latents.shape
-        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
         if noise is not None:
+            # batched_init_timestep should have shape (batch_size,).
+            batched_init_timestep = timesteps[0:1].expand(batch_size)
+
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma  # it's like in t2l according to diffusers