From bd74b84cc5879d1f981591573fa9f76174c15ad5 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 25 Jun 2024 18:30:59 -0400
Subject: [PATCH 1/2] Revert "Remove the redundant init_timestep parameter that
 was being passed around. It is simply the first element of the timesteps
 array."

This reverts commit fa40061eca2d3166996db7c1bb85c0b6a3d2b9a3.
---
 invokeai/app/invocations/denoise_latents.py         |  6 ++++--
 .../tiled_multi_diffusion_denoise_latents.py        |  3 ++-
 .../backend/stable_diffusion/diffusers_pipeline.py  | 13 +++++++++----
 .../stable_diffusion/multi_diffusion_pipeline.py    |  9 +++++----
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/invokeai/app/invocations/denoise_latents.py b/invokeai/app/invocations/denoise_latents.py
index 67c7f2abca..fd901298f7 100644
--- a/invokeai/app/invocations/denoise_latents.py
+++ b/invokeai/app/invocations/denoise_latents.py
@@ -625,6 +625,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         t_start_idx *= scheduler.order
         t_end_idx *= scheduler.order
 
+        init_timestep = timesteps[t_start_idx : t_start_idx + 1]
         timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
 
         scheduler_step_kwargs: Dict[str, Any] = {}
@@ -647,7 +648,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         if isinstance(scheduler, TCDScheduler):
             scheduler_step_kwargs.update({"eta": 1.0})
 
-        return timesteps, scheduler_step_kwargs
+        return timesteps, init_timestep, scheduler_step_kwargs
 
     def prep_inpaint_mask(
         self, context: InvocationContext, latents: torch.Tensor
@@ -813,7 +814,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
                 dtype=unet.dtype,
             )
 
-            timesteps, scheduler_step_kwargs = self.init_scheduler(
+            timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
                 scheduler,
                 device=unet.device,
                 steps=self.steps,
@@ -825,6 +826,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
             result_latents = pipeline.latents_from_embeddings(
                 latents=latents,
                 timesteps=timesteps,
+                init_timestep=init_timestep,
                 noise=noise,
                 seed=seed,
                 mask=mask,
diff --git a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
index de4b5ac696..2566fd2551 100644
--- a/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
+++ b/invokeai/app/invocations/tiled_multi_diffusion_denoise_latents.py
@@ -252,7 +252,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 )
             )
 
-            timesteps, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
+            timesteps, init_timestep, scheduler_step_kwargs = DenoiseLatentsInvocation.init_scheduler(
                 scheduler,
                 device=unet.device,
                 steps=self.steps,
@@ -269,6 +269,7 @@ class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
                 scheduler_step_kwargs=scheduler_step_kwargs,
                 noise=noise,
                 timesteps=timesteps,
+                init_timestep=init_timestep,
                 callback=step_callback,
             )
 
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index c25ccf4d2a..cf34dac007 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -273,6 +273,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         noise: Optional[torch.Tensor],
         seed: int,
         timesteps: torch.Tensor,
+        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
         control_data: list[ControlNetData] | None = None,
         ip_adapter_data: Optional[list[IPAdapterData]] = None,
@@ -298,6 +299,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
                 same noise used earlier in the pipeline. This should really be handled in a clearer way.
             timesteps: The timestep schedule for the denoising process.
+            init_timestep: The first timestep in the schedule.
+                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
+                case, and remove this duplicate param.
             callback: A callback function that is called to report progress during the denoising process.
             control_data: ControlNet data.
             ip_adapter_data: IP-Adapter data.
@@ -312,17 +316,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 SD UNet model.
             is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
         """
-        if timesteps.shape[0] == 0:
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
             return latents
 
         orig_latents = latents.clone()
 
+        batch_size = latents.shape[0]
+        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
         if noise is not None:
-            # batched_init_timestep should have shape (batch_size, 1).
-            batched_init_timestep = timesteps[0:1].expand(batch_size)
-
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
diff --git a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
index 8036ca3e01..2c9d39c8cc 100644
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -44,20 +44,21 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
         scheduler_step_kwargs: dict[str, Any],
         noise: Optional[torch.Tensor],
         timesteps: torch.Tensor,
+        init_timestep: torch.Tensor,
         callback: Callable[[PipelineIntermediateState], None],
     ) -> torch.Tensor:
         self._check_regional_prompting(multi_diffusion_conditioning)
 
-        if timesteps.shape[0] == 0:
+        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
+        # cases where denoising_start and denoising_end are set such that there are no timesteps.
+        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
             return latents
 
         batch_size, _, latent_height, latent_width = latents.shape
+        batched_init_timestep = init_timestep.expand(batch_size)
 
         # noise can be None if the latents have already been noised (e.g. when running the SDXL refiner).
        if noise is not None:
-            # batched_init_timestep should have shape (batch_size, 1).
-            batched_init_timestep = timesteps[0:1].expand(batch_size)
-
             # TODO(ryand): I'm pretty sure we should be applying init_noise_sigma in cases where we are starting with
             # full noise. Investigate the history of why this got commented out.
             # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers

From 9a3b8c6fcb25407513e0512af9f5cdff62262ae0 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Tue, 25 Jun 2024 18:38:13 -0400
Subject: [PATCH 2/2] Fix handling of init_timestep in
 StableDiffusionGeneratorPipeline and improve its documentation.

---
 invokeai/backend/stable_diffusion/diffusers_pipeline.py  | 9 +++------
 .../backend/stable_diffusion/multi_diffusion_pipeline.py | 4 +---
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index cf34dac007..ee464f73e1 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -299,9 +299,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 HACK(ryand): seed is only used in a particular case when `noise` is None, but we need to re-generate the
                 same noise used earlier in the pipeline. This should really be handled in a clearer way.
             timesteps: The timestep schedule for the denoising process.
-            init_timestep: The first timestep in the schedule.
-                TODO(ryand): I'm pretty sure this should always be the same as timesteps[0:1]. Confirm that that is the
-                case, and remove this duplicate param.
+            init_timestep: The first timestep in the schedule. This is used to determine the initial noise level, so
+                should be populated if you want noise applied *even* if timesteps is empty.
             callback: A callback function that is called to report progress during the denoising process.
             control_data: ControlNet data.
             ip_adapter_data: IP-Adapter data.
@@ -316,9 +315,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
                 SD UNet model.
             is_gradient_mask: A flag indicating whether `mask` is a gradient mask or not.
         """
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where denoising_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if init_timestep.shape[0] == 0:
             return latents
 
         orig_latents = latents.clone()
diff --git a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
index 2c9d39c8cc..0ddcfdd380 100644
--- a/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
+++ b/invokeai/backend/stable_diffusion/multi_diffusion_pipeline.py
@@ -49,9 +49,7 @@ class MultiDiffusionPipeline(StableDiffusionGeneratorPipeline):
     ) -> torch.Tensor:
         self._check_regional_prompting(multi_diffusion_conditioning)
 
-        # TODO(ryand): Figure out why this condition is necessary, and document it. My guess is that it's to handle
-        # cases where denoising_start and denoising_end are set such that there are no timesteps.
-        if init_timestep.shape[0] == 0 or timesteps.shape[0] == 0:
+        if init_timestep.shape[0] == 0:
             return latents
 
         batch_size, _, latent_height, latent_width = latents.shape
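
For reference, the behaviour these two patches restore can be illustrated outside of InvokeAI with a short, self-contained sketch. This is not InvokeAI code: it uses diffusers' DDPMScheduler purely as a stand-in for whichever scheduler is configured, and the latent shapes are illustrative.

import torch
from diffusers import DDPMScheduler  # stand-in; any scheduler with add_noise() behaves similarly

scheduler = DDPMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(30)

# Pretend denoising_start == denoising_end: the denoising window is empty, but we still
# know which timestep the input latents should be noised to.
init_timestep = scheduler.timesteps[0:1]  # first timestep of the schedule, shape (1,)
timesteps = scheduler.timesteps[0:0]      # empty window, shape (0,)

latents = torch.randn(2, 4, 64, 64)  # a batch of 2 latent images
noise = torch.randn_like(latents)

# Mirrors the restored logic: the initial noising is gated on init_timestep, not on timesteps.
if init_timestep.shape[0] > 0:
    batched_init_timestep = init_timestep.expand(latents.shape[0])
    latents = scheduler.add_noise(latents, noise, batched_init_timestep)

# Under the pre-revert check (`if timesteps.shape[0] == 0: return latents`) this noising
# would have been skipped entirely; with the check on `init_timestep.shape[0]` it still
# runs even though there is nothing left to denoise.
print(timesteps.shape[0], init_timestep.shape[0])  # 0 1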