fix(events): fix denoise progress percentage

- Restore calculation of the step percentage, but in the backend instead of the client
- Simplify signatures for denoise progress event callbacks
- Clean up `step_callback.py` (types, do not recreate constant matrix on every step, formatting)
2024-08-30 20:32:17 +00:00 · 2024-04-01 18:16:33 +11:00
parent ec4f10aed3
commit 0f733c42fc
3 changed files with 59 additions and 65 deletions
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, Optional

 import torch
 from PIL import Image
@ -13,8 +13,36 @@ if TYPE_CHECKING:
    from invokeai.app.services.events.events_base import EventServiceBase
    from invokeai.app.services.shared.invocation_context import InvocationContextData

+# fast latents preview matrix for sdxl
+# generated by @StAlKeR7779
+SDXL_LATENT_RGB_FACTORS = [
+    #   R        G        B
+    [0.3816, 0.4930, 0.5320],
+    [-0.3753, 0.1631, 0.1739],
+    [0.1770, 0.3588, -0.2048],
+    [-0.4350, -0.2644, -0.4289],
+]
+SDXL_SMOOTH_MATRIX = [
+    [0.0358, 0.0964, 0.0358],
+    [0.0964, 0.4711, 0.0964],
+    [0.0358, 0.0964, 0.0358],
+]

-def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None):
+# originally adapted from code by @erucipe and @keturn here:
+# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+# these updated numbers for v1.5 are from @torridgristle
+SD1_5_LATENT_RGB_FACTORS = [
+    #    R        G        B
+    [0.3444, 0.1385, 0.0670],  # L1
+    [0.1247, 0.4027, 0.1494],  # L2
+    [-0.3192, 0.2513, 0.2103],  # L3
+    [-0.1307, -0.1874, -0.7445],  # L4
+]
+
+
+def sample_to_lowres_estimated_image(
+    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
+):
    latent_image = samples[0].permute(1, 2, 0) @ latent_rgb_factors

    if smooth_matrix is not None:
@ -47,64 +75,12 @@ def stable_diffusion_step_callback(
    else:
        sample = intermediate_state.latents

-    # TODO: This does not seem to be needed any more?
-    # # txt2img provides a Tensor in the step_callback
-    # # img2img provides a PipelineIntermediateState
-    # if isinstance(sample, PipelineIntermediateState):
-    #     # this was an img2img
-    #     print('img2img')
-    #     latents = sample.latents
-    #     step = sample.step
-    # else:
-    #     print('txt2img')
-    #     latents = sample
-    #     step = intermediate_state.step
-
-    # TODO: only output a preview image when requested
-
    if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
-        # fast latents preview matrix for sdxl
-        # generated by @StAlKeR7779
-        sdxl_latent_rgb_factors = torch.tensor(
-            [
-                #   R        G        B
-                [0.3816, 0.4930, 0.5320],
-                [-0.3753, 0.1631, 0.1739],
-                [0.1770, 0.3588, -0.2048],
-                [-0.4350, -0.2644, -0.4289],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
-        sdxl_smooth_matrix = torch.tensor(
-            [
-                [0.0358, 0.0964, 0.0358],
-                [0.0964, 0.4711, 0.0964],
-                [0.0358, 0.0964, 0.0358],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        sdxl_latent_rgb_factors = torch.tensor(SDXL_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
+        sdxl_smooth_matrix = torch.tensor(SDXL_SMOOTH_MATRIX, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
    else:
-        # origingally adapted from code by @erucipe and @keturn here:
-        # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
-
-        # these updated numbers for v1.5 are from @torridgristle
-        v1_5_latent_rgb_factors = torch.tensor(
-            [
-                #    R        G        B
-                [0.3444, 0.1385, 0.0670],  # L1
-                [0.1247, 0.4027, 0.1494],  # L2
-                [-0.3192, 0.2513, 0.2103],  # L3
-                [-0.1307, -0.1874, -0.7445],  # L4
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        v1_5_latent_rgb_factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)

    (width, height) = image.size
@ -116,7 +92,6 @@ def stable_diffusion_step_callback(
    events.emit_invocation_denoise_progress(
        context_data.queue_item,
        context_data.invocation,
-        intermediate_state.step,
-        intermediate_state.total_steps * intermediate_state.order,
+        intermediate_state,
        ProgressImage(dataURL=dataURL, width=width, height=height),
    )