fix(events): fix denoise progress percentage

- Restore the calculation of the step percentage, now done in the backend instead of the client - Simplify signatures for denoise progress event callbacks - Clean up `step_callback.py` (add types, do not recreate the constant matrices on every step, formatting)
2024-08-30 20:32:17 +00:00 · 2024-04-01 18:16:33 +11:00 · 2024-04-01 18:16:33 +11:00 · 0f733c42fc
commit 0f733c42fc
parent ec4f10aed3
3 changed files with 59 additions and 65 deletions
--- a/invokeai/app/services/events/events_base.py
+++ b/invokeai/app/services/events/events_base.py
@ -32,6 +32,7 @@ from invokeai.app.services.events.events_common import (
    SessionCompleteEvent,
    SessionStartedEvent,
 )
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState

 if TYPE_CHECKING:
    from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
@ -64,12 +65,11 @@ class EventServiceBase:
        self,
        queue_item: "SessionQueueItem",
        invocation: "BaseInvocation",
-        step: int,
-        total_steps: int,
+        intermediate_state: PipelineIntermediateState,
        progress_image: "ProgressImage",
    ) -> None:
        """Emitted at each step during denoising of an invocation."""
-        self.dispatch(InvocationDenoiseProgressEvent.build(queue_item, invocation, step, total_steps, progress_image))
+        self.dispatch(InvocationDenoiseProgressEvent.build(queue_item, invocation, intermediate_state, progress_image))

    def emit_invocation_complete(
        self, queue_item: "SessionQueueItem", invocation: "BaseInvocation", output: "BaseInvocationOutput"
--- a/invokeai/app/services/events/events_common.py
+++ b/invokeai/app/services/events/events_common.py
@ -1,3 +1,4 @@
+from math import floor
 from typing import TYPE_CHECKING, Any, Coroutine, Optional, Protocol, TypeAlias, TypeVar

 from fastapi_events.handlers.local import local_handler
@ -14,6 +15,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
 )
 from invokeai.app.util.misc import get_timestamp
 from invokeai.backend.model_manager.config import AnyModelConfig, SubModelType
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState

 if TYPE_CHECKING:
    from invokeai.app.services.download.download_base import DownloadJob
@ -68,7 +70,8 @@ def register_events(events: set[type[TEvent]], func: FastAPIEventFunc) -> None:
    :param func: The function to handle the events
    """
    for event in events:
-        local_handler.register(event_name=event.__event_name__, _func=func)
+        assert hasattr(event, "__event_name__")
+        local_handler.register(event_name=event.__event_name__, _func=func)  # pyright: ignore [reportUnknownMemberType, reportUnknownArgumentType, reportAttributeAccessIssue]


 class QueueEventBase(EventBase):
@ -128,16 +131,20 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
    progress_image: ProgressImage = Field(description="The progress image sent at each step during processing")
    step: int = Field(description="The current step of the invocation")
    total_steps: int = Field(description="The total number of steps in the invocation")
+    order: int = Field(description="The order of the invocation in the session")
+    percentage: float = Field(description="The percentage of completion of the invocation")

    @classmethod
    def build(
        cls,
        queue_item: SessionQueueItem,
        invocation: BaseInvocation,
-        step: int,
-        total_steps: int,
+        intermediate_state: PipelineIntermediateState,
        progress_image: ProgressImage,
    ) -> "InvocationDenoiseProgressEvent":
+        step = intermediate_state.step
+        total_steps = intermediate_state.total_steps
+        order = intermediate_state.order
        return cls(
            queue_id=queue_item.queue_id,
            item_id=queue_item.item_id,
@ -149,8 +156,20 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
            progress_image=progress_image,
            step=step,
            total_steps=total_steps,
+            order=order,
+            percentage=cls.calc_percentage(step, total_steps, order),
        )

+    @staticmethod
+    def calc_percentage(step: int, total_steps: int, scheduler_order: float) -> float:
+        """Calculate the percentage of completion of denoising."""
+        if total_steps == 0:
+            return 0.0
+        if scheduler_order == 2:
+            return floor((step + 1 + 1) / 2) / floor((total_steps + 1) / 2)
+        # order == 1
+        return (step + 1 + 1) / (total_steps + 1)
+

 class InvocationCompleteEvent(InvocationEventBase):
    """Event model for invocation_complete"""
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, Optional

 import torch
 from PIL import Image
@ -13,8 +13,36 @@ if TYPE_CHECKING:
    from invokeai.app.services.events.events_base import EventServiceBase
    from invokeai.app.services.shared.invocation_context import InvocationContextData

+# fast latents preview matrix for sdxl
+# generated by @StAlKeR7779
+SDXL_LATENT_RGB_FACTORS = [
+    #   R        G        B
+    [0.3816, 0.4930, 0.5320],
+    [-0.3753, 0.1631, 0.1739],
+    [0.1770, 0.3588, -0.2048],
+    [-0.4350, -0.2644, -0.4289],
+]
+SDXL_SMOOTH_MATRIX = [
+    [0.0358, 0.0964, 0.0358],
+    [0.0964, 0.4711, 0.0964],
+    [0.0358, 0.0964, 0.0358],
+]

-def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None):
+# originally adapted from code by @erucipe and @keturn here:
+# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+# these updated numbers for v1.5 are from @torridgristle
+SD1_5_LATENT_RGB_FACTORS = [
+    #    R        G        B
+    [0.3444, 0.1385, 0.0670],  # L1
+    [0.1247, 0.4027, 0.1494],  # L2
+    [-0.3192, 0.2513, 0.2103],  # L3
+    [-0.1307, -0.1874, -0.7445],  # L4
+]
+
+
+def sample_to_lowres_estimated_image(
+    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
+):
    latent_image = samples[0].permute(1, 2, 0) @ latent_rgb_factors

    if smooth_matrix is not None:
@ -47,64 +75,12 @@ def stable_diffusion_step_callback(
    else:
        sample = intermediate_state.latents

-    # TODO: This does not seem to be needed any more?
-    # # txt2img provides a Tensor in the step_callback
-    # # img2img provides a PipelineIntermediateState
-    # if isinstance(sample, PipelineIntermediateState):
-    #     # this was an img2img
-    #     print('img2img')
-    #     latents = sample.latents
-    #     step = sample.step
-    # else:
-    #     print('txt2img')
-    #     latents = sample
-    #     step = intermediate_state.step
-
-    # TODO: only output a preview image when requested
-
    if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
-        # fast latents preview matrix for sdxl
-        # generated by @StAlKeR7779
-        sdxl_latent_rgb_factors = torch.tensor(
-            [
-                #   R        G        B
-                [0.3816, 0.4930, 0.5320],
-                [-0.3753, 0.1631, 0.1739],
-                [0.1770, 0.3588, -0.2048],
-                [-0.4350, -0.2644, -0.4289],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
-        sdxl_smooth_matrix = torch.tensor(
-            [
-                [0.0358, 0.0964, 0.0358],
-                [0.0964, 0.4711, 0.0964],
-                [0.0358, 0.0964, 0.0358],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        sdxl_latent_rgb_factors = torch.tensor(SDXL_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
+        sdxl_smooth_matrix = torch.tensor(SDXL_SMOOTH_MATRIX, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
    else:
-        # origingally adapted from code by @erucipe and @keturn here:
-        # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
-
-        # these updated numbers for v1.5 are from @torridgristle
-        v1_5_latent_rgb_factors = torch.tensor(
-            [
-                #    R        G        B
-                [0.3444, 0.1385, 0.0670],  # L1
-                [0.1247, 0.4027, 0.1494],  # L2
-                [-0.3192, 0.2513, 0.2103],  # L3
-                [-0.1307, -0.1874, -0.7445],  # L4
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        v1_5_latent_rgb_factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)

    (width, height) = image.size
@ -116,7 +92,6 @@ def stable_diffusion_step_callback(
    events.emit_invocation_denoise_progress(
        context_data.queue_item,
        context_data.invocation,
-        intermediate_state.step,
-        intermediate_state.total_steps * intermediate_state.order,
+        intermediate_state,
        ProgressImage(dataURL=dataURL, width=width, height=height),
    )