diff --git a/invokeai/app/services/events/events_base.py b/invokeai/app/services/events/events_base.py
index 5b96a36279..934cb995d0 100644
--- a/invokeai/app/services/events/events_base.py
+++ b/invokeai/app/services/events/events_base.py
@@ -32,6 +32,7 @@ from invokeai.app.services.events.events_common import (
     SessionCompleteEvent,
     SessionStartedEvent,
 )
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
 
 if TYPE_CHECKING:
     from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput
@@ -64,12 +65,11 @@ class EventServiceBase:
         self,
         queue_item: "SessionQueueItem",
         invocation: "BaseInvocation",
-        step: int,
-        total_steps: int,
+        intermediate_state: PipelineIntermediateState,
         progress_image: "ProgressImage",
     ) -> None:
         """Emitted at each step during denoising of an invocation."""
-        self.dispatch(InvocationDenoiseProgressEvent.build(queue_item, invocation, step, total_steps, progress_image))
+        self.dispatch(InvocationDenoiseProgressEvent.build(queue_item, invocation, intermediate_state, progress_image))
 
     def emit_invocation_complete(
         self, queue_item: "SessionQueueItem", invocation: "BaseInvocation", output: "BaseInvocationOutput"
diff --git a/invokeai/app/services/events/events_common.py b/invokeai/app/services/events/events_common.py
index 0efcf91f9e..fa447bc11b 100644
--- a/invokeai/app/services/events/events_common.py
+++ b/invokeai/app/services/events/events_common.py
@@ -1,3 +1,4 @@
+from math import floor
 from typing import TYPE_CHECKING, Any, Coroutine, Optional, Protocol, TypeAlias, TypeVar
 
 from fastapi_events.handlers.local import local_handler
@@ -14,6 +15,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
 )
 from invokeai.app.util.misc import get_timestamp
 from invokeai.backend.model_manager.config import AnyModelConfig, SubModelType
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
 
 if TYPE_CHECKING:
     from invokeai.app.services.download.download_base import DownloadJob
@@ -68,7 +70,8 @@ def register_events(events: set[type[TEvent]], func: FastAPIEventFunc) -> None:
     :param func: The function to handle the events
     """
     for event in events:
-        local_handler.register(event_name=event.__event_name__, _func=func)
+        assert hasattr(event, "__event_name__")
+        local_handler.register(event_name=event.__event_name__, _func=func)  # pyright: ignore [reportUnknownMemberType, reportUnknownArgumentType, reportAttributeAccessIssue]
 
 
 class QueueEventBase(EventBase):
@@ -128,16 +131,20 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
 
     progress_image: ProgressImage = Field(description="The progress image sent at each step during processing")
     step: int = Field(description="The current step of the invocation")
     total_steps: int = Field(description="The total number of steps in the invocation")
+    order: int = Field(description="The order of the scheduler used during denoising")
+    percentage: float = Field(description="The percentage of completion of the denoising process")
 
     @classmethod
     def build(
         cls,
         queue_item: SessionQueueItem,
         invocation: BaseInvocation,
-        step: int,
-        total_steps: int,
+        intermediate_state: PipelineIntermediateState,
         progress_image: ProgressImage,
     ) -> "InvocationDenoiseProgressEvent":
+        step = intermediate_state.step
+        total_steps = intermediate_state.total_steps
+        order = intermediate_state.order
         return cls(
             queue_id=queue_item.queue_id,
             item_id=queue_item.item_id,
@@ -149,8 +156,20 @@ class InvocationDenoiseProgressEvent(InvocationEventBase):
             progress_image=progress_image,
             step=step,
             total_steps=total_steps,
+            order=order,
+            percentage=cls.calc_percentage(step, total_steps, order),
         )
 
+    @staticmethod
+    def calc_percentage(step: int, total_steps: int, scheduler_order: float) -> float:
+        """Calculate the percentage of completion of denoising."""
+        if total_steps == 0:
+            return 0.0
+        if scheduler_order == 2:
+            return floor((step + 1 + 1) / 2) / floor((total_steps + 1) / 2)
+        # order == 1
+        return (step + 1 + 1) / (total_steps + 1)
+
 
 class InvocationCompleteEvent(InvocationEventBase):
     """Event model for invocation_complete"""
diff --git a/invokeai/app/util/step_callback.py b/invokeai/app/util/step_callback.py
index 1bbd6bc8d0..8992e59ace 100644
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@@ -1,4 +1,4 @@
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, Optional
 
 import torch
 from PIL import Image
@@ -13,8 +13,36 @@ if TYPE_CHECKING:
     from invokeai.app.services.events.events_base import EventServiceBase
     from invokeai.app.services.shared.invocation_context import InvocationContextData
 
+# fast latents preview matrix for sdxl
+# generated by @StAlKeR7779
+SDXL_LATENT_RGB_FACTORS = [
+    #    R        G        B
+    [0.3816, 0.4930, 0.5320],
+    [-0.3753, 0.1631, 0.1739],
+    [0.1770, 0.3588, -0.2048],
+    [-0.4350, -0.2644, -0.4289],
+]
+SDXL_SMOOTH_MATRIX = [
+    [0.0358, 0.0964, 0.0358],
+    [0.0964, 0.4711, 0.0964],
+    [0.0358, 0.0964, 0.0358],
+]
 
-def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None):
+# originally adapted from code by @erucipe and @keturn here:
+# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+# these updated numbers for v1.5 are from @torridgristle
+SD1_5_LATENT_RGB_FACTORS = [
+    #    R        G        B
+    [0.3444, 0.1385, 0.0670],  # L1
+    [0.1247, 0.4027, 0.1494],  # L2
+    [-0.3192, 0.2513, 0.2103],  # L3
+    [-0.1307, -0.1874, -0.7445],  # L4
+]
+
+
+def sample_to_lowres_estimated_image(
+    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
+):
     latent_image = samples[0].permute(1, 2, 0) @ latent_rgb_factors
 
     if smooth_matrix is not None:
@@ -47,64 +75,12 @@ def stable_diffusion_step_callback(
     else:
         sample = intermediate_state.latents
 
-    # TODO: This does not seem to be needed any more?
-    # # txt2img provides a Tensor in the step_callback
-    # # img2img provides a PipelineIntermediateState
-    # if isinstance(sample, PipelineIntermediateState):
-    #     # this was an img2img
-    #     print('img2img')
-    #     latents = sample.latents
-    #     step = sample.step
-    # else:
-    #     print('txt2img')
-    #     latents = sample
-    #     step = intermediate_state.step
-
-    # TODO: only output a preview image when requested
-
     if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
-        # fast latents preview matrix for sdxl
-        # generated by @StAlKeR7779
-        sdxl_latent_rgb_factors = torch.tensor(
-            [
-                #    R        G        B
-                [0.3816, 0.4930, 0.5320],
-                [-0.3753, 0.1631, 0.1739],
-                [0.1770, 0.3588, -0.2048],
-                [-0.4350, -0.2644, -0.4289],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
-        sdxl_smooth_matrix = torch.tensor(
-            [
-                [0.0358, 0.0964, 0.0358],
-                [0.0964, 0.4711, 0.0964],
-                [0.0358, 0.0964, 0.0358],
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        sdxl_latent_rgb_factors = torch.tensor(SDXL_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
+        sdxl_smooth_matrix = torch.tensor(SDXL_SMOOTH_MATRIX, dtype=sample.dtype, device=sample.device)
         image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
     else:
-        # origingally adapted from code by @erucipe and @keturn here:
-        # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
-
-        # these updated numbers for v1.5 are from @torridgristle
-        v1_5_latent_rgb_factors = torch.tensor(
-            [
-                #    R        G        B
-                [0.3444, 0.1385, 0.0670],  # L1
-                [0.1247, 0.4027, 0.1494],  # L2
-                [-0.3192, 0.2513, 0.2103],  # L3
-                [-0.1307, -0.1874, -0.7445],  # L4
-            ],
-            dtype=sample.dtype,
-            device=sample.device,
-        )
-
+        v1_5_latent_rgb_factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=sample.dtype, device=sample.device)
        image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
 
     (width, height) = image.size
@@ -116,7 +92,6 @@ def stable_diffusion_step_callback(
     events.emit_invocation_denoise_progress(
         context_data.queue_item,
         context_data.invocation,
-        intermediate_state.step,
-        intermediate_state.total_steps * intermediate_state.order,
+        intermediate_state,
         ProgressImage(dataURL=dataURL, width=width, height=height),
     )
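The percentage math in `calc_percentage` is easiest to see with concrete numbers. The sketch below mirrors the static method added to `events_common.py`; the step counts and printed sequences are illustrative values worked out by hand, not taken from the patch:

```python
# Standalone mirror of InvocationDenoiseProgressEvent.calc_percentage,
# extracted so it can be run on its own. Demo inputs are illustrative.
from math import floor


def calc_percentage(step: int, total_steps: int, scheduler_order: float) -> float:
    if total_steps == 0:
        return 0.0
    if scheduler_order == 2:
        # Collapse pairs of steps so the reported progress advances
        # every other callback for second-order schedulers.
        return floor((step + 1 + 1) / 2) / floor((total_steps + 1) / 2)
    # order == 1
    return (step + 1 + 1) / (total_steps + 1)


# Order 1, 10 steps: progress climbs evenly and hits exactly 1.0 on the
# final zero-indexed step.
print([round(calc_percentage(s, 10, 1), 3) for s in range(10)])
# [0.182, 0.273, 0.364, 0.455, 0.545, 0.636, 0.727, 0.818, 0.909, 1.0]

# Order 2, 10 steps: consecutive steps report the same percentage in pairs.
print([round(calc_percentage(s, 10, 2), 3) for s in range(10)])
# [0.2, 0.2, 0.4, 0.4, 0.6, 0.6, 0.8, 0.8, 1.0, 1.0]
```

Note the `step + 1 + 1` numerator: one `+1` converts the zero-indexed step to a count, and the second appears to bias the result so the final callback reports exactly 100% rather than stopping one increment short.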
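The factor tables hoisted to module scope in `step_callback.py` feed the same projection as before: `sample_to_lowres_estimated_image` treats each latent pixel's four channels as a vector and multiplies it by a 4×3 matrix to get an approximate RGB value, skipping the VAE decode entirely. A minimal sketch of that shape contract, using random latents as a stand-in for the real tensor carried by `PipelineIntermediateState`:

```python
import torch

# Copy of SD1_5_LATENT_RGB_FACTORS from the patch: one RGB triple per
# latent channel.
SD1_5_LATENT_RGB_FACTORS = [
    #    R        G        B
    [0.3444, 0.1385, 0.0670],
    [0.1247, 0.4027, 0.1494],
    [-0.3192, 0.2513, 0.2103],
    [-0.1307, -0.1874, -0.7445],
]

# Stand-in latents: SD1.x latents for a 512x512 image are [1, 4, 64, 64].
samples = torch.randn(1, 4, 64, 64)
factors = torch.tensor(SD1_5_LATENT_RGB_FACTORS, dtype=samples.dtype, device=samples.device)

# [H, W, 4] @ [4, 3] -> [H, W, 3]: each output pixel is a linear
# combination of the four latent channels at that position.
latent_image = samples[0].permute(1, 2, 0) @ factors
print(latent_image.shape)  # torch.Size([64, 64, 3])
```

This is also why only the `torch.tensor(..., dtype=sample.dtype, device=sample.device)` calls remain in the branch bodies: the constant data never changes, only the dtype and device it must be materialized on at callback time.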