Fix preview, inpaint

2024-08-30 20:32:17 +00:00 · 2023-08-07 21:27:32 +03:00
parent 2539e26c18
commit 1db2c93f75
4 changed files with 58 additions and 36 deletions
--- a/invokeai/app/util/step_callback.py
+++ b/invokeai/app/util/step_callback.py
@ -7,6 +7,7 @@ from ...backend.util.util import image_to_dataURL
 from ...backend.generator.base import Generator
 from ...backend.stable_diffusion import PipelineIntermediateState
 from invokeai.app.services.config import InvokeAIAppConfig
+from ...backend.model_management.models import BaseModelType


 def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None):
@ -29,6 +30,7 @@ def stable_diffusion_step_callback(
    intermediate_state: PipelineIntermediateState,
    node: dict,
    source_node_id: str,
+    base_model: BaseModelType,
 ):
    if context.services.queue.is_canceled(context.graph_execution_state_id):
        raise CanceledException
@ -56,23 +58,51 @@ def stable_diffusion_step_callback(

    # TODO: only output a preview image when requested

-    # origingally adapted from code by @erucipe and @keturn here:
-    # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+    if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
+        sdxl_latent_rgb_factors = torch.tensor(
+            [
+                #   R        G        B
+                [0.3816, 0.4930, 0.5320],
+                [-0.3753, 0.1631, 0.1739],
+                [0.1770, 0.3588, -0.2048],
+                [-0.4350, -0.2644, -0.4289],
+            ],
+            dtype=sample.dtype,
+            device=sample.device,
+        )

-    # these updated numbers for v1.5 are from @torridgristle
-    v1_5_latent_rgb_factors = torch.tensor(
-        [
-            #    R        G        B
-            [0.3444, 0.1385, 0.0670],  # L1
-            [0.1247, 0.4027, 0.1494],  # L2
-            [-0.3192, 0.2513, 0.2103],  # L3
-            [-0.1307, -0.1874, -0.7445],  # L4
-        ],
-        dtype=sample.dtype,
-        device=sample.device,
-    )
+        sdxl_smooth_matrix = torch.tensor(
+            [
+                # [ 0.0478,  0.1285,  0.0478],
+                # [ 0.1285,  0.2948,  0.1285],
+                # [ 0.0478,  0.1285,  0.0478],
+                [0.0358, 0.0964, 0.0358],
+                [0.0964, 0.4711, 0.0964],
+                [0.0358, 0.0964, 0.0358],
+            ],
+            dtype=sample.dtype,
+            device=sample.device,
+        )

-    image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
+        image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
+    else:
+        # originally adapted from code by @erucipe and @keturn here:
+        # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
+
+        # these updated numbers for v1.5 are from @torridgristle
+        v1_5_latent_rgb_factors = torch.tensor(
+            [
+                #    R        G        B
+                [0.3444, 0.1385, 0.0670],  # L1
+                [0.1247, 0.4027, 0.1494],  # L2
+                [-0.3192, 0.2513, 0.2103],  # L3
+                [-0.1307, -0.1874, -0.7445],  # L4
+            ],
+            dtype=sample.dtype,
+            device=sample.device,
+        )
+
+        image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)

    (width, height) = image.size
    width *= 8