mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
Merge branch 'main' into stalker-modular_t2i_adapter
@@ -354,7 +354,7 @@ class CLIPVisionDiffusersConfig(DiffusersConfigBase):
     """Model config for CLIPVision."""

     type: Literal[ModelType.CLIPVision] = ModelType.CLIPVision
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

     @staticmethod
     def get_tag() -> Tag:
@@ -365,7 +365,7 @@ class T2IAdapterConfig(DiffusersConfigBase, ControlAdapterConfigBase):
     """Model config for T2I."""

     type: Literal[ModelType.T2IAdapter] = ModelType.T2IAdapter
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

     @staticmethod
     def get_tag() -> Tag:
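Both config hunks above make the same change: the `format` discriminator field gains a default value, so these configs can be constructed without spelling out the only value the `Literal` type allows. A minimal sketch of the effect, using a stand-in Pydantic model rather than InvokeAI's actual classes:

from typing import Literal

from pydantic import BaseModel


class DiffusersConfig(BaseModel):
    # With a default, the Literal still works as a tagged-union discriminator,
    # but callers no longer have to pass the one legal value themselves.
    format: Literal["diffusers"] = "diffusers"


cfg = DiffusersConfig()  # without the default, this raises a ValidationError
assert cfg.format == "diffusers"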
@@ -98,6 +98,9 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                 ModelVariantType.Normal: StableDiffusionXLPipeline,
                 ModelVariantType.Inpaint: StableDiffusionXLInpaintPipeline,
             },
+            BaseModelType.StableDiffusionXLRefiner: {
+                ModelVariantType.Normal: StableDiffusionXLPipeline,
+            },
         }
         assert isinstance(config, MainCheckpointConfig)
         try:
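This loader hunk registers a pipeline class for the SDXL refiner base, extending a nested BaseModelType -> ModelVariantType -> pipeline-class mapping. A hedged sketch of the lookup pattern being extended (stand-in string keys, not the real enums):

# Stand-in sketch of the two-level lookup the hunk extends.
PIPELINE_CLASSES = {
    "sdxl": {
        "normal": "StableDiffusionXLPipeline",
        "inpaint": "StableDiffusionXLInpaintPipeline",
    },
    # The added entry; without it, loading an SDXL-refiner checkpoint
    # presumably failed this lookup.
    "sdxl-refiner": {
        "normal": "StableDiffusionXLPipeline",
    },
}

pipeline_class = PIPELINE_CLASSES["sdxl-refiner"]["normal"]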
@@ -187,164 +187,171 @@ STARTER_MODELS: list[StarterModel] = [
     # endregion
     # region ControlNet
     StarterModel(
-        name="QRCode Monster",
+        name="QRCode Monster v2 (SD1.5)",
         base=BaseModelType.StableDiffusion1,
-        source="monster-labs/control_v1p_sd15_qrcode_monster",
-        description="Controlnet model that generates scannable creative QR codes",
+        source="monster-labs/control_v1p_sd15_qrcode_monster::v2",
+        description="ControlNet model that generates scannable creative QR codes",
         type=ModelType.ControlNet,
     ),
+    StarterModel(
+        name="QRCode Monster (SDXL)",
+        base=BaseModelType.StableDiffusionXL,
+        source="monster-labs/control_v1p_sdxl_qrcode_monster",
+        description="ControlNet model that generates scannable creative QR codes",
+        type=ModelType.ControlNet,
+    ),
     StarterModel(
         name="canny",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_canny",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning.",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="inpaint",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_inpaint",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, inpaint version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, inpaint version",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="mlsd",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_mlsd",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, MLSD version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, MLSD version",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="depth",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11f1p_sd15_depth",
-        description="Controlnet weights trained on sd-1.5 with depth conditioning",
+        description="ControlNet weights trained on sd-1.5 with depth conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="normal_bae",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_normalbae",
-        description="Controlnet weights trained on sd-1.5 with normalbae image conditioning",
+        description="ControlNet weights trained on sd-1.5 with normalbae image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="seg",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_seg",
-        description="Controlnet weights trained on sd-1.5 with seg image conditioning",
+        description="ControlNet weights trained on sd-1.5 with seg image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="lineart",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_lineart",
-        description="Controlnet weights trained on sd-1.5 with lineart image conditioning",
+        description="ControlNet weights trained on sd-1.5 with lineart image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="lineart_anime",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15s2_lineart_anime",
-        description="Controlnet weights trained on sd-1.5 with anime image conditioning",
+        description="ControlNet weights trained on sd-1.5 with anime image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="openpose",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_openpose",
-        description="Controlnet weights trained on sd-1.5 with openpose image conditioning",
+        description="ControlNet weights trained on sd-1.5 with openpose image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="scribble",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_scribble",
-        description="Controlnet weights trained on sd-1.5 with scribble image conditioning",
+        description="ControlNet weights trained on sd-1.5 with scribble image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="softedge",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11p_sd15_softedge",
-        description="Controlnet weights trained on sd-1.5 with soft edge conditioning",
+        description="ControlNet weights trained on sd-1.5 with soft edge conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="shuffle",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11e_sd15_shuffle",
-        description="Controlnet weights trained on sd-1.5 with shuffle image conditioning",
+        description="ControlNet weights trained on sd-1.5 with shuffle image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="tile",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11f1e_sd15_tile",
-        description="Controlnet weights trained on sd-1.5 with tiled image conditioning",
+        description="ControlNet weights trained on sd-1.5 with tiled image conditioning",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="ip2p",
         base=BaseModelType.StableDiffusion1,
         source="lllyasviel/control_v11e_sd15_ip2p",
-        description="Controlnet weights trained on sd-1.5 with ip2p conditioning.",
+        description="ControlNet weights trained on sd-1.5 with ip2p conditioning.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="canny-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="xinsir/controlnet-canny-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="depth-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="diffusers/controlnet-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with depth conditioning.",
+        description="ControlNet weights trained on sdxl-1.0 with depth conditioning.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="softedge-dexined-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="SargeZT/controlnet-sd-xl-1.0-softedge-dexined",
-        description="Controlnet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
+        description="ControlNet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="depth-16bit-zoe-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="SargeZT/controlnet-sd-xl-1.0-depth-16bit-zoe",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="depth-zoe-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="diffusers/controlnet-zoe-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="openpose-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="xinsir/controlnet-openpose-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="scribble-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="xinsir/controlnet-scribble-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
         type=ModelType.ControlNet,
     ),
     StarterModel(
         name="tile-sdxl",
         base=BaseModelType.StableDiffusionXL,
         source="xinsir/controlnet-tile-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with tiled image conditioning",
+        description="ControlNet weights trained on sdxl-1.0 with tiled image conditioning",
         type=ModelType.ControlNet,
     ),
     # endregion
invokeai/backend/stable_diffusion/__init__.py
@@ -7,11 +7,9 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import (  # noqa: F401
     StableDiffusionGeneratorPipeline,
 )
 from invokeai.backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent  # noqa: F401
-from invokeai.backend.stable_diffusion.seamless import set_seamless  # noqa: F401

 __all__ = [
     "PipelineIntermediateState",
     "StableDiffusionGeneratorPipeline",
     "InvokeAIDiffuserComponent",
-    "set_seamless",
 ]
invokeai/backend/stable_diffusion/extensions/seamless.py (new file, 71 lines)
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import Callable, Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+from diffusers import UNet2DConditionModel
+from diffusers.models.lora import LoRACompatibleConv
+
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
+
+
+class SeamlessExt(ExtensionBase):
+    def __init__(
+        self,
+        seamless_axes: List[str],
+    ):
+        super().__init__()
+        self._seamless_axes = seamless_axes
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        with self.static_patch_model(
+            model=unet,
+            seamless_axes=self._seamless_axes,
+        ):
+            yield
+
+    @staticmethod
+    @contextmanager
+    def static_patch_model(
+        model: torch.nn.Module,
+        seamless_axes: List[str],
+    ):
+        if not seamless_axes:
+            yield
+            return
+
+        x_mode = "circular" if "x" in seamless_axes else "constant"
+        y_mode = "circular" if "y" in seamless_axes else "constant"
+
+        # override conv_forward
+        # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
+        def _conv_forward_asymmetric(
+            self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None
+        ):
+            self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
+            self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
+            working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
+            working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
+            return torch.nn.functional.conv2d(
+                working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
+            )
+
+        original_layers: List[Tuple[nn.Conv2d, Callable]] = []
+        try:
+            for layer in model.modules():
+                if not isinstance(layer, torch.nn.Conv2d):
+                    continue
+
+                if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
+                    layer.lora_layer = lambda *x: 0
+                original_layers.append((layer, layer._conv_forward))
+                layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
+
+            yield
+
+        finally:
+            for layer, orig_conv_forward in original_layers:
+                layer._conv_forward = orig_conv_forward
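The new extension keeps the same core trick as the old set_seamless: temporarily rebinding `_conv_forward` on every `Conv2d` so that padding is circular along the seamless axes. A self-contained demo of what the patched forward computes (plain PyTorch, no InvokeAI imports; names and shapes are illustrative):

import torch

conv = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)
x = torch.randn(1, 1, 8, 8)

# Equivalent of patching with seamless_axes=["x"]: pad the width circularly,
# the height with zeros, then convolve with no additional padding.
padded = torch.nn.functional.pad(x, (1, 1, 0, 0), mode="circular")
padded = torch.nn.functional.pad(padded, (0, 0, 1, 1), mode="constant")
out = torch.nn.functional.conv2d(padded, conv.weight, None, conv.stride, 0)

print(out.shape)  # torch.Size([1, 1, 8, 8]), same shape as the unpatched conv
# The leftmost and rightmost output columns now "see" pixels wrapped from the
# opposite edge, which is what lets x-tiled images meet without a visible seam.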
invokeai/backend/stable_diffusion/seamless.py (deleted file, 51 lines)
@@ -1,51 +0,0 @@
-from contextlib import contextmanager
-from typing import Callable, List, Optional, Tuple, Union
-
-import torch
-import torch.nn as nn
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
-from diffusers.models.lora import LoRACompatibleConv
-from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
-
-
-@contextmanager
-def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
-    if not seamless_axes:
-        yield
-        return
-
-    # override conv_forward
-    # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
-    def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
-        self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
-        working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
-        working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
-        return torch.nn.functional.conv2d(
-            working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
-        )
-
-    original_layers: List[Tuple[nn.Conv2d, Callable]] = []
-
-    try:
-        x_mode = "circular" if "x" in seamless_axes else "constant"
-        y_mode = "circular" if "y" in seamless_axes else "constant"
-
-        conv_layers: List[torch.nn.Conv2d] = []
-
-        for module in model.modules():
-            if isinstance(module, torch.nn.Conv2d):
-                conv_layers.append(module)
-
-        for layer in conv_layers:
-            if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
-                layer.lora_layer = lambda *x: 0
-            original_layers.append((layer, layer._conv_forward))
-            layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
-
-        yield
-
-    finally:
-        for layer, orig_conv_forward in original_layers:
-            layer._conv_forward = orig_conv_forward
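For callers, the behavior is unchanged but the entry point moved: the module-level set_seamless context manager is deleted, and the same patching logic now lives on SeamlessExt. A hedged migration sketch, inferred from this diff alone (`unet` and `pipeline_step` are hypothetical names, and InvokeAI must be importable):

from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt

# Old (deleted):
#     from invokeai.backend.stable_diffusion import set_seamless
#     with set_seamless(unet, seamless_axes=["x", "y"]):
#         image = pipeline_step()

def run_seamless(unet, pipeline_step):
    # New: a static context manager on the extension class; the original
    # Conv2d forward methods are restored when the block exits.
    with SeamlessExt.static_patch_model(model=unet, seamless_axes=["x", "y"]):
        return pipeline_step()

One typing difference: set_seamless was annotated to accept a UNet or a VAE (AutoencoderKL/AutoencoderTiny), while SeamlessExt.static_patch_model accepts any torch.nn.Module, so VAE patching still works through the static method.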