Merge branch 'main' into stalker-modular_lora

2024-08-30 20:32:17 +00:00 · 2024-07-31 15:10:44 -04:00
parent 0bb7ed44f6 94d64b8a78
commit 5a9173f766
148 changed files with 4546 additions and 2801 deletions
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@ -354,7 +354,7 @@ class CLIPVisionDiffusersConfig(DiffusersConfigBase):
    """Model config for CLIPVision."""

    type: Literal[ModelType.CLIPVision] = ModelType.CLIPVision
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

    @staticmethod
    def get_tag() -> Tag:
@ -365,7 +365,7 @@ class T2IAdapterConfig(DiffusersConfigBase, ControlAdapterConfigBase):
    """Model config for T2I."""

    type: Literal[ModelType.T2IAdapter] = ModelType.T2IAdapter
-    format: Literal[ModelFormat.Diffusers]
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers

    @staticmethod
    def get_tag() -> Tag:
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@ -98,6 +98,9 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                ModelVariantType.Normal: StableDiffusionXLPipeline,
                ModelVariantType.Inpaint: StableDiffusionXLInpaintPipeline,
            },
+            BaseModelType.StableDiffusionXLRefiner: {
+                ModelVariantType.Normal: StableDiffusionXLPipeline,
+            },
        }
        assert isinstance(config, MainCheckpointConfig)
        try:
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@ -187,164 +187,171 @@ STARTER_MODELS: list[StarterModel] = [
    # endregion
    # region ControlNet
    StarterModel(
-        name="QRCode Monster",
+        name="QRCode Monster v2 (SD1.5)",
        base=BaseModelType.StableDiffusion1,
-        source="monster-labs/control_v1p_sd15_qrcode_monster",
-        description="Controlnet model that generates scannable creative QR codes",
+        source="monster-labs/control_v1p_sd15_qrcode_monster::v2",
+        description="ControlNet model that generates scannable creative QR codes",
+        type=ModelType.ControlNet,
+    ),
+    StarterModel(
+        name="QRCode Monster (SDXL)",
+        base=BaseModelType.StableDiffusionXL,
+        source="monster-labs/control_v1p_sdxl_qrcode_monster",
+        description="ControlNet model that generates scannable creative QR codes",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="canny",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_canny",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning.",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="inpaint",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_inpaint",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, inpaint version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, inpaint version",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="mlsd",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_mlsd",
-        description="Controlnet weights trained on sd-1.5 with canny conditioning, MLSD version",
+        description="ControlNet weights trained on sd-1.5 with canny conditioning, MLSD version",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11f1p_sd15_depth",
-        description="Controlnet weights trained on sd-1.5 with depth conditioning",
+        description="ControlNet weights trained on sd-1.5 with depth conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="normal_bae",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_normalbae",
-        description="Controlnet weights trained on sd-1.5 with normalbae image conditioning",
+        description="ControlNet weights trained on sd-1.5 with normalbae image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="seg",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_seg",
-        description="Controlnet weights trained on sd-1.5 with seg image conditioning",
+        description="ControlNet weights trained on sd-1.5 with seg image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="lineart",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_lineart",
-        description="Controlnet weights trained on sd-1.5 with lineart image conditioning",
+        description="ControlNet weights trained on sd-1.5 with lineart image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="lineart_anime",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15s2_lineart_anime",
-        description="Controlnet weights trained on sd-1.5 with anime image conditioning",
+        description="ControlNet weights trained on sd-1.5 with anime image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="openpose",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_openpose",
-        description="Controlnet weights trained on sd-1.5 with openpose image conditioning",
+        description="ControlNet weights trained on sd-1.5 with openpose image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="scribble",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_scribble",
-        description="Controlnet weights trained on sd-1.5 with scribble image conditioning",
+        description="ControlNet weights trained on sd-1.5 with scribble image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="softedge",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11p_sd15_softedge",
-        description="Controlnet weights trained on sd-1.5 with soft edge conditioning",
+        description="ControlNet weights trained on sd-1.5 with soft edge conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="shuffle",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11e_sd15_shuffle",
-        description="Controlnet weights trained on sd-1.5 with shuffle image conditioning",
+        description="ControlNet weights trained on sd-1.5 with shuffle image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="tile",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11f1e_sd15_tile",
-        description="Controlnet weights trained on sd-1.5 with tiled image conditioning",
+        description="ControlNet weights trained on sd-1.5 with tiled image conditioning",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="ip2p",
        base=BaseModelType.StableDiffusion1,
        source="lllyasviel/control_v11e_sd15_ip2p",
-        description="Controlnet weights trained on sd-1.5 with ip2p conditioning.",
+        description="ControlNet weights trained on sd-1.5 with ip2p conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="canny-sdxl",
        base=BaseModelType.StableDiffusionXL,
+        # NOTE: `source` is a Hugging Face repo id and must be kept verbatim (lowercase "controlnet-");
+        # only the human-readable description uses the "ControlNet" spelling.
        source="xinsir/controlnet-canny-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="diffusers/controlnet-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with depth conditioning.",
+        description="ControlNet weights trained on sdxl-1.0 with depth conditioning.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="softedge-dexined-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="SargeZT/controlnet-sd-xl-1.0-softedge-dexined",
-        description="Controlnet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
+        description="ControlNet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-16bit-zoe-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="SargeZT/controlnet-sd-xl-1.0-depth-16bit-zoe",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="depth-zoe-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="diffusers/controlnet-zoe-depth-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
+        description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="openpose-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="xinsir/controlnet-openpose-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="scribble-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="xinsir/controlnet-scribble-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
+        description="ControlNet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
        type=ModelType.ControlNet,
    ),
    StarterModel(
        name="tile-sdxl",
        base=BaseModelType.StableDiffusionXL,
        source="xinsir/controlnet-tile-sdxl-1.0",
-        description="Controlnet weights trained on sdxl-1.0 with tiled image conditioning",
+        description="ControlNet weights trained on sdxl-1.0 with tiled image conditioning",
        type=ModelType.ControlNet,
    ),
    # endregion
--- a/invokeai/backend/stable_diffusion/__init__.py
+++ b/invokeai/backend/stable_diffusion/__init__.py
@ -7,11 +7,9 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import (  # noqa: F401
    StableDiffusionGeneratorPipeline,
 )
 from invokeai.backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent  # noqa: F401
-from invokeai.backend.stable_diffusion.seamless import set_seamless  # noqa: F401

 __all__ = [
    "PipelineIntermediateState",
    "StableDiffusionGeneratorPipeline",
    "InvokeAIDiffuserComponent",
-    "set_seamless",
 ]
--- a/invokeai/backend/stable_diffusion/extensions/inpaint.py
+++ b/invokeai/backend/stable_diffusion/extensions/inpaint.py
@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import einops
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class InpaintExt(ExtensionBase):
+    """An extension for inpainting with non-inpainting models. See `InpaintModelExt` for inpainting with inpainting
+    models.
+
+    Before every step (and on each step's preview output) the current latents are blended, as selected by the mask,
+    with the original latents noised to a given timestep. After the denoise loop the original latents are restored
+    in the region the mask marks as "do not inpaint".
+    """
+
+    def __init__(
+        self,
+        mask: torch.Tensor,
+        is_gradient_mask: bool,
+    ):
+        """Initialize InpaintExt.
+        Args:
+            mask (torch.Tensor): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
+                inpainted.
+            is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
+                from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
+                1.
+        """
+        super().__init__()
+        self._mask = mask
+        self._is_gradient_mask = is_gradient_mask
+
+        # Noise used to re-noise the original latents for the region that is kept.
+        # It is resolved in `init_tensors`: taken from the context inputs if provided there,
+        # otherwise generated from the context's seed.
+        self._noise: Optional[torch.Tensor] = None
+
+    @staticmethod
+    def _is_normal_model(unet: UNet2DConditionModel):
+        """Checks if the provided UNet belongs to a regular model.
+        The `in_channels` of a UNet vary depending on model type:
+        - normal - 4
+        - depth - 5
+        - inpaint - 9
+
+        Returns:
+            True if `unet.conv_in.in_channels` is 4, False otherwise.
+        """
+        return unet.conv_in.in_channels == 4
+
+    def _apply_mask(self, ctx: DenoiseContext, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
+        """Blend `latents` with the original latents noised to timestep `t`, as selected by the mask.
+
+        Args:
+            ctx: Denoise context; supplies the scheduler and `inputs.orig_latents`.
+            latents: Tensor to patch. Its first dimension is used as the batch size.
+            t: Timestep to noise the original latents to. A 0-dim tensor is expanded to one entry per batch element.
+
+        Returns:
+            The patched tensor. Requires `self._noise` to have been set by `init_tensors`.
+        """
+        batch_size = latents.size(0)
+        # Tile the mask along the batch dimension so it lines up with `latents`.
+        mask = einops.repeat(self._mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        if t.dim() == 0:
+            # some schedulers expect t to be one-dimensional.
+            # TODO: file diffusers bug about inconsistency?
+            t = einops.repeat(t, "-> batch", batch=batch_size)
+        # Noise shouldn't be re-randomized between steps here. The multistep schedulers
+        # get very confused about what is happening from step to step when we do that.
+        mask_latents = ctx.scheduler.add_noise(ctx.inputs.orig_latents, self._noise, t)
+        # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
+        # mask_latents = self.scheduler.scale_model_input(mask_latents, t)
+        mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
+        if self._is_gradient_mask:
+            # NOTE(review): `t` may have been expanded to `batch_size` elements above, and `Tensor.item()` only
+            # accepts a single-element tensor - this branch appears to assume batch_size == 1; confirm.
+            threshold = (t.item()) / ctx.scheduler.config.num_train_timesteps
+            # Mask values below (1 - threshold) keep the incoming latents; the rest take the re-noised originals.
+            mask_bool = mask < 1 - threshold
+            masked_input = torch.where(mask_bool, latents, mask_latents)
+        else:
+            # Interpolate from `latents` (mask == 0) to the re-noised originals (mask == 1).
+            masked_input = torch.lerp(latents, mask_latents.to(dtype=latents.dtype), mask.to(dtype=latents.dtype))
+        return masked_input
+
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
+    def init_tensors(self, ctx: DenoiseContext):
+        """Validate the UNet type, move the mask to the latents' device/dtype and resolve the noise tensor.
+
+        Raises:
+            ValueError: If the UNet's input channel count is not that of a normal (non-inpainting) model.
+        """
+        if not self._is_normal_model(ctx.unet):
+            raise ValueError(
+                "InpaintExt should be used only on normal (non-inpainting) models. This could be caused by an "
+                "inpainting model that was incorrectly marked as a non-inpainting model. In some cases, this can be "
+                "fixed by removing and re-adding the model (so that it gets re-probed)."
+            )
+
+        self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+        self._noise = ctx.inputs.noise
+        # 'noise' might be None if the latents have already been noised (e.g. when running the SDXL refiner).
+        # We still need noise for inpainting, so we generate it from the seed here.
+        if self._noise is None:
+            # Generated on CPU in float32 from a seeded generator, then moved to the latents' device/dtype.
+            self._noise = torch.randn(
+                ctx.latents.shape,
+                dtype=torch.float32,
+                device="cpu",
+                generator=torch.Generator(device="cpu").manual_seed(ctx.seed),
+            ).to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+    # Use negative order to make extensions with default order work with patched latents
+    @callback(ExtensionCallbackType.PRE_STEP, order=-100)
+    def apply_mask_to_initial_latents(self, ctx: DenoiseContext):
+        """Patch `ctx.latents` at the current timestep before the step runs."""
+        ctx.latents = self._apply_mask(ctx, ctx.latents, ctx.timestep)
+
+    # TODO: redo this with preview events rewrite
+    # Use negative order to make extensions with default order work with patched latents
+    @callback(ExtensionCallbackType.POST_STEP, order=-100)
+    def apply_mask_to_step_output(self, ctx: DenoiseContext):
+        """Patch the step output's denoised / predicted-original field using the scheduler's last timestep."""
+        # The last entry of the scheduler's timestep list is used here, not the current step's timestep.
+        timestep = ctx.scheduler.timesteps[-1]
+        if hasattr(ctx.step_output, "denoised"):
+            ctx.step_output.denoised = self._apply_mask(ctx, ctx.step_output.denoised, timestep)
+        elif hasattr(ctx.step_output, "pred_original_sample"):
+            ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.pred_original_sample, timestep)
+        else:
+            # NOTE(review): the masked `prev_sample` is stored as a new `pred_original_sample` attribute and
+            # `prev_sample` itself is left untouched - presumably only for previews; confirm intent.
+            ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.prev_sample, timestep)
+
+    # Restore unmasked part after the last step is completed
+    @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
+    def restore_unmasked(self, ctx: DenoiseContext):
+        """Write the original latents back into the final latents according to the mask."""
+        if self._is_gradient_mask:
+            # Keep generated latents wherever mask < 1; take the originals only where mask is 1 (or above).
+            ctx.latents = torch.where(self._mask < 1, ctx.latents, ctx.inputs.orig_latents)
+        else:
+            # mask == 0 -> generated latents, mask == 1 -> original latents.
+            ctx.latents = torch.lerp(ctx.latents, ctx.inputs.orig_latents, self._mask)
--- a/invokeai/backend/stable_diffusion/extensions/inpaint_model.py
+++ b/invokeai/backend/stable_diffusion/extensions/inpaint_model.py
@ -0,0 +1,88 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import torch
+from diffusers import UNet2DConditionModel
+
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class InpaintModelExt(ExtensionBase):
+    """An extension for inpainting with inpainting models. See `InpaintExt` for inpainting with non-inpainting
+    models.
+
+    The (inverted) mask and the masked latents are appended to the UNet input sample as extra channels before each
+    UNet call, and the original latents are restored outside the inpainted area after the denoise loop.
+    """
+
+    def __init__(
+        self,
+        mask: Optional[torch.Tensor],
+        masked_latents: Optional[torch.Tensor],
+        is_gradient_mask: bool,
+    ):
+        """Initialize InpaintModelExt.
+        Args:
+            mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
+                inpainted.
+            masked_latents (Optional[torch.Tensor]): Latents of the initial image with the area to be inpainted
+                masked out (in black). Must be provided whenever `mask` is provided. When omitted, `init_tensors`
+                substitutes zeros shaped like a single latents sample, so the expected shape is presumably
+                (1, latent_channels, latent_height, latent_width) - TODO confirm.
+            is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
+                from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
+                1.
+
+        Raises:
+            ValueError: If `mask` is given without `masked_latents`.
+        """
+        super().__init__()
+        if mask is not None and masked_latents is None:
+            raise ValueError("Source image required for inpaint mask when inpaint model used!")
+
+        # Inverse mask, because inpaint models treat mask as: 0 - remain same, 1 - inpaint
+        self._mask = None
+        if mask is not None:
+            self._mask = 1 - mask
+        self._masked_latents = masked_latents
+        self._is_gradient_mask = is_gradient_mask
+
+    @staticmethod
+    def _is_inpaint_model(unet: UNet2DConditionModel):
+        """Checks if the provided UNet belongs to an inpainting model.
+        The `in_channels` of a UNet vary depending on model type:
+        - normal - 4
+        - depth - 5
+        - inpaint - 9
+
+        Returns:
+            True if `unet.conv_in.in_channels` is 9, False otherwise.
+        """
+        return unet.conv_in.in_channels == 9
+
+    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
+    def init_tensors(self, ctx: DenoiseContext):
+        """Validate the UNet type and prepare the mask / masked latents on the latents' device and dtype.
+
+        Raises:
+            ValueError: If the UNet's input channel count is not that of an inpainting model.
+        """
+        if not self._is_inpaint_model(ctx.unet):
+            raise ValueError("InpaintModelExt should be used only on inpaint models!")
+
+        if self._mask is None:
+            # No mask given: use all ones, which in the inverted convention means "inpaint everywhere".
+            self._mask = torch.ones_like(ctx.latents[:1, :1])
+        self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+        if self._masked_latents is None:
+            # No masked latents given: use zeros shaped like a single latents sample.
+            self._masked_latents = torch.zeros_like(ctx.latents[:1])
+        self._masked_latents = self._masked_latents.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
+
+    # Run last so that other extensions work with the normal (un-extended) latents
+    @callback(ExtensionCallbackType.PRE_UNET, order=1000)
+    def append_inpaint_layers(self, ctx: DenoiseContext):
+        """Append the mask and masked latents to the UNet input sample along the channel dimension."""
+        batch_size = ctx.unet_kwargs.sample.shape[0]
+        # Tile both single-sample tensors along the batch dimension to match the sample.
+        b_mask = torch.cat([self._mask] * batch_size)
+        b_masked_latents = torch.cat([self._masked_latents] * batch_size)
+        ctx.unet_kwargs.sample = torch.cat(
+            [ctx.unet_kwargs.sample, b_mask, b_masked_latents],
+            dim=1,
+        )
+
+    # Restore unmasked part as inpaint model can change unmasked part slightly
+    @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
+    def restore_unmasked(self, ctx: DenoiseContext):
+        """Write the original latents back outside the inpainted area (the stored mask is inverted: 1 = inpaint)."""
+        if self._is_gradient_mask:
+            # Keep generated latents wherever the inverted mask is > 0; originals elsewhere.
+            ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
+        else:
+            # inverted mask == 0 -> original latents, == 1 -> generated latents.
+            ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)
--- a/invokeai/backend/stable_diffusion/extensions/seamless.py
+++ b/invokeai/backend/stable_diffusion/extensions/seamless.py
@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from typing import Callable, Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+from diffusers import UNet2DConditionModel
+from diffusers.models.lora import LoRACompatibleConv
+
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
+
+
+class SeamlessExt(ExtensionBase):
+    """Extension that temporarily patches a model's Conv2d layers to use circular padding on the selected axes."""
+
+    def __init__(
+        self,
+        seamless_axes: List[str],
+    ):
+        """Initialize SeamlessExt.
+        Args:
+            seamless_axes (List[str]): Axes to pad circularly. The entries "x" and "y" are the ones checked for; an
+                empty list disables patching.
+        """
+        super().__init__()
+        self._seamless_axes = seamless_axes
+
+    @contextmanager
+    def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
+        """Context manager that patches `unet` for the configured axes for the duration of the `with` block.
+
+        `cached_weights` is accepted but not used by this implementation.
+        """
+        with self.static_patch_model(
+            model=unet,
+            seamless_axes=self._seamless_axes,
+        ):
+            yield
+
+    @staticmethod
+    @contextmanager
+    def static_patch_model(
+        model: torch.nn.Module,
+        seamless_axes: List[str],
+    ):
+        """Context manager that replaces `_conv_forward` on every Conv2d in `model`, restoring it on exit.
+
+        Args:
+            model: Module whose Conv2d sub-modules are patched.
+            seamless_axes: Axes ("x" and/or "y") to pad circularly; other axes are padded with mode "constant".
+                If empty, the model is left untouched.
+        """
+        if not seamless_axes:
+            yield
+            return
+
+        x_mode = "circular" if "x" in seamless_axes else "constant"
+        y_mode = "circular" if "y" in seamless_axes else "constant"
+
+        # override conv_forward
+        # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
+        def _conv_forward_asymmetric(
+            self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None
+        ):
+            # Pad each axis separately so each can use its own mode, then convolve with zero built-in padding.
+            self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
+            self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
+            working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
+            working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
+            return torch.nn.functional.conv2d(
+                working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
+            )
+
+        # (layer, original bound `_conv_forward`) pairs, used to undo the patch in `finally`.
+        original_layers: List[Tuple[nn.Conv2d, Callable]] = []
+        try:
+            for layer in model.modules():
+                if not isinstance(layer, torch.nn.Conv2d):
+                    continue
+
+                # Give LoRA-compatible convs without a LoRA layer a dummy one that contributes 0 - presumably so
+                # their forward pass goes through `_conv_forward`; verify against diffusers' LoRACompatibleConv.
+                # NOTE(review): this dummy `lora_layer` is not reset in the `finally` block below - confirm intended.
+                if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
+                    layer.lora_layer = lambda *x: 0
+                original_layers.append((layer, layer._conv_forward))
+                # Bind the replacement function to this layer instance as a method.
+                layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
+
+            yield
+
+        finally:
+            # Restore the original `_conv_forward` on every layer patched so far, even if an error occurred.
+            for layer, orig_conv_forward in original_layers:
+                layer._conv_forward = orig_conv_forward
--- a/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
+++ b/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
@ -0,0 +1,120 @@
+from __future__ import annotations
+
+import math
+from typing import TYPE_CHECKING, List, Optional, Union
+
+import torch
+from diffusers import T2IAdapter
+from PIL.Image import Image
+
+from invokeai.app.util.controlnet_utils import prepare_control_image
+from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode
+from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
+
+if TYPE_CHECKING:
+    from invokeai.app.invocations.model import ModelIdentifierField
+    from invokeai.app.services.shared.invocation_context import InvocationContext
+    from invokeai.app.util.controlnet_utils import CONTROLNET_RESIZE_VALUES
+    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
+
+
+class T2IAdapterExt(ExtensionBase):
+    """Extension that runs a T2I-Adapter once on a control image and adds its weighted output to the UNet's
+    `down_intrablock_additional_residuals` on the steps where the adapter is active.
+    """
+
+    def __init__(
+        self,
+        node_context: InvocationContext,
+        model_id: ModelIdentifierField,
+        image: Image,
+        weight: Union[float, List[float]],
+        begin_step_percent: float,
+        end_step_percent: float,
+        resize_mode: CONTROLNET_RESIZE_VALUES,
+    ):
+        """Initialize T2IAdapterExt.
+        Args:
+            node_context: Invocation context; used to look up the model config and to load the model.
+            model_id: Identifier of the T2I-Adapter model.
+            image: Control image fed to the adapter.
+            weight: Scale applied to the adapter output. A list is indexed by the current step index.
+            begin_step_percent: Start of the active window, as a fraction of the total number of steps.
+            end_step_percent: End of the active window, as a fraction of the total number of steps.
+            resize_mode: Resize mode passed to `prepare_control_image`.
+
+        Raises:
+            ValueError: If the model's base type is neither StableDiffusion1 nor StableDiffusionXL.
+        """
+        super().__init__()
+        self._node_context = node_context
+        self._model_id = model_id
+        self._image = image
+        self._weight = weight
+        self._resize_mode = resize_mode
+        self._begin_step_percent = begin_step_percent
+        self._end_step_percent = end_step_percent
+
+        # Adapter output, computed once in `setup` and reused on every step.
+        self._adapter_state: Optional[List[torch.Tensor]] = None
+
+        # The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
+        model_config = self._node_context.models.get_config(self._model_id.key)
+        if model_config.base == BaseModelType.StableDiffusion1:
+            self._max_unet_downscale = 8
+        elif model_config.base == BaseModelType.StableDiffusionXL:
+            self._max_unet_downscale = 4
+        else:
+            raise ValueError(f"Unexpected T2I-Adapter base model type: '{model_config.base}'.")
+
+    @callback(ExtensionCallbackType.SETUP)
+    def setup(self, ctx: DenoiseContext):
+        """Load the T2I-Adapter model, run it on the control image and cache the result."""
+        t2i_model: T2IAdapter
+        with self._node_context.models.load(self._model_id) as t2i_model:
+            # Only the spatial size of the original latents is needed to size the adapter input.
+            _, _, latents_height, latents_width = ctx.inputs.orig_latents.shape
+
+            self._adapter_state = self._run_model(
+                model=t2i_model,
+                image=self._image,
+                latents_height=latents_height,
+                latents_width=latents_width,
+            )
+
+    def _run_model(
+        self,
+        model: T2IAdapter,
+        image: Image,
+        latents_height: int,
+        latents_width: int,
+    ):
+        """Resize `image` for the adapter and return the adapter's output for it.
+
+        Args:
+            model: Loaded T2I-Adapter model.
+            image: Control image.
+            latents_height: Height of the latent image.
+            latents_width: Width of the latent image.
+
+        Returns:
+            The result of calling `model` on the prepared image tensor.
+        """
+        # Resize the T2I-Adapter input image.
+        # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
+        # result will match the latent image's dimensions after max_unet_downscale is applied.
+        input_height = latents_height // self._max_unet_downscale * model.total_downscale_factor
+        input_width = latents_width // self._max_unet_downscale * model.total_downscale_factor
+
+        # Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
+        # a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
+        # T2I-Adapter model.
+        #
+        # Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
+        # of the same requirements (e.g. preserving binary masks during resize).
+        t2i_image = prepare_control_image(
+            image=image,
+            do_classifier_free_guidance=False,
+            width=input_width,
+            height=input_height,
+            num_channels=model.config["in_channels"],
+            device=model.device,
+            dtype=model.dtype,
+            resize_mode=self._resize_mode,
+        )
+
+        return model(t2i_image)
+
+    @callback(ExtensionCallbackType.PRE_UNET)
+    def pre_unet_step(self, ctx: DenoiseContext):
+        """Add the weighted adapter output to the UNet kwargs if the current step is inside the active window."""
+        # skip if model not active in current step
+        total_steps = len(ctx.inputs.timesteps)
+        first_step = math.floor(self._begin_step_percent * total_steps)
+        last_step = math.ceil(self._end_step_percent * total_steps)
+        # Both bounds are inclusive: steps first_step..last_step are active.
+        if ctx.step_index < first_step or ctx.step_index > last_step:
+            return
+
+        weight = self._weight
+        if isinstance(weight, list):
+            # Per-step weights: pick the entry for the current step.
+            weight = weight[ctx.step_index]
+
+        adapter_state = self._adapter_state
+        if ctx.conditioning_mode == ConditioningMode.Both:
+            # The adapter was run on a single image; duplicate its output along the batch dimension.
+            adapter_state = [torch.cat([v] * 2) for v in adapter_state]
+
+        if ctx.unet_kwargs.down_intrablock_additional_residuals is None:
+            ctx.unet_kwargs.down_intrablock_additional_residuals = [v * weight for v in adapter_state]
+        else:
+            # Residuals already present: accumulate into them.
+            # NOTE(review): `+=` updates the existing list entries in place - confirm that is safe for their owner.
+            for i, value in enumerate(adapter_state):
+                ctx.unet_kwargs.down_intrablock_additional_residuals[i] += value * weight
--- a/invokeai/backend/stable_diffusion/schedulers/schedulers.py
+++ b/invokeai/backend/stable_diffusion/schedulers/schedulers.py
@ -20,10 +20,14 @@ from diffusers import (
 )
 from diffusers.schedulers.scheduling_utils import SchedulerMixin

+# TODO: add dpmpp_3s/dpmpp_3s_k when fix released
+# https://github.com/huggingface/diffusers/issues/9007
+
 SCHEDULER_NAME_VALUES = Literal[
    "ddim",
    "ddpm",
    "deis",
+    "deis_k",
    "lms",
    "lms_k",
    "pndm",
@ -33,16 +37,21 @@ SCHEDULER_NAME_VALUES = Literal[
    "euler_k",
    "euler_a",
    "kdpm_2",
+    "kdpm_2_k",
    "kdpm_2_a",
+    "kdpm_2_a_k",
    "dpmpp_2s",
    "dpmpp_2s_k",
    "dpmpp_2m",
    "dpmpp_2m_k",
    "dpmpp_2m_sde",
    "dpmpp_2m_sde_k",
+    "dpmpp_3m",
+    "dpmpp_3m_k",
    "dpmpp_sde",
    "dpmpp_sde_k",
    "unipc",
+    "unipc_k",
    "lcm",
    "tcd",
 ]
@ -50,7 +59,8 @@ SCHEDULER_NAME_VALUES = Literal[
 SCHEDULER_MAP: dict[SCHEDULER_NAME_VALUES, tuple[Type[SchedulerMixin], dict[str, Any]]] = {
    "ddim": (DDIMScheduler, {}),
    "ddpm": (DDPMScheduler, {}),
-    "deis": (DEISMultistepScheduler, {}),
+    "deis": (DEISMultistepScheduler, {"use_karras_sigmas": False}),
+    "deis_k": (DEISMultistepScheduler, {"use_karras_sigmas": True}),
    "lms": (LMSDiscreteScheduler, {"use_karras_sigmas": False}),
    "lms_k": (LMSDiscreteScheduler, {"use_karras_sigmas": True}),
    "pndm": (PNDMScheduler, {}),
@ -59,17 +69,28 @@ SCHEDULER_MAP: dict[SCHEDULER_NAME_VALUES, tuple[Type[SchedulerMixin], dict[str,
    "euler": (EulerDiscreteScheduler, {"use_karras_sigmas": False}),
    "euler_k": (EulerDiscreteScheduler, {"use_karras_sigmas": True}),
    "euler_a": (EulerAncestralDiscreteScheduler, {}),
-    "kdpm_2": (KDPM2DiscreteScheduler, {}),
-    "kdpm_2_a": (KDPM2AncestralDiscreteScheduler, {}),
-    "dpmpp_2s": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": False}),
-    "dpmpp_2s_k": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": True}),
-    "dpmpp_2m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False}),
-    "dpmpp_2m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True}),
-    "dpmpp_2m_sde": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "algorithm_type": "sde-dpmsolver++"}),
-    "dpmpp_2m_sde_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "algorithm_type": "sde-dpmsolver++"}),
+    "kdpm_2": (KDPM2DiscreteScheduler, {"use_karras_sigmas": False}),
+    "kdpm_2_k": (KDPM2DiscreteScheduler, {"use_karras_sigmas": True}),
+    "kdpm_2_a": (KDPM2AncestralDiscreteScheduler, {"use_karras_sigmas": False}),
+    "kdpm_2_a_k": (KDPM2AncestralDiscreteScheduler, {"use_karras_sigmas": True}),
+    "dpmpp_2s": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": False, "solver_order": 2}),
+    "dpmpp_2s_k": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": True, "solver_order": 2}),
+    "dpmpp_2m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "solver_order": 2}),
+    "dpmpp_2m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "solver_order": 2}),
+    "dpmpp_2m_sde": (
+        DPMSolverMultistepScheduler,
+        {"use_karras_sigmas": False, "solver_order": 2, "algorithm_type": "sde-dpmsolver++"},
+    ),
+    "dpmpp_2m_sde_k": (
+        DPMSolverMultistepScheduler,
+        {"use_karras_sigmas": True, "solver_order": 2, "algorithm_type": "sde-dpmsolver++"},
+    ),
+    "dpmpp_3m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "solver_order": 3}),
+    "dpmpp_3m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "solver_order": 3}),
    "dpmpp_sde": (DPMSolverSDEScheduler, {"use_karras_sigmas": False, "noise_sampler_seed": 0}),
    "dpmpp_sde_k": (DPMSolverSDEScheduler, {"use_karras_sigmas": True, "noise_sampler_seed": 0}),
-    "unipc": (UniPCMultistepScheduler, {"cpu_only": True}),
+    "unipc": (UniPCMultistepScheduler, {"use_karras_sigmas": False, "cpu_only": True}),
+    "unipc_k": (UniPCMultistepScheduler, {"use_karras_sigmas": True, "cpu_only": True}),
    "lcm": (LCMScheduler, {}),
    "tcd": (TCDScheduler, {}),
 }
--- a/invokeai/backend/stable_diffusion/seamless.py
+++ b/invokeai/backend/stable_diffusion/seamless.py
@ -1,51 +0,0 @@
-from contextlib import contextmanager
-from typing import Callable, List, Optional, Tuple, Union
-
-import torch
-import torch.nn as nn
-from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
-from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
-from diffusers.models.lora import LoRACompatibleConv
-from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
-
-
-@contextmanager
-def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
-    if not seamless_axes:
-        yield
-        return
-
-    # override conv_forward
-    # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
-    def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
-        self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
-        self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
-        working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
-        working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
-        return torch.nn.functional.conv2d(
-            working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
-        )
-
-    original_layers: List[Tuple[nn.Conv2d, Callable]] = []
-
-    try:
-        x_mode = "circular" if "x" in seamless_axes else "constant"
-        y_mode = "circular" if "y" in seamless_axes else "constant"
-
-        conv_layers: List[torch.nn.Conv2d] = []
-
-        for module in model.modules():
-            if isinstance(module, torch.nn.Conv2d):
-                conv_layers.append(module)
-
-        for layer in conv_layers:
-            if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
-                layer.lora_layer = lambda *x: 0
-            original_layers.append((layer, layer._conv_forward))
-            layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
-
-        yield
-
-    finally:
-        for layer, orig_conv_forward in original_layers:
-            layer._conv_forward = orig_conv_forward