Merge branch 'main' into stalker-modular_lora

This commit is contained in:
Ryan Dick
2024-07-31 15:10:44 -04:00
148 changed files with 4546 additions and 2801 deletions

View File

@ -354,7 +354,7 @@ class CLIPVisionDiffusersConfig(DiffusersConfigBase):
"""Model config for CLIPVision."""
type: Literal[ModelType.CLIPVision] = ModelType.CLIPVision
format: Literal[ModelFormat.Diffusers]
format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
@staticmethod
def get_tag() -> Tag:
@ -365,7 +365,7 @@ class T2IAdapterConfig(DiffusersConfigBase, ControlAdapterConfigBase):
"""Model config for T2I."""
type: Literal[ModelType.T2IAdapter] = ModelType.T2IAdapter
format: Literal[ModelFormat.Diffusers]
format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
@staticmethod
def get_tag() -> Tag:

View File

@ -98,6 +98,9 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
ModelVariantType.Normal: StableDiffusionXLPipeline,
ModelVariantType.Inpaint: StableDiffusionXLInpaintPipeline,
},
BaseModelType.StableDiffusionXLRefiner: {
ModelVariantType.Normal: StableDiffusionXLPipeline,
},
}
assert isinstance(config, MainCheckpointConfig)
try:

View File

@ -187,164 +187,171 @@ STARTER_MODELS: list[StarterModel] = [
# endregion
# region ControlNet
StarterModel(
name="QRCode Monster",
name="QRCode Monster v2 (SD1.5)",
base=BaseModelType.StableDiffusion1,
source="monster-labs/control_v1p_sd15_qrcode_monster",
description="Controlnet model that generates scannable creative QR codes",
source="monster-labs/control_v1p_sd15_qrcode_monster::v2",
description="ControlNet model that generates scannable creative QR codes",
type=ModelType.ControlNet,
),
StarterModel(
name="QRCode Monster (SDXL)",
base=BaseModelType.StableDiffusionXL,
source="monster-labs/control_v1p_sdxl_qrcode_monster",
description="ControlNet model that generates scannable creative QR codes",
type=ModelType.ControlNet,
),
StarterModel(
name="canny",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_canny",
description="Controlnet weights trained on sd-1.5 with canny conditioning.",
description="ControlNet weights trained on sd-1.5 with canny conditioning.",
type=ModelType.ControlNet,
),
StarterModel(
name="inpaint",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_inpaint",
description="Controlnet weights trained on sd-1.5 with canny conditioning, inpaint version",
description="ControlNet weights trained on sd-1.5 with canny conditioning, inpaint version",
type=ModelType.ControlNet,
),
StarterModel(
name="mlsd",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_mlsd",
description="Controlnet weights trained on sd-1.5 with canny conditioning, MLSD version",
description="ControlNet weights trained on sd-1.5 with canny conditioning, MLSD version",
type=ModelType.ControlNet,
),
StarterModel(
name="depth",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11f1p_sd15_depth",
description="Controlnet weights trained on sd-1.5 with depth conditioning",
description="ControlNet weights trained on sd-1.5 with depth conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="normal_bae",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_normalbae",
description="Controlnet weights trained on sd-1.5 with normalbae image conditioning",
description="ControlNet weights trained on sd-1.5 with normalbae image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="seg",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_seg",
description="Controlnet weights trained on sd-1.5 with seg image conditioning",
description="ControlNet weights trained on sd-1.5 with seg image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="lineart",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_lineart",
description="Controlnet weights trained on sd-1.5 with lineart image conditioning",
description="ControlNet weights trained on sd-1.5 with lineart image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="lineart_anime",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15s2_lineart_anime",
description="Controlnet weights trained on sd-1.5 with anime image conditioning",
description="ControlNet weights trained on sd-1.5 with anime image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="openpose",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_openpose",
description="Controlnet weights trained on sd-1.5 with openpose image conditioning",
description="ControlNet weights trained on sd-1.5 with openpose image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="scribble",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_scribble",
description="Controlnet weights trained on sd-1.5 with scribble image conditioning",
description="ControlNet weights trained on sd-1.5 with scribble image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="softedge",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11p_sd15_softedge",
description="Controlnet weights trained on sd-1.5 with soft edge conditioning",
description="ControlNet weights trained on sd-1.5 with soft edge conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="shuffle",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11e_sd15_shuffle",
description="Controlnet weights trained on sd-1.5 with shuffle image conditioning",
description="ControlNet weights trained on sd-1.5 with shuffle image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="tile",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11f1e_sd15_tile",
description="Controlnet weights trained on sd-1.5 with tiled image conditioning",
description="ControlNet weights trained on sd-1.5 with tiled image conditioning",
type=ModelType.ControlNet,
),
StarterModel(
name="ip2p",
base=BaseModelType.StableDiffusion1,
source="lllyasviel/control_v11e_sd15_ip2p",
description="Controlnet weights trained on sd-1.5 with ip2p conditioning.",
description="ControlNet weights trained on sd-1.5 with ip2p conditioning.",
type=ModelType.ControlNet,
),
StarterModel(
name="canny-sdxl",
base=BaseModelType.StableDiffusionXL,
source="xinsir/controlnet-canny-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
source="xinsir/controlNet-canny-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 with canny conditioning, by Xinsir.",
type=ModelType.ControlNet,
),
StarterModel(
name="depth-sdxl",
base=BaseModelType.StableDiffusionXL,
source="diffusers/controlnet-depth-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 with depth conditioning.",
source="diffusers/controlNet-depth-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 with depth conditioning.",
type=ModelType.ControlNet,
),
StarterModel(
name="softedge-dexined-sdxl",
base=BaseModelType.StableDiffusionXL,
source="SargeZT/controlnet-sd-xl-1.0-softedge-dexined",
description="Controlnet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
source="SargeZT/controlNet-sd-xl-1.0-softedge-dexined",
description="ControlNet weights trained on sdxl-1.0 with dexined soft edge preprocessing.",
type=ModelType.ControlNet,
),
StarterModel(
name="depth-16bit-zoe-sdxl",
base=BaseModelType.StableDiffusionXL,
source="SargeZT/controlnet-sd-xl-1.0-depth-16bit-zoe",
description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
source="SargeZT/controlNet-sd-xl-1.0-depth-16bit-zoe",
description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (16 bits).",
type=ModelType.ControlNet,
),
StarterModel(
name="depth-zoe-sdxl",
base=BaseModelType.StableDiffusionXL,
source="diffusers/controlnet-zoe-depth-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
source="diffusers/controlNet-zoe-depth-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 with Zoe's preprocessor (32 bits).",
type=ModelType.ControlNet,
),
StarterModel(
name="openpose-sdxl",
base=BaseModelType.StableDiffusionXL,
source="xinsir/controlnet-openpose-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
source="xinsir/controlNet-openpose-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 compatible with the DWPose processor by Xinsir.",
type=ModelType.ControlNet,
),
StarterModel(
name="scribble-sdxl",
base=BaseModelType.StableDiffusionXL,
source="xinsir/controlnet-scribble-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
source="xinsir/controlNet-scribble-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 compatible with various lineart processors and black/white sketches by Xinsir.",
type=ModelType.ControlNet,
),
StarterModel(
name="tile-sdxl",
base=BaseModelType.StableDiffusionXL,
source="xinsir/controlnet-tile-sdxl-1.0",
description="Controlnet weights trained on sdxl-1.0 with tiled image conditioning",
source="xinsir/controlNet-tile-sdxl-1.0",
description="ControlNet weights trained on sdxl-1.0 with tiled image conditioning",
type=ModelType.ControlNet,
),
# endregion

View File

@ -7,11 +7,9 @@ from invokeai.backend.stable_diffusion.diffusers_pipeline import ( # noqa: F401
StableDiffusionGeneratorPipeline,
)
from invokeai.backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent # noqa: F401
from invokeai.backend.stable_diffusion.seamless import set_seamless # noqa: F401
__all__ = [
"PipelineIntermediateState",
"StableDiffusionGeneratorPipeline",
"InvokeAIDiffuserComponent",
"set_seamless",
]

View File

@ -0,0 +1,120 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Optional
import einops
import torch
from diffusers import UNet2DConditionModel
from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
if TYPE_CHECKING:
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
class InpaintExt(ExtensionBase):
"""An extension for inpainting with non-inpainting models. See `InpaintModelExt` for inpainting with inpainting
models.
"""
def __init__(
self,
mask: torch.Tensor,
is_gradient_mask: bool,
):
"""Initialize InpaintExt.
Args:
mask (torch.Tensor): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
inpainted.
is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
1.
"""
super().__init__()
self._mask = mask
self._is_gradient_mask = is_gradient_mask
# Noise, which used to noisify unmasked part of image
# if noise provided to context, then it will be used
# if no noise provided, then noise will be generated based on seed
self._noise: Optional[torch.Tensor] = None
@staticmethod
def _is_normal_model(unet: UNet2DConditionModel):
"""Checks if the provided UNet belongs to a regular model.
The `in_channels` of a UNet vary depending on model type:
- normal - 4
- depth - 5
- inpaint - 9
"""
return unet.conv_in.in_channels == 4
def _apply_mask(self, ctx: DenoiseContext, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
batch_size = latents.size(0)
mask = einops.repeat(self._mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
if t.dim() == 0:
# some schedulers expect t to be one-dimensional.
# TODO: file diffusers bug about inconsistency?
t = einops.repeat(t, "-> batch", batch=batch_size)
# Noise shouldn't be re-randomized between steps here. The multistep schedulers
# get very confused about what is happening from step to step when we do that.
mask_latents = ctx.scheduler.add_noise(ctx.inputs.orig_latents, self._noise, t)
# TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
# mask_latents = self.scheduler.scale_model_input(mask_latents, t)
mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
if self._is_gradient_mask:
threshold = (t.item()) / ctx.scheduler.config.num_train_timesteps
mask_bool = mask < 1 - threshold
masked_input = torch.where(mask_bool, latents, mask_latents)
else:
masked_input = torch.lerp(latents, mask_latents.to(dtype=latents.dtype), mask.to(dtype=latents.dtype))
return masked_input
@callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
def init_tensors(self, ctx: DenoiseContext):
if not self._is_normal_model(ctx.unet):
raise ValueError(
"InpaintExt should be used only on normal (non-inpainting) models. This could be caused by an "
"inpainting model that was incorrectly marked as a non-inpainting model. In some cases, this can be "
"fixed by removing and re-adding the model (so that it gets re-probed)."
)
self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
self._noise = ctx.inputs.noise
# 'noise' might be None if the latents have already been noised (e.g. when running the SDXL refiner).
# We still need noise for inpainting, so we generate it from the seed here.
if self._noise is None:
self._noise = torch.randn(
ctx.latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(ctx.seed),
).to(device=ctx.latents.device, dtype=ctx.latents.dtype)
# Use negative order to make extensions with default order work with patched latents
@callback(ExtensionCallbackType.PRE_STEP, order=-100)
def apply_mask_to_initial_latents(self, ctx: DenoiseContext):
ctx.latents = self._apply_mask(ctx, ctx.latents, ctx.timestep)
# TODO: redo this with preview events rewrite
# Use negative order to make extensions with default order work with patched latents
@callback(ExtensionCallbackType.POST_STEP, order=-100)
def apply_mask_to_step_output(self, ctx: DenoiseContext):
timestep = ctx.scheduler.timesteps[-1]
if hasattr(ctx.step_output, "denoised"):
ctx.step_output.denoised = self._apply_mask(ctx, ctx.step_output.denoised, timestep)
elif hasattr(ctx.step_output, "pred_original_sample"):
ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.pred_original_sample, timestep)
else:
ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.prev_sample, timestep)
# Restore unmasked part after the last step is completed
@callback(ExtensionCallbackType.POST_DENOISE_LOOP)
def restore_unmasked(self, ctx: DenoiseContext):
if self._is_gradient_mask:
ctx.latents = torch.where(self._mask < 1, ctx.latents, ctx.inputs.orig_latents)
else:
ctx.latents = torch.lerp(ctx.latents, ctx.inputs.orig_latents, self._mask)

View File

@ -0,0 +1,88 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Optional
import torch
from diffusers import UNet2DConditionModel
from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
if TYPE_CHECKING:
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
class InpaintModelExt(ExtensionBase):
"""An extension for inpainting with inpainting models. See `InpaintExt` for inpainting with non-inpainting
models.
"""
def __init__(
self,
mask: Optional[torch.Tensor],
masked_latents: Optional[torch.Tensor],
is_gradient_mask: bool,
):
"""Initialize InpaintModelExt.
Args:
mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
inpainted.
masked_latents (Optional[torch.Tensor]): Latents of initial image, with masked out by black color inpainted area.
If mask provided, then too should be provided. Shape: (1, 1, latent_height, latent_width)
is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
1.
"""
super().__init__()
if mask is not None and masked_latents is None:
raise ValueError("Source image required for inpaint mask when inpaint model used!")
# Inverse mask, because inpaint models treat mask as: 0 - remain same, 1 - inpaint
self._mask = None
if mask is not None:
self._mask = 1 - mask
self._masked_latents = masked_latents
self._is_gradient_mask = is_gradient_mask
@staticmethod
def _is_inpaint_model(unet: UNet2DConditionModel):
"""Checks if the provided UNet belongs to a regular model.
The `in_channels` of a UNet vary depending on model type:
- normal - 4
- depth - 5
- inpaint - 9
"""
return unet.conv_in.in_channels == 9
@callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
def init_tensors(self, ctx: DenoiseContext):
if not self._is_inpaint_model(ctx.unet):
raise ValueError("InpaintModelExt should be used only on inpaint models!")
if self._mask is None:
self._mask = torch.ones_like(ctx.latents[:1, :1])
self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
if self._masked_latents is None:
self._masked_latents = torch.zeros_like(ctx.latents[:1])
self._masked_latents = self._masked_latents.to(device=ctx.latents.device, dtype=ctx.latents.dtype)
# Do last so that other extensions works with normal latents
@callback(ExtensionCallbackType.PRE_UNET, order=1000)
def append_inpaint_layers(self, ctx: DenoiseContext):
batch_size = ctx.unet_kwargs.sample.shape[0]
b_mask = torch.cat([self._mask] * batch_size)
b_masked_latents = torch.cat([self._masked_latents] * batch_size)
ctx.unet_kwargs.sample = torch.cat(
[ctx.unet_kwargs.sample, b_mask, b_masked_latents],
dim=1,
)
# Restore unmasked part as inpaint model can change unmasked part slightly
@callback(ExtensionCallbackType.POST_DENOISE_LOOP)
def restore_unmasked(self, ctx: DenoiseContext):
if self._is_gradient_mask:
ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
else:
ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)

View File

@ -0,0 +1,71 @@
from __future__ import annotations
from contextlib import contextmanager
from typing import Callable, Dict, List, Optional, Tuple
import torch
import torch.nn as nn
from diffusers import UNet2DConditionModel
from diffusers.models.lora import LoRACompatibleConv
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase
class SeamlessExt(ExtensionBase):
def __init__(
self,
seamless_axes: List[str],
):
super().__init__()
self._seamless_axes = seamless_axes
@contextmanager
def patch_unet(self, unet: UNet2DConditionModel, cached_weights: Optional[Dict[str, torch.Tensor]] = None):
with self.static_patch_model(
model=unet,
seamless_axes=self._seamless_axes,
):
yield
@staticmethod
@contextmanager
def static_patch_model(
model: torch.nn.Module,
seamless_axes: List[str],
):
if not seamless_axes:
yield
return
x_mode = "circular" if "x" in seamless_axes else "constant"
y_mode = "circular" if "y" in seamless_axes else "constant"
# override conv_forward
# https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
def _conv_forward_asymmetric(
self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None
):
self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
return torch.nn.functional.conv2d(
working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
)
original_layers: List[Tuple[nn.Conv2d, Callable]] = []
try:
for layer in model.modules():
if not isinstance(layer, torch.nn.Conv2d):
continue
if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
layer.lora_layer = lambda *x: 0
original_layers.append((layer, layer._conv_forward))
layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
yield
finally:
for layer, orig_conv_forward in original_layers:
layer._conv_forward = orig_conv_forward

View File

@ -0,0 +1,120 @@
from __future__ import annotations
import math
from typing import TYPE_CHECKING, List, Optional, Union
import torch
from diffusers import T2IAdapter
from PIL.Image import Image
from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.backend.model_manager import BaseModelType
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningMode
from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback
if TYPE_CHECKING:
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_RESIZE_VALUES
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext
class T2IAdapterExt(ExtensionBase):
def __init__(
self,
node_context: InvocationContext,
model_id: ModelIdentifierField,
image: Image,
weight: Union[float, List[float]],
begin_step_percent: float,
end_step_percent: float,
resize_mode: CONTROLNET_RESIZE_VALUES,
):
super().__init__()
self._node_context = node_context
self._model_id = model_id
self._image = image
self._weight = weight
self._resize_mode = resize_mode
self._begin_step_percent = begin_step_percent
self._end_step_percent = end_step_percent
self._adapter_state: Optional[List[torch.Tensor]] = None
# The max_unet_downscale is the maximum amount that the UNet model downscales the latent image internally.
model_config = self._node_context.models.get_config(self._model_id.key)
if model_config.base == BaseModelType.StableDiffusion1:
self._max_unet_downscale = 8
elif model_config.base == BaseModelType.StableDiffusionXL:
self._max_unet_downscale = 4
else:
raise ValueError(f"Unexpected T2I-Adapter base model type: '{model_config.base}'.")
@callback(ExtensionCallbackType.SETUP)
def setup(self, ctx: DenoiseContext):
t2i_model: T2IAdapter
with self._node_context.models.load(self._model_id) as t2i_model:
_, _, latents_height, latents_width = ctx.inputs.orig_latents.shape
self._adapter_state = self._run_model(
model=t2i_model,
image=self._image,
latents_height=latents_height,
latents_width=latents_width,
)
def _run_model(
self,
model: T2IAdapter,
image: Image,
latents_height: int,
latents_width: int,
):
# Resize the T2I-Adapter input image.
# We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
# result will match the latent image's dimensions after max_unet_downscale is applied.
input_height = latents_height // self._max_unet_downscale * model.total_downscale_factor
input_width = latents_width // self._max_unet_downscale * model.total_downscale_factor
# Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
# a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
# T2I-Adapter model.
#
# Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
# of the same requirements (e.g. preserving binary masks during resize).
t2i_image = prepare_control_image(
image=image,
do_classifier_free_guidance=False,
width=input_width,
height=input_height,
num_channels=model.config["in_channels"],
device=model.device,
dtype=model.dtype,
resize_mode=self._resize_mode,
)
return model(t2i_image)
@callback(ExtensionCallbackType.PRE_UNET)
def pre_unet_step(self, ctx: DenoiseContext):
# skip if model not active in current step
total_steps = len(ctx.inputs.timesteps)
first_step = math.floor(self._begin_step_percent * total_steps)
last_step = math.ceil(self._end_step_percent * total_steps)
if ctx.step_index < first_step or ctx.step_index > last_step:
return
weight = self._weight
if isinstance(weight, list):
weight = weight[ctx.step_index]
adapter_state = self._adapter_state
if ctx.conditioning_mode == ConditioningMode.Both:
adapter_state = [torch.cat([v] * 2) for v in adapter_state]
if ctx.unet_kwargs.down_intrablock_additional_residuals is None:
ctx.unet_kwargs.down_intrablock_additional_residuals = [v * weight for v in adapter_state]
else:
for i, value in enumerate(adapter_state):
ctx.unet_kwargs.down_intrablock_additional_residuals[i] += value * weight

View File

@ -20,10 +20,14 @@ from diffusers import (
)
from diffusers.schedulers.scheduling_utils import SchedulerMixin
# TODO: add dpmpp_3s/dpmpp_3s_k when fix released
# https://github.com/huggingface/diffusers/issues/9007
SCHEDULER_NAME_VALUES = Literal[
"ddim",
"ddpm",
"deis",
"deis_k",
"lms",
"lms_k",
"pndm",
@ -33,16 +37,21 @@ SCHEDULER_NAME_VALUES = Literal[
"euler_k",
"euler_a",
"kdpm_2",
"kdpm_2_k",
"kdpm_2_a",
"kdpm_2_a_k",
"dpmpp_2s",
"dpmpp_2s_k",
"dpmpp_2m",
"dpmpp_2m_k",
"dpmpp_2m_sde",
"dpmpp_2m_sde_k",
"dpmpp_3m",
"dpmpp_3m_k",
"dpmpp_sde",
"dpmpp_sde_k",
"unipc",
"unipc_k",
"lcm",
"tcd",
]
@ -50,7 +59,8 @@ SCHEDULER_NAME_VALUES = Literal[
SCHEDULER_MAP: dict[SCHEDULER_NAME_VALUES, tuple[Type[SchedulerMixin], dict[str, Any]]] = {
"ddim": (DDIMScheduler, {}),
"ddpm": (DDPMScheduler, {}),
"deis": (DEISMultistepScheduler, {}),
"deis": (DEISMultistepScheduler, {"use_karras_sigmas": False}),
"deis_k": (DEISMultistepScheduler, {"use_karras_sigmas": True}),
"lms": (LMSDiscreteScheduler, {"use_karras_sigmas": False}),
"lms_k": (LMSDiscreteScheduler, {"use_karras_sigmas": True}),
"pndm": (PNDMScheduler, {}),
@ -59,17 +69,28 @@ SCHEDULER_MAP: dict[SCHEDULER_NAME_VALUES, tuple[Type[SchedulerMixin], dict[str,
"euler": (EulerDiscreteScheduler, {"use_karras_sigmas": False}),
"euler_k": (EulerDiscreteScheduler, {"use_karras_sigmas": True}),
"euler_a": (EulerAncestralDiscreteScheduler, {}),
"kdpm_2": (KDPM2DiscreteScheduler, {}),
"kdpm_2_a": (KDPM2AncestralDiscreteScheduler, {}),
"dpmpp_2s": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": False}),
"dpmpp_2s_k": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": True}),
"dpmpp_2m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False}),
"dpmpp_2m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True}),
"dpmpp_2m_sde": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "algorithm_type": "sde-dpmsolver++"}),
"dpmpp_2m_sde_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "algorithm_type": "sde-dpmsolver++"}),
"kdpm_2": (KDPM2DiscreteScheduler, {"use_karras_sigmas": False}),
"kdpm_2_k": (KDPM2DiscreteScheduler, {"use_karras_sigmas": True}),
"kdpm_2_a": (KDPM2AncestralDiscreteScheduler, {"use_karras_sigmas": False}),
"kdpm_2_a_k": (KDPM2AncestralDiscreteScheduler, {"use_karras_sigmas": True}),
"dpmpp_2s": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": False, "solver_order": 2}),
"dpmpp_2s_k": (DPMSolverSinglestepScheduler, {"use_karras_sigmas": True, "solver_order": 2}),
"dpmpp_2m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "solver_order": 2}),
"dpmpp_2m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "solver_order": 2}),
"dpmpp_2m_sde": (
DPMSolverMultistepScheduler,
{"use_karras_sigmas": False, "solver_order": 2, "algorithm_type": "sde-dpmsolver++"},
),
"dpmpp_2m_sde_k": (
DPMSolverMultistepScheduler,
{"use_karras_sigmas": True, "solver_order": 2, "algorithm_type": "sde-dpmsolver++"},
),
"dpmpp_3m": (DPMSolverMultistepScheduler, {"use_karras_sigmas": False, "solver_order": 3}),
"dpmpp_3m_k": (DPMSolverMultistepScheduler, {"use_karras_sigmas": True, "solver_order": 3}),
"dpmpp_sde": (DPMSolverSDEScheduler, {"use_karras_sigmas": False, "noise_sampler_seed": 0}),
"dpmpp_sde_k": (DPMSolverSDEScheduler, {"use_karras_sigmas": True, "noise_sampler_seed": 0}),
"unipc": (UniPCMultistepScheduler, {"cpu_only": True}),
"unipc": (UniPCMultistepScheduler, {"use_karras_sigmas": False, "cpu_only": True}),
"unipc_k": (UniPCMultistepScheduler, {"use_karras_sigmas": True, "cpu_only": True}),
"lcm": (LCMScheduler, {}),
"tcd": (TCDScheduler, {}),
}

View File

@ -1,51 +0,0 @@
from contextlib import contextmanager
from typing import Callable, List, Optional, Tuple, Union
import torch
import torch.nn as nn
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
from diffusers.models.lora import LoRACompatibleConv
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
@contextmanager
def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
if not seamless_axes:
yield
return
# override conv_forward
# https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
return torch.nn.functional.conv2d(
working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
)
original_layers: List[Tuple[nn.Conv2d, Callable]] = []
try:
x_mode = "circular" if "x" in seamless_axes else "constant"
y_mode = "circular" if "y" in seamless_axes else "constant"
conv_layers: List[torch.nn.Conv2d] = []
for module in model.modules():
if isinstance(module, torch.nn.Conv2d):
conv_layers.append(module)
for layer in conv_layers:
if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
layer.lora_layer = lambda *x: 0
original_layers.append((layer, layer._conv_forward))
layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
yield
finally:
for layer, orig_conv_forward in original_layers:
layer._conv_forward = orig_conv_forward