Merge branch 'main' into lstein/feat/simple-mm2-api

2024-08-30 20:32:17 +00:00 · 2024-05-17 22:54:03 -04:00
parent e77c7e40b7 17e1fc5254
commit 987ee704a1
241 changed files with 10422 additions and 7910 deletions
--- a/invokeai/backend/image_util/__init__.py
+++ b/invokeai/backend/image_util/__init__.py
@@ -4,5 +4,4 @@ Initialization file for invokeai.backend.image_util methods.

 from .infill_methods.patchmatch import PatchMatch  # noqa: F401
 from .pngwriter import PngWriter, PromptFormatter, retrieve_metadata, write_metadata  # noqa: F401
-from .seamless import configure_model_padding  # noqa: F401
 from .util import InitImageResizer, make_grid  # noqa: F401
--- a/invokeai/backend/image_util/safety_checker.py
+++ b/invokeai/backend/image_util/safety_checker.py
@@ -8,7 +8,7 @@ from pathlib import Path

 import numpy as np
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
-from PIL import Image
+from PIL import Image, ImageFilter
 from transformers import AutoFeatureExtractor

 import invokeai.backend.util.logging as logger
@@ -16,6 +16,7 @@ from invokeai.app.services.config.config_default import get_config
 from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.silence_warnings import SilenceWarnings

+repo_id = "CompVis/stable-diffusion-safety-checker"
 CHECKER_PATH = "core/convert/stable-diffusion-safety-checker"


@@ -24,30 +25,30 @@ class SafetyChecker:
    Wrapper around SafetyChecker model.
    """

-    safety_checker = None
    feature_extractor = None
-    tried_load: bool = False
+    safety_checker = None

    @classmethod
    def _load_safety_checker(cls):
-        if cls.tried_load:
+        if cls.safety_checker is not None and cls.feature_extractor is not None:
            return

        try:
-            cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(get_config().models_path / CHECKER_PATH)
-            cls.feature_extractor = AutoFeatureExtractor.from_pretrained(get_config().models_path / CHECKER_PATH)
+            model_path = get_config().models_path / CHECKER_PATH
+            if model_path.exists():
+                cls.feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
+                cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(model_path)
+            else:
+                model_path.mkdir(parents=True, exist_ok=True)
+                cls.feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
+                cls.feature_extractor.save_pretrained(model_path, safe_serialization=True)
+                cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(repo_id)
+                cls.safety_checker.save_pretrained(model_path, safe_serialization=True)
        except Exception as e:
            logger.warning(f"Could not load NSFW checker: {str(e)}")
-        cls.tried_load = True
-
-    @classmethod
-    def safety_checker_available(cls) -> bool:
-        return Path(get_config().models_path, CHECKER_PATH).exists()

    @classmethod
    def has_nsfw_concept(cls, image: Image.Image) -> bool:
-        if not cls.safety_checker_available() and cls.tried_load:
-            return False
        cls._load_safety_checker()
        if cls.safety_checker is None or cls.feature_extractor is None:
            return False
@@ -60,3 +61,24 @@ class SafetyChecker:
        with SilenceWarnings():
            checked_image, has_nsfw_concept = cls.safety_checker(images=x_image, clip_input=features.pixel_values)
        return has_nsfw_concept[0]
+
+    @classmethod
+    def blur_if_nsfw(cls, image: Image.Image) -> Image.Image:
+        if cls.has_nsfw_concept(image):
+            logger.warning("A potentially NSFW image has been detected. Image will be blurred.")
+            blurry_image = image.filter(filter=ImageFilter.GaussianBlur(radius=32))
+            caution = cls._get_caution_img()
+            # Center the caution image on the blurred image
+            x = (blurry_image.width - caution.width) // 2
+            y = (blurry_image.height - caution.height) // 2
+            blurry_image.paste(caution, (x, y), caution)
+            image = blurry_image
+
+        return image
+
+    @classmethod
+    def _get_caution_img(cls) -> Image.Image:
+        import invokeai.app.assets.images as image_assets
+
+        caution = Image.open(Path(image_assets.__path__[0]) / "caution.png")
+        return caution.resize((caution.width // 2, caution.height // 2))
--- a/invokeai/backend/image_util/seamless.py
+++ b/invokeai/backend/image_util/seamless.py
@@ -1,52 +0,0 @@
-import torch.nn as nn
-
-
-def _conv_forward_asymmetric(self, input, weight, bias):
-    """
-    Patch for Conv2d._conv_forward that supports asymmetric padding
-    """
-    working = nn.functional.pad(input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"])
-    working = nn.functional.pad(working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"])
-    return nn.functional.conv2d(
-        working,
-        weight,
-        bias,
-        self.stride,
-        nn.modules.utils._pair(0),
-        self.dilation,
-        self.groups,
-    )
-
-
-def configure_model_padding(model, seamless, seamless_axes):
-    """
-    Modifies the 2D convolution layers to use a circular padding mode based on
-    the `seamless` and `seamless_axes` options.
-    """
-    # TODO: get an explicit interface for this in diffusers: https://github.com/huggingface/diffusers/issues/556
-    for m in model.modules():
-        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
-            if seamless:
-                m.asymmetric_padding_mode = {}
-                m.asymmetric_padding = {}
-                m.asymmetric_padding_mode["x"] = "circular" if ("x" in seamless_axes) else "constant"
-                m.asymmetric_padding["x"] = (
-                    m._reversed_padding_repeated_twice[0],
-                    m._reversed_padding_repeated_twice[1],
-                    0,
-                    0,
-                )
-                m.asymmetric_padding_mode["y"] = "circular" if ("y" in seamless_axes) else "constant"
-                m.asymmetric_padding["y"] = (
-                    0,
-                    0,
-                    m._reversed_padding_repeated_twice[2],
-                    m._reversed_padding_repeated_twice[3],
-                )
-                m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
-            else:
-                m._conv_forward = nn.Conv2d._conv_forward.__get__(m, nn.Conv2d)
-                if hasattr(m, "asymmetric_padding_mode"):
-                    del m.asymmetric_padding_mode
-                if hasattr(m, "asymmetric_padding"):
-                    del m.asymmetric_padding
--- a/invokeai/backend/stable_diffusion/seamless.py
+++ b/invokeai/backend/stable_diffusion/seamless.py
@@ -1,89 +1,51 @@
-from __future__ import annotations
-
 from contextlib import contextmanager
-from typing import Callable, List, Union
+from typing import Callable, List, Optional, Tuple, Union

+import torch
 import torch.nn as nn
 from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
 from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
+from diffusers.models.lora import LoRACompatibleConv
 from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel


-def _conv_forward_asymmetric(self, input, weight, bias):
-    """
-    Patch for Conv2d._conv_forward that supports asymmetric padding
-    """
-    working = nn.functional.pad(input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"])
-    working = nn.functional.pad(working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"])
-    return nn.functional.conv2d(
-        working,
-        weight,
-        bias,
-        self.stride,
-        nn.modules.utils._pair(0),
-        self.dilation,
-        self.groups,
-    )
-
-
@contextmanager
 def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
    if not seamless_axes:
        yield
        return

-    # Callable: (input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor
-    to_restore: list[tuple[nn.Conv2d | nn.ConvTranspose2d, Callable]] = []
+    # override conv_forward
+    # https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
+    def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
+        self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
+        self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
+        working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
+        working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
+        return torch.nn.functional.conv2d(
+            working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
+        )
+
+    original_layers: List[Tuple[nn.Conv2d, Callable]] = []
+
    try:
-        # Hard coded to skip down block layers, allowing for seamless tiling at the expense of prompt adherence
-        skipped_layers = 1
-        for m_name, m in model.named_modules():
-            if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
-                continue
+        x_mode = "circular" if "x" in seamless_axes else "constant"
+        y_mode = "circular" if "y" in seamless_axes else "constant"

-            if isinstance(model, UNet2DConditionModel) and m_name.startswith("down_blocks.") and ".resnets." in m_name:
-                # down_blocks.1.resnets.1.conv1
-                _, block_num, _, resnet_num, submodule_name = m_name.split(".")
-                block_num = int(block_num)
-                resnet_num = int(resnet_num)
+        conv_layers: List[torch.nn.Conv2d] = []

-                if block_num >= len(model.down_blocks) - skipped_layers:
-                    continue
+        for module in model.modules():
+            if isinstance(module, torch.nn.Conv2d):
+                conv_layers.append(module)

-                # Skip the second resnet (could be configurable)
-                if resnet_num > 0:
-                    continue
-
-                # Skip Conv2d layers (could be configurable)
-                if submodule_name == "conv2":
-                    continue
-
-            m.asymmetric_padding_mode = {}
-            m.asymmetric_padding = {}
-            m.asymmetric_padding_mode["x"] = "circular" if ("x" in seamless_axes) else "constant"
-            m.asymmetric_padding["x"] = (
-                m._reversed_padding_repeated_twice[0],
-                m._reversed_padding_repeated_twice[1],
-                0,
-                0,
-            )
-            m.asymmetric_padding_mode["y"] = "circular" if ("y" in seamless_axes) else "constant"
-            m.asymmetric_padding["y"] = (
-                0,
-                0,
-                m._reversed_padding_repeated_twice[2],
-                m._reversed_padding_repeated_twice[3],
-            )
-
-            to_restore.append((m, m._conv_forward))
-            m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
+        for layer in conv_layers:
+            if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
+                layer.lora_layer = lambda *x: 0
+            original_layers.append((layer, layer._conv_forward))
+            layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)

        yield

    finally:
-        for module, orig_conv_forward in to_restore:
-            module._conv_forward = orig_conv_forward
-            if hasattr(module, "asymmetric_padding_mode"):
-                del module.asymmetric_padding_mode
-            if hasattr(module, "asymmetric_padding"):
-                del module.asymmetric_padding
+        for layer, orig_conv_forward in original_layers:
+            layer._conv_forward = orig_conv_forward