Merge branch 'main' into lstein/feat/simple-mm2-api

2024-08-30 20:32:17 +00:00 · 2024-04-28 11:33:26 -04:00
parent 70903ef057 241a1fdb57
commit bb04f496e0
103 changed files with 4797 additions and 779 deletions
--- a/invokeai/backend/image_util/hed.py
+++ b/invokeai/backend/image_util/hed.py
@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
 from PIL import Image

 from invokeai.backend.image_util.util import (
-    non_maximum_suppression,
+    nms,
    normalize_image_channel_count,
    np_to_pil,
    pil_to_np,
@ -134,7 +134,7 @@ class HEDProcessor:
        detected_map = cv2.resize(detected_map, (width, height), interpolation=cv2.INTER_LINEAR)

        if scribble:
-            detected_map = non_maximum_suppression(detected_map, 127, 3.0)
+            detected_map = nms(detected_map, 127, 3.0)
            detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
            detected_map[detected_map > 4] = 255
            detected_map[detected_map < 255] = 0
--- a/invokeai/backend/image_util/util.py
+++ b/invokeai/backend/image_util/util.py
@ -1,4 +1,5 @@
 from math import ceil, floor, sqrt
+from typing import Optional

 import cv2
 import numpy as np
@ -153,10 +154,13 @@ def resize_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.n
        return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_AREA)


-def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float):
+def nms(np_img: np.ndarray, threshold: Optional[int] = None, sigma: Optional[float] = None) -> np.ndarray:
    """
    Apply non-maximum suppression to an image.

+    If both threshold and sigma are provided, the image will be blurred before the suppression and thresholded afterwards,
+    resulting in a binary output image.
+
    This function is adapted from https://github.com/lllyasviel/ControlNet.

    Args:
@ -166,23 +170,36 @@ def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float):

    Returns:
        The image after non-maximum suppression.
+
+    Raises:
+        ValueError: If only one of threshold and sigma is provided.
    """

-    image = cv2.GaussianBlur(image.astype(np.float32), (0, 0), sigma)
+    # Raise a value error if only one of threshold and sigma is provided
+    if (threshold is None) != (sigma is None):
+        raise ValueError("Both threshold and sigma must be provided if one is provided.")
+
+    if sigma is not None and threshold is not None:
+        # Blurring the image can help to thin out features
+        np_img = cv2.GaussianBlur(np_img.astype(np.float32), (0, 0), sigma)

    filter_1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
    filter_2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
    filter_3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
    filter_4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)

-    y = np.zeros_like(image)
+    nms_img = np.zeros_like(np_img)

    for f in [filter_1, filter_2, filter_3, filter_4]:
-        np.putmask(y, cv2.dilate(image, kernel=f) == image, image)
+        np.putmask(nms_img, cv2.dilate(np_img, kernel=f) == np_img, np_img)

-    z = np.zeros_like(y, dtype=np.uint8)
-    z[y > threshold] = 255
-    return z
+    if sigma is not None and threshold is not None:
+        # We blurred - now threshold to get a binary image
+        thresholded = np.zeros_like(nms_img, dtype=np.uint8)
+        thresholded[nms_img > threshold] = 255
+        return thresholded
+
+    return nms_img


 def safe_step(x: np.ndarray, step: int = 2) -> np.ndarray:
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@ -301,12 +301,12 @@ class MainConfigBase(ModelConfigBase):
    default_settings: Optional[MainModelDefaultSettings] = Field(
        description="Default settings for this model", default=None
    )
+    variant: ModelVariantType = ModelVariantType.Normal


 class MainCheckpointConfig(CheckpointConfigBase, MainConfigBase):
    """Model config for main checkpoint models."""

-    variant: ModelVariantType = ModelVariantType.Normal
    prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon
    upcast_attention: bool = False

--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@ -51,6 +51,7 @@ LEGACY_CONFIGS: Dict[BaseModelType, Dict[ModelVariantType, Union[str, Dict[Sched
    },
    BaseModelType.StableDiffusionXL: {
        ModelVariantType.Normal: "sd_xl_base.yaml",
+        ModelVariantType.Inpaint: "sd_xl_inpaint.yaml",
    },
    BaseModelType.StableDiffusionXLRefiner: {
        ModelVariantType.Normal: "sd_xl_refiner.yaml",
--- a/invokeai/backend/model_manager/starter_models.py
+++ b/invokeai/backend/model_manager/starter_models.py
@ -155,7 +155,7 @@ STARTER_MODELS: list[StarterModel] = [
    StarterModel(
        name="IP Adapter",
        base=BaseModelType.StableDiffusion1,
-        source="InvokeAI/ip_adapter_sd15",
+        source="https://huggingface.co/InvokeAI/ip_adapter_sd15/resolve/main/ip-adapter_sd15.safetensors",
        description="IP-Adapter for SD 1.5 models",
        type=ModelType.IPAdapter,
        dependencies=[ip_adapter_sd_image_encoder],
@ -163,7 +163,7 @@ STARTER_MODELS: list[StarterModel] = [
    StarterModel(
        name="IP Adapter Plus",
        base=BaseModelType.StableDiffusion1,
-        source="InvokeAI/ip_adapter_plus_sd15",
+        source="https://huggingface.co/InvokeAI/ip_adapter_plus_sd15/resolve/main/ip-adapter-plus_sd15.safetensors",
        description="Refined IP-Adapter for SD 1.5 models",
        type=ModelType.IPAdapter,
        dependencies=[ip_adapter_sd_image_encoder],
@ -171,7 +171,7 @@ STARTER_MODELS: list[StarterModel] = [
    StarterModel(
        name="IP Adapter Plus Face",
        base=BaseModelType.StableDiffusion1,
-        source="InvokeAI/ip_adapter_plus_face_sd15",
+        source="https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15/resolve/main/ip-adapter-plus-face_sd15.safetensors",
        description="Refined IP-Adapter for SD 1.5 models, adapted for faces",
        type=ModelType.IPAdapter,
        dependencies=[ip_adapter_sd_image_encoder],
@ -179,7 +179,7 @@ STARTER_MODELS: list[StarterModel] = [
    StarterModel(
        name="IP Adapter SDXL",
        base=BaseModelType.StableDiffusionXL,
-        source="InvokeAI/ip_adapter_sdxl",
+        source="https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h/resolve/main/ip-adapter_sdxl_vit-h.safetensors",
        description="IP-Adapter for SDXL models",
        type=ModelType.IPAdapter,
        dependencies=[ip_adapter_sdxl_image_encoder],
--- a/invokeai/backend/util/catch_sigint.py
+++ b/invokeai/backend/util/catch_sigint.py
@ -0,0 +1,40 @@
+"""
+This module defines a context manager `catch_sigint()` which temporarily replaces
+the SIGINT handler defined by the ASGI in order to allow the user to ^C the application
+and shut it down immediately. This was implemented in order to allow the user to interrupt
+slow model hashing during startup.
+
+Use like this:
+
+  from invokeai.backend.util.catch_sigint import catch_sigint
+  with catch_sigint():
+      run_some_hard_to_interrupt_process()
+"""
+
+import signal
+from contextlib import contextmanager
+from typing import Generator
+
+
+def sigint_handler(signum, frame):  # type: ignore
+    """Reset SIGINT to `signal.SIG_DFL` and re-raise the signal in this process."""
+    # Switching to SIG_DFL first means the re-raised SIGINT is not delivered back to this handler.
+    signal.signal(signal.SIGINT, signal.SIG_DFL)
+    signal.raise_signal(signal.SIGINT)
+
+
+@contextmanager
+def catch_sigint() -> Generator[None, None, None]:
+    """Install `sigint_handler` as the SIGINT handler for the duration of the `with` block.
+
+    The handler that was active on entry is reinstalled on exit, including when the body raises.
+    """
+    original_handler = signal.getsignal(signal.SIGINT)
+    signal.signal(signal.SIGINT, sigint_handler)
+    try:
+        yield
+    finally:
+        # getsignal() returns None when the previous handler was not installed from Python;
+        # signal.signal() rejects None, so there is nothing that can be restored in that case.
+        if original_handler is not None:
+            signal.signal(signal.SIGINT, original_handler)