From 32078227388f38f5763d546f2cc0b76780f04989 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:41:37 +1000 Subject: [PATCH 1/9] Update invokeai_version.py --- invokeai/version/invokeai_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/version/invokeai_version.py b/invokeai/version/invokeai_version.py index caf00b543f..0c1d77bc3d 100644 --- a/invokeai/version/invokeai_version.py +++ b/invokeai/version/invokeai_version.py @@ -1 +1 @@ -__version__ = "4.2.0a2" +__version__ = "4.2.0a3" From 5b8f77f990a97dc97bd95508387af0d503faa9fd Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:26:04 +1000 Subject: [PATCH 2/9] tidy(nodes): move cnet mode literals to utils Now they can be used in type signatures without circular imports. --- invokeai/app/invocations/controlnet_image_processors.py | 9 +-------- invokeai/app/invocations/metadata.py | 2 +- invokeai/app/invocations/t2i_adapter.py | 2 +- invokeai/app/util/controlnet_utils.py | 7 +++++++ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index a49c910eeb..354a736a74 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -35,6 +35,7 @@ from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.invocations.primitives import ImageOutput from invokeai.app.invocations.util import validate_begin_end_step, validate_weights from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES from invokeai.backend.image_util.canny import get_canny_edges from invokeai.backend.image_util.depth_anything import DepthAnythingDetector from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector @@ -44,14 +45,6 @@ from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output -CONTROLNET_MODE_VALUES = Literal["balanced", "more_prompt", "more_control", "unbalanced"] -CONTROLNET_RESIZE_VALUES = Literal[ - "just_resize", - "crop_resize", - "fill_resize", - "just_resize_simple", -] - class ControlField(BaseModel): image: ImageField = Field(description="The control image") diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index a02d0a57ef..9c7264a9bb 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -3,7 +3,6 @@ from typing import Any, Literal, Optional, Union from pydantic import BaseModel, ConfigDict, Field from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output -from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES from invokeai.app.invocations.fields import ( FieldDescriptions, ImageField, @@ -14,6 +13,7 @@ from invokeai.app.invocations.fields import ( ) from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES from ...version import __version__ diff --git 
a/invokeai/app/invocations/t2i_adapter.py b/invokeai/app/invocations/t2i_adapter.py index e550a7b313..b22a089d3f 100644 --- a/invokeai/app/invocations/t2i_adapter.py +++ b/invokeai/app/invocations/t2i_adapter.py @@ -8,11 +8,11 @@ from invokeai.app.invocations.baseinvocation import ( invocation, invocation_output, ) -from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_RESIZE_VALUES from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField, OutputField, UIType from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.invocations.util import validate_begin_end_step, validate_weights from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.app.util.controlnet_utils import CONTROLNET_RESIZE_VALUES class T2IAdapterField(BaseModel): diff --git a/invokeai/app/util/controlnet_utils.py b/invokeai/app/util/controlnet_utils.py index b3e2560211..e9baab010d 100644 --- a/invokeai/app/util/controlnet_utils.py +++ b/invokeai/app/util/controlnet_utils.py @@ -7,6 +7,13 @@ from controlnet_aux.util import HWC3 from diffusers.utils import PIL_INTERPOLATION from einops import rearrange from PIL import Image +CONTROLNET_RESIZE_VALUES = Literal[ + "just_resize", + "crop_resize", + "fill_resize", + "just_resize_simple", +] +CONTROLNET_MODE_VALUES = Literal["balanced", "more_prompt", "more_control", "unbalanced"] ################################################################### # Copy of scripts/lvminthin.py from Mikubill/sd-webui-controlnet From 6b0bf5968284b8d0878d5db7d51adf34d5925267 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:28:39 +1000 Subject: [PATCH 3/9] feat(backend): update nms util to make blur/thresholding optional --- invokeai/backend/image_util/hed.py | 4 ++-- invokeai/backend/image_util/util.py | 31 +++++++++++++++++++------ tests/app/util/test_controlnet_utils.py | 8 +++++++ 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/invokeai/backend/image_util/hed.py b/invokeai/backend/image_util/hed.py index 378e3b96e9..97706df8b9 100644 --- a/invokeai/backend/image_util/hed.py +++ b/invokeai/backend/image_util/hed.py @@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download from PIL import Image from invokeai.backend.image_util.util import ( - non_maximum_suppression, + nms, normalize_image_channel_count, np_to_pil, pil_to_np, @@ -134,7 +134,7 @@ class HEDProcessor: detected_map = cv2.resize(detected_map, (width, height), interpolation=cv2.INTER_LINEAR) if scribble: - detected_map = non_maximum_suppression(detected_map, 127, 3.0) + detected_map = nms(detected_map, 127, 3.0) detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0) detected_map[detected_map > 4] = 255 detected_map[detected_map < 255] = 0 diff --git a/invokeai/backend/image_util/util.py b/invokeai/backend/image_util/util.py index 7cfe0ad1a5..f704f068e3 100644 --- a/invokeai/backend/image_util/util.py +++ b/invokeai/backend/image_util/util.py @@ -1,4 +1,5 @@ from math import ceil, floor, sqrt +from typing import Optional import cv2 import numpy as np @@ -153,10 +154,13 @@ def resize_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.n return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_AREA) -def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float): +def nms(np_img: np.ndarray, threshold: Optional[int] = None, sigma: Optional[float] = None) -> np.ndarray: """ Apply non-maximum 
suppression to an image. + If both threshold and sigma are provided, the image will blurred before the suppression and thresholded afterwards, + resulting in a binary output image. + This function is adapted from https://github.com/lllyasviel/ControlNet. Args: @@ -166,23 +170,36 @@ def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float): Returns: The image after non-maximum suppression. + + Raises: + ValueError: If only one of threshold and sigma provided. """ - image = cv2.GaussianBlur(image.astype(np.float32), (0, 0), sigma) + # Raise a value error if only one of threshold and sigma is provided + if (threshold is None) != (sigma is None): + raise ValueError("Both threshold and sigma must be provided if one is provided.") + + if sigma is not None and threshold is not None: + # Blurring the image can help to thin out features + np_img = cv2.GaussianBlur(np_img.astype(np.float32), (0, 0), sigma) filter_1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) filter_2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) filter_3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) filter_4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) - y = np.zeros_like(image) + nms_img = np.zeros_like(np_img) for f in [filter_1, filter_2, filter_3, filter_4]: - np.putmask(y, cv2.dilate(image, kernel=f) == image, image) + np.putmask(nms_img, cv2.dilate(np_img, kernel=f) == np_img, np_img) - z = np.zeros_like(y, dtype=np.uint8) - z[y > threshold] = 255 - return z + if sigma is not None and threshold is not None: + # We blurred - now threshold to get a binary image + thresholded = np.zeros_like(nms_img, dtype=np.uint8) + thresholded[nms_img > threshold] = 255 + return thresholded + + return nms_img def safe_step(x: np.ndarray, step: int = 2) -> np.ndarray: diff --git a/tests/app/util/test_controlnet_utils.py b/tests/app/util/test_controlnet_utils.py index 21662cce8d..9806fe7806 100644 --- a/tests/app/util/test_controlnet_utils.py +++ b/tests/app/util/test_controlnet_utils.py @@ -3,6 +3,7 @@ import pytest from PIL import Image from invokeai.app.util.controlnet_utils import prepare_control_image +from invokeai.backend.image_util.util import nms @pytest.mark.parametrize("num_channels", [1, 2, 3]) @@ -40,3 +41,10 @@ def test_prepare_control_image_num_channels_too_large(num_channels): device="cpu", do_classifier_free_guidance=False, ) + + +@pytest.mark.parametrize("threshold,sigma", [(None, 1.0), (1, None)]) +def test_nms_invalid_options(threshold: None | int, sigma: None | float): + """Test that an exception is raised in nms(...) 
if only one of the `threshold` or `sigma` parameters are provided.""" + with pytest.raises(ValueError): + nms(np.zeros((256, 256, 3), dtype=np.uint8), threshold, sigma) From 398f37c0ed2fd321438003204a9c2145fde91be8 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 13:05:11 +1000 Subject: [PATCH 4/9] tidy(backend): clean up controlnet_utils - Use the our adaptation of the HWC3 function with better types - Extraction some of the util functions, name them better, add comments - Improve type annotations - Remove unreachable codepaths --- invokeai/app/util/controlnet_utils.py | 257 +++++++++++++------------- 1 file changed, 129 insertions(+), 128 deletions(-) diff --git a/invokeai/app/util/controlnet_utils.py b/invokeai/app/util/controlnet_utils.py index e9baab010d..fde8d52ee6 100644 --- a/invokeai/app/util/controlnet_utils.py +++ b/invokeai/app/util/controlnet_utils.py @@ -1,12 +1,13 @@ -from typing import Union +from typing import Any, Literal, Union import cv2 import numpy as np import torch -from controlnet_aux.util import HWC3 -from diffusers.utils import PIL_INTERPOLATION from einops import rearrange from PIL import Image + +from invokeai.backend.image_util.util import nms, normalize_image_channel_count + CONTROLNET_RESIZE_VALUES = Literal[ "just_resize", "crop_resize", @@ -75,17 +76,6 @@ def lvmin_thin(x, prunings=True): return y -def nake_nms(x): - f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) - f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) - f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) - f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) - y = np.zeros_like(x) - for f in [f1, f2, f3, f4]: - np.putmask(y, cv2.dilate(x, kernel=f) == x, x) - return y - - ################################################################################ # copied from Mikubill/sd-webui-controlnet external_code.py and modified for InvokeAI ################################################################################ @@ -141,98 +131,122 @@ def pixel_perfect_resolution( return int(np.round(estimation)) +def clone_contiguous(x: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]: + """Get a memory-contiguous clone of the given numpy array, as a safety measure and to improve computation efficiency.""" + return np.ascontiguousarray(x).copy() + + +def np_img_to_torch(np_img: np.ndarray[Any, Any], device: torch.device) -> torch.Tensor: + """Convert a numpy image to a PyTorch tensor. The image is normalized to 0-1, rearranged to BCHW format and sent to + the specified device.""" + + torch_img = torch.from_numpy(np_img) + normalized = torch_img.float() / 255.0 + bchw = rearrange(normalized, "h w c -> 1 c h w") + on_device = bchw.to(device) + return on_device.clone() + + +def heuristic_resize(np_img: np.ndarray[Any, Any], size: tuple[int, int]) -> np.ndarray[Any, Any]: + """Resizes an image using a heuristic to choose the best resizing strategy. + + - If the image appears to be an edge map, special handling will be applied to ensure the edges are not distorted. + - Single-pixel edge maps use NMS and thinning to keep the edges as single-pixel lines. + - Low-color-count images are resized with nearest-neighbor to preserve color information (for e.g. segmentation maps). + - The alpha channel is handled separately to ensure it is resized correctly. + + Args: + np_img (np.ndarray): The input image. + size (tuple[int, int]): The target size for the image. 
+ + Returns: + np.ndarray: The resized image. + + Adapted from https://github.com/Mikubill/sd-webui-controlnet. + """ + + # Return early if the image is already at the requested size + if np_img.shape[0] == size[1] and np_img.shape[1] == size[0]: + return np_img + + # If the image has an alpha channel, separate it for special handling later. + inpaint_mask = None + if np_img.ndim == 3 and np_img.shape[2] == 4: + inpaint_mask = np_img[:, :, 3] + np_img = np_img[:, :, 0:3] + + new_size_is_smaller = (size[0] * size[1]) < (np_img.shape[0] * np_img.shape[1]) + new_size_is_bigger = (size[0] * size[1]) > (np_img.shape[0] * np_img.shape[1]) + unique_color_count = np.unique(np_img.reshape(-1, np_img.shape[2]), axis=0).shape[0] + is_one_pixel_edge = False + is_binary = False + + if unique_color_count == 2: + # If the image has only two colors, it is likely binary. Check if the image has one-pixel edges. + is_binary = np.min(np_img) < 16 and np.max(np_img) > 240 + if is_binary: + eroded = cv2.erode(np_img, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) + dilated = cv2.dilate(eroded, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) + one_pixel_edge_count = np.where(dilated < np_img)[0].shape[0] + all_edge_count = np.where(np_img > 127)[0].shape[0] + is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count + + if 2 < unique_color_count < 200: + # With a low color count, we assume this is a map where exact colors are important. Near-neighbor preserves + # the colors as needed. + interpolation = cv2.INTER_NEAREST + elif new_size_is_smaller: + # This works best for downscaling + interpolation = cv2.INTER_AREA + else: + # Fall back for other cases + interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. NEVER CHANGE THIS + + # This may be further transformed depending on the binary nature of the image. + resized = cv2.resize(np_img, size, interpolation=interpolation) + + if inpaint_mask is not None: + # Resize the inpaint mask to match the resized image using the same interpolation method. + inpaint_mask = cv2.resize(inpaint_mask, size, interpolation=interpolation) + + # If the image is binary, we will perform some additional processing to ensure the edges are preserved. + if is_binary: + resized = np.mean(resized.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8) + if is_one_pixel_edge: + # Use NMS and thinning to keep the edges as single-pixel lines. + resized = nms(resized) + _, resized = cv2.threshold(resized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + resized = lvmin_thin(resized, prunings=new_size_is_bigger) + else: + _, resized = cv2.threshold(resized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) + resized = np.stack([resized] * 3, axis=2) + + # Restore the alpha channel if it was present. 
+ if inpaint_mask is not None: + inpaint_mask = (inpaint_mask > 127).astype(np.float32) * 255.0 + inpaint_mask = inpaint_mask[:, :, None].clip(0, 255).astype(np.uint8) + resized = np.concatenate([resized, inpaint_mask], axis=2) + + return resized + + ########################################################################### # Copied from detectmap_proc method in scripts/detectmap_proc.py in Mikubill/sd-webui-controlnet # modified for InvokeAI ########################################################################### -# def detectmap_proc(detected_map, module, resize_mode, h, w): -def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device: torch.device = torch.device("cpu")): - # if 'inpaint' in module: - # np_img = np_img.astype(np.float32) - # else: - # np_img = HWC3(np_img) - np_img = HWC3(np_img) +def np_img_resize( + np_img: np.ndarray, + resize_mode: CONTROLNET_RESIZE_VALUES, + h: int, + w: int, + device: torch.device = torch.device("cpu"), +) -> tuple[torch.Tensor, np.ndarray[Any, Any]]: + np_img = normalize_image_channel_count(np_img) - def safe_numpy(x): - # A very safe method to make sure that Apple/Mac works - y = x - - # below is very boring but do not change these. If you change these Apple or Mac may fail. - y = y.copy() - y = np.ascontiguousarray(y) - y = y.copy() - return y - - def get_pytorch_control(x): - # A very safe method to make sure that Apple/Mac works - y = x - - # below is very boring but do not change these. If you change these Apple or Mac may fail. - y = torch.from_numpy(y) - y = y.float() / 255.0 - y = rearrange(y, "h w c -> 1 c h w") - y = y.clone() - # y = y.to(devices.get_device_for("controlnet")) - y = y.to(device) - y = y.clone() - return y - - def high_quality_resize(x: np.ndarray, size): - # Written by lvmin - # Super high-quality control map up-scaling, considering binary, seg, and one-pixel edges - inpaint_mask = None - if x.ndim == 3 and x.shape[2] == 4: - inpaint_mask = x[:, :, 3] - x = x[:, :, 0:3] - - new_size_is_smaller = (size[0] * size[1]) < (x.shape[0] * x.shape[1]) - new_size_is_bigger = (size[0] * size[1]) > (x.shape[0] * x.shape[1]) - unique_color_count = np.unique(x.reshape(-1, x.shape[2]), axis=0).shape[0] - is_one_pixel_edge = False - is_binary = False - if unique_color_count == 2: - is_binary = np.min(x) < 16 and np.max(x) > 240 - if is_binary: - xc = x - xc = cv2.erode(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) - xc = cv2.dilate(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1) - one_pixel_edge_count = np.where(xc < x)[0].shape[0] - all_edge_count = np.where(x > 127)[0].shape[0] - is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count - - if 2 < unique_color_count < 200: - interpolation = cv2.INTER_NEAREST - elif new_size_is_smaller: - interpolation = cv2.INTER_AREA - else: - interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. 
NEVER CHANGE THIS - - y = cv2.resize(x, size, interpolation=interpolation) - if inpaint_mask is not None: - inpaint_mask = cv2.resize(inpaint_mask, size, interpolation=interpolation) - - if is_binary: - y = np.mean(y.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8) - if is_one_pixel_edge: - y = nake_nms(y) - _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) - y = lvmin_thin(y, prunings=new_size_is_bigger) - else: - _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) - y = np.stack([y] * 3, axis=2) - - if inpaint_mask is not None: - inpaint_mask = (inpaint_mask > 127).astype(np.float32) * 255.0 - inpaint_mask = inpaint_mask[:, :, None].clip(0, 255).astype(np.uint8) - y = np.concatenate([y, inpaint_mask], axis=2) - - return y - - # if resize_mode == external_code.ResizeMode.RESIZE: if resize_mode == "just_resize": # RESIZE - np_img = high_quality_resize(np_img, (w, h)) - np_img = safe_numpy(np_img) - return get_pytorch_control(np_img), np_img + np_img = heuristic_resize(np_img, (w, h)) + np_img = clone_contiguous(np_img) + return np_img_to_torch(np_img, device), np_img old_h, old_w, _ = np_img.shape old_w = float(old_w) @@ -243,7 +257,6 @@ def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device: def safeint(x: Union[int, float]) -> int: return int(np.round(x)) - # if resize_mode == external_code.ResizeMode.OUTER_FIT: if resize_mode == "fill_resize": # OUTER_FIT k = min(k0, k1) borders = np.concatenate([np_img[0, :, :], np_img[-1, :, :], np_img[:, 0, :], np_img[:, -1, :]], axis=0) @@ -252,23 +265,23 @@ def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device: # Inpaint hijack high_quality_border_color[3] = 255 high_quality_background = np.tile(high_quality_border_color[None, None], [h, w, 1]) - np_img = high_quality_resize(np_img, (safeint(old_w * k), safeint(old_h * k))) + np_img = heuristic_resize(np_img, (safeint(old_w * k), safeint(old_h * k))) new_h, new_w, _ = np_img.shape pad_h = max(0, (h - new_h) // 2) pad_w = max(0, (w - new_w) // 2) high_quality_background[pad_h : pad_h + new_h, pad_w : pad_w + new_w] = np_img np_img = high_quality_background - np_img = safe_numpy(np_img) - return get_pytorch_control(np_img), np_img + np_img = clone_contiguous(np_img) + return np_img_to_torch(np_img, device), np_img else: # resize_mode == "crop_resize" (INNER_FIT) k = max(k0, k1) - np_img = high_quality_resize(np_img, (safeint(old_w * k), safeint(old_h * k))) + np_img = heuristic_resize(np_img, (safeint(old_w * k), safeint(old_h * k))) new_h, new_w, _ = np_img.shape pad_h = max(0, (new_h - h) // 2) pad_w = max(0, (new_w - w) // 2) np_img = np_img[pad_h : pad_h + h, pad_w : pad_w + w] - np_img = safe_numpy(np_img) - return get_pytorch_control(np_img), np_img + np_img = clone_contiguous(np_img) + return np_img_to_torch(np_img, device), np_img def prepare_control_image( @@ -276,12 +289,12 @@ def prepare_control_image( width: int, height: int, num_channels: int = 3, - device="cuda", - dtype=torch.float16, - do_classifier_free_guidance=True, - control_mode="balanced", - resize_mode="just_resize_simple", -): + device: str = "cuda", + dtype: torch.dtype = torch.float16, + control_mode: CONTROLNET_MODE_VALUES = "balanced", + resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple", + do_classifier_free_guidance: bool = True, +) -> torch.Tensor: """Pre-process images for ControlNets or T2I-Adapters. Args: @@ -299,26 +312,15 @@ def prepare_control_image( resize_mode (str, optional): Defaults to "just_resize_simple". 
Raises: - NotImplementedError: If resize_mode == "crop_resize_simple". - NotImplementedError: If resize_mode == "fill_resize_simple". ValueError: If `resize_mode` is not recognized. ValueError: If `num_channels` is out of range. Returns: torch.Tensor: The pre-processed input tensor. """ - if ( - resize_mode == "just_resize_simple" - or resize_mode == "crop_resize_simple" - or resize_mode == "fill_resize_simple" - ): + if resize_mode == "just_resize_simple": image = image.convert("RGB") - if resize_mode == "just_resize_simple": - image = image.resize((width, height), resample=PIL_INTERPOLATION["lanczos"]) - elif resize_mode == "crop_resize_simple": - raise NotImplementedError(f"prepare_control_image is not implemented for resize_mode='{resize_mode}'.") - elif resize_mode == "fill_resize_simple": - raise NotImplementedError(f"prepare_control_image is not implemented for resize_mode='{resize_mode}'.") + image = image.resize((width, height), resample=Image.LANCZOS) nimage = np.array(image) nimage = nimage[None, :] nimage = np.concatenate([nimage], axis=0) @@ -335,8 +337,7 @@ def prepare_control_image( resize_mode=resize_mode, h=height, w=width, - # device=torch.device('cpu') - device=device, + device=torch.device(device), ) else: raise ValueError(f"Unsupported resize_mode: '{resize_mode}'.") From dac2d78da693ef76ba13eb8b237da13aeac8dc3f Mon Sep 17 00:00:00 2001 From: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Date: Wed, 24 Apr 2024 21:46:22 -0400 Subject: [PATCH 5/9] Update README.md --- README.md | 141 +++--------------------------------------------------- 1 file changed, 6 insertions(+), 135 deletions(-) diff --git a/README.md b/README.md index ff06db8d21..b9f8463492 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ Linux/Mac systems, and `C:\Users\YourName\invokeai` on Windows. This directory w macOS, open a Terminal window, drag `invoke.sh` from the folder into the Terminal, and press return. On Linux, run `invoke.sh` -9. Press 2 to open the "browser-based UI", press enter/return, wait a +9. Press 1 to open the "browser-based UI", press enter/return, wait a minute or two for Stable Diffusion to start up, then open your browser and go to http://localhost:9090. @@ -183,22 +183,15 @@ the command `npm install -g pnpm` if needed) pip install InvokeAI --use-pep517 ``` -6. Configure InvokeAI and install a starting set of image generation models (you only need to do this once): - - ```terminal - invokeai-configure --root . - ``` - Don't miss the dot at the end! - -7. Launch the web server (do it every time you run InvokeAI): +6. Launch the web server (do it every time you run InvokeAI): ```terminal invokeai-web ``` -8. Point your browser to http://localhost:9090 to bring up the web interface. +7. Point your browser to http://localhost:9090 to bring up the web interface. -9. Type `banana sushi` in the box on the top left and click `Invoke`. +8. Type `banana sushi` in the box on the top left and click `Invoke`. Be sure to activate the virtual environment each time before re-launching InvokeAI, using `source .venv/bin/activate` or `.venv\Scripts\activate`. @@ -211,128 +204,6 @@ AMD card (using the ROCm driver). For full installation and upgrade instructions, please see: [InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALL_SOURCE/) - -### Migrating a v2.3 InvokeAI root directory - -The InvokeAI root directory is where the InvokeAI startup file, -installed models, and generated images are stored. 
It is ordinarily -named `invokeai` and located in your home directory. The contents and -layout of this directory has changed between versions 2.3 and 3.0 and -cannot be used directly. - -We currently recommend that you use the installer to create a new root -directory named differently from the 2.3 one, e.g. `invokeai-3` and -then use a migration script to copy your 2.3 models into the new -location. However, if you choose, you can upgrade this directory in -place. This section gives both recipes. - -#### Creating a new root directory and migrating old models - -This is the safer recipe because it leaves your old root directory in -place to fall back on. - -1. Follow the instructions above to create and install InvokeAI in a -directory that has a different name from the 2.3 invokeai directory. -In this example, we will use "invokeai-3" - -2. When you are prompted to select models to install, select a minimal -set of models, such as stable-diffusion-v1.5 only. - -3. After installation is complete launch `invokeai.sh` (Linux/Mac) or -`invokeai.bat` and select option 8 "Open the developers console". This -will take you to the command line. - -4. Issue the command `invokeai-migrate3 --from /path/to/v2.3-root --to -/path/to/invokeai-3-root`. Provide the correct `--from` and `--to` -paths for your v2.3 and v3.0 root directories respectively. - -This will copy and convert your old models from 2.3 format to 3.0 -format and create a new `models` directory in the 3.0 directory. The -old models directory (which contains the models selected at install -time) will be renamed `models.orig` and can be deleted once you have -confirmed that the migration was successful. - - If you wish, you can pass the 2.3 root directory to both `--from` and -`--to` in order to update in place. Warning: this directory will no -longer be usable with InvokeAI 2.3. - -#### Migrating in place - -For the adventurous, you may do an in-place upgrade from 2.3 to 3.0 -without touching the command line. ***This recipe does not work on -Windows platforms due to a bug in the Windows version of the 2.3 -upgrade script.** See the next section for a Windows recipe. - -##### For Mac and Linux Users: - -1. Launch the InvokeAI launcher script in your current v2.3 root directory. - -2. Select option [9] "Update InvokeAI" to bring up the updater dialog. - -3. Select option [1] to upgrade to the latest release. - -4. Once the upgrade is finished you will be returned to the launcher -menu. Select option [6] "Re-run the configure script to fix a broken -install or to complete a major upgrade". - -This will run the configure script against the v2.3 directory and -update it to the 3.0 format. The following files will be replaced: - - - The invokeai.init file, replaced by invokeai.yaml - - The models directory - - The configs/models.yaml model index - -The original versions of these files will be saved with the suffix -".orig" appended to the end. Once you have confirmed that the upgrade -worked, you can safely remove these files. Alternatively you can -restore a working v2.3 directory by removing the new files and -restoring the ".orig" files' original names. - -##### For Windows Users: - -Windows Users can upgrade with the - -1. Enter the 2.3 root directory you wish to upgrade -2. Launch `invoke.sh` or `invoke.bat` -3. Select the "Developer's console" option [8] -4. Type the following commands - -``` -pip install "invokeai @ https://github.com/invoke-ai/InvokeAI/archive/refs/tags/v3.0.0" --use-pep517 --upgrade -invokeai-configure --root . 
-``` -(Replace `v3.0.0` with the current release number if this document is out of date). - -The first command will install and upgrade new software to run -InvokeAI. The second will prepare the 2.3 directory for use with 3.0. -You may now launch the WebUI in the usual way, by selecting option [1] -from the launcher script - -#### Migrating Images - -The migration script will migrate your invokeai settings and models, -including textual inversion models, LoRAs and merges that you may have -installed previously. However it does **not** migrate the generated -images stored in your 2.3-format outputs directory. To do this, you -need to run an additional step: - -1. From a working InvokeAI 3.0 root directory, start the launcher and -enter menu option [8] to open the "developer's console". - -2. At the developer's console command line, type the command: - -```bash -invokeai-import-images -``` - -3. This will lead you through the process of confirming the desired - source and destination for the imported images. The images will - appear in the gallery board of your choice, and contain the - original prompt, model name, and other parameters used to generate - the image. - -(Many kudos to **techjedi** for contributing this script.) - ## Hardware Requirements InvokeAI is supported across Linux, Windows and macOS. Linux @@ -381,7 +252,7 @@ Invoke AI provides an organized gallery system for easily storing, accessing, an ### Other features - *Support for both ckpt and diffusers models* -- *SD 2.0, 2.1, XL support* +- *SD1.5, SD2.0, and SDXL support* - *Upscaling Tools* - *Embedding Manager & Support* - *Model Manager & Support* @@ -427,5 +298,5 @@ their time, hard work and effort. For support, please use this repository's GitHub Issues tracking service, or join the [Discord][discord link]. -Original portions of the software are Copyright (c) 2023 by respective contributors. +Original portions of the software are Copyright (c) 2024 by respective contributors. From d546823c4d18248bf0a5e4d8dd16c28e7164246e Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:29:26 +1000 Subject: [PATCH 6/9] docs: pruning and tidying readme --- README.md | 353 ++++++++++++------------------------------------------ 1 file changed, 79 insertions(+), 274 deletions(-) diff --git a/README.md b/README.md index b9f8463492..6ce131493c 100644 --- a/README.md +++ b/README.md @@ -2,18 +2,90 @@ ![project hero](https://github.com/invoke-ai/InvokeAI/assets/31807370/6e3728c7-e90e-4711-905c-3b55844ff5be) -# Invoke - Professional Creative AI Tools for Visual Media -## To learn more about Invoke, or implement our Business solutions, visit [invoke.com](https://www.invoke.com/about) +# Invoke - Professional Creative AI Tools for Visual Media + +## To learn more about Invoke, or implement our Business solutions, visit [invoke.com](https://www.invoke.com/about) +[![discord badge]][discord link] [![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link] [![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link] [![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link] + -[![discord badge]][discord link] +Invoke is a leading creative engine built to empower professionals and enthusiasts alike. 
Generate and create stunning visual media using the latest AI-driven technologies. Invoke offers an industry leading web-based UI, and serves as the foundation for multiple commercial products. -[![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link] +**Quick links**: [Installation](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/) - [Discord](https://discord.gg/ZmtBAhwWhy) - [Documentation and Tutorials](https://invoke-ai.github.io/InvokeAI) - [Bug Reports](https://github.com/invoke-ai/InvokeAI/issues) - [Contributing](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/) -[![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link] +
-[![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link] +![Highlighted Features - Canvas and Workflows](https://github.com/invoke-ai/InvokeAI/assets/31807370/708f7a82-084f-4860-bfbe-e2588c53548d) + +
+ +## Quick Start + +1. Download and unzip the installer from the bottom of the [latest release](https://github.com/invoke-ai/InvokeAI/releases/latest). +2. Run the installer script. + +- **Windows**: Double-click on the `install.bat` script. +- **macOS**: Open a Terminal window, drag the file `install.sh` from Finder into the Terminal, and press enter. +- **Linux**: Run `install.sh`. + +3. When prompted, enter a location for the install and select your GPU type. +4. Once the install finishes, find the directory you selected during install. The default location is `C:\Users\Username\invokeai` for Windows or `~/invokeai` for Linux/macOS. +6. Run the launcher script (`invoke.bat` for Windows, `invoke.sh` for macOS and Linux) - the same way you ran the installer script in step 2. +7. Select option 1 to start the application. Once it starts up, open your browser and go to . +8. Open the model manager tab to install a starter model and then you'll be ready to generate. + +More detail, including hardware requirements and manual install instructions, are available in the [installation documentation](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/). + +## Features + +Full details on features can be found in [our documentation](https://invoke-ai.github.io/InvokeAI/features/). + +### Web Server & UI + +Invoke runs a locally hosted web server & React UI with an industry-leading user experience. + +### Unified Canvas + +The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/out-painting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more. + +### Workflows & Nodes + +Invoke offers a fully featured workflow management solution, enabling users to combine the power of node-based workflows with the easy of a UI. This allows for customizable generation pipelines to be developed and shared by users looking to create specific workflows to support their production use-cases. + +### Board & Gallery Management + +Invoke features an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. Images can be dragged/dropped onto any Image-base UI element in the application, and rich metadata within the Image allows for easy recall of key prompts or settings used in your workflow. + +### Other features + +- Support for both ckpt and diffusers models +- SD1.5, SD2.0, and SDXL support +- Upscaling Tools +- Embedding Manager & Support +- Model Manager & Support +- Workflow creation & management +- Node-Based Architecture + +## Troubleshooting, FAQ and Support + +Please review our **[FAQ](https://invoke-ai.github.io/InvokeAI/help/FAQ/)** for solutions to common installation problems and other issues. + +For more help, please join our [Discord][discord link]. + +## Contributing + +Anyone who wishes to contribute to this project - whether documentation, features, bug fixes, code cleanup, testing, or code reviews - is very much encouraged to do so. + +Get started with contributing by reading our [Contribution documentation](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/), joining the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) or the GitHub discussion board. + +We hope you enjoy using Invoke as much as we enjoy creating it, and we hope you will elect to become part of our community. 
+ +## Thanks + +Invoke is a combined effort of [passionate and talented people from across the world](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for their time, hard work and effort. + +Original portions of the software are Copyright © 2024 by respective contributors. [CI checks on main badge]: https://flat.badgen.net/github/checks/invoke-ai/InvokeAI/main?label=CI%20status%20on%20main&cache=900&icon=github [CI checks on main link]:https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain @@ -32,271 +104,4 @@ [latest release badge]: https://flat.badgen.net/github/release/invoke-ai/InvokeAI/development?icon=github [latest release link]: https://github.com/invoke-ai/InvokeAI/releases [translation status badge]: https://hosted.weblate.org/widgets/invokeai/-/svg-badge.svg -[translation status link]: https://hosted.weblate.org/engage/invokeai/ - - - -InvokeAI is a leading creative engine built to empower professionals -and enthusiasts alike. Generate and create stunning visual media using -the latest AI-driven technologies. InvokeAI offers an industry leading -Web Interface, interactive Command Line Interface, and also serves as -the foundation for multiple commercial products. - -**Quick links**: [[How to - Install](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/)] [Discord Server] [Documentation and - Tutorials] - [Bug Reports] - [Discussion, - Ideas & Q&A] - [Contributing] - -
- - -![Highlighted Features - Canvas and Workflows](https://github.com/invoke-ai/InvokeAI/assets/31807370/708f7a82-084f-4860-bfbe-e2588c53548d) - - -
- -## Table of Contents - -Table of Contents 📝 - -**Getting Started** -1. 🏁 [Quick Start](#quick-start) -3. 🖥️ [Hardware Requirements](#hardware-requirements) - -**More About Invoke** -1. 🌟 [Features](#features) -2. 📣 [Latest Changes](#latest-changes) -3. 🛠️ [Troubleshooting](#troubleshooting) - -**Supporting the Project** -1. 🤝 [Contributing](#contributing) -2. 👥 [Contributors](#contributors) -3. 💕 [Support](#support) - -## Quick Start - -For full installation and upgrade instructions, please see: -[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/) - -If upgrading from version 2.3, please read [Migrating a 2.3 root -directory to 3.0](#migrating-to-3) first. - -### Automatic Installer (suggested for 1st time users) - -1. Go to the bottom of the [Latest Release Page](https://github.com/invoke-ai/InvokeAI/releases/latest) - -2. Download the .zip file for your OS (Windows/macOS/Linux). - -3. Unzip the file. - -4. **Windows:** double-click on the `install.bat` script. **macOS:** Open a Terminal window, drag the file `install.sh` from Finder -into the Terminal, and press return. **Linux:** run `install.sh`. - -5. You'll be asked to confirm the location of the folder in which -to install InvokeAI and its image generation model files. Pick a -location with at least 15 GB of free memory. More if you plan on -installing lots of models. - -6. Wait while the installer does its thing. After installing the software, -the installer will launch a script that lets you configure InvokeAI and -select a set of starting image generation models. - -7. Find the folder that InvokeAI was installed into (it is not the -same as the unpacked zip file directory!) The default location of this -folder (if you didn't change it in step 5) is `~/invokeai` on -Linux/Mac systems, and `C:\Users\YourName\invokeai` on Windows. This directory will contain launcher scripts named `invoke.sh` and `invoke.bat`. - -8. On Windows systems, double-click on the `invoke.bat` file. On -macOS, open a Terminal window, drag `invoke.sh` from the folder into -the Terminal, and press return. On Linux, run `invoke.sh` - -9. Press 1 to open the "browser-based UI", press enter/return, wait a -minute or two for Stable Diffusion to start up, then open your browser -and go to http://localhost:9090. - -10. Type `banana sushi` in the box on the top left and click `Invoke` - -### Command-Line Installation (for developers and users familiar with Terminals) - -You must have Python 3.10 through 3.11 installed on your machine. Earlier or -later versions are not supported. -Node.js also needs to be installed along with `pnpm` (can be installed with -the command `npm install -g pnpm` if needed) - -1. Open a command-line window on your machine. The PowerShell is recommended for Windows. -2. Create a directory to install InvokeAI into. You'll need at least 15 GB of free space: - - ```terminal - mkdir invokeai - ```` - -3. Create a virtual environment named `.venv` inside this directory and activate it: - - ```terminal - cd invokeai - python -m venv .venv --prompt InvokeAI - ``` - -4. Activate the virtual environment (do it every time you run InvokeAI) - - _For Linux/Mac users:_ - - ```sh - source .venv/bin/activate - ``` - - _For Windows users:_ - - ```ps - .venv\Scripts\activate - ``` - -5. Install the InvokeAI module and its dependencies. Choose the command suited for your platform & GPU. 
- - _For Windows/Linux with an NVIDIA GPU:_ - - ```terminal - pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu121 - ``` - - _For Linux with an AMD GPU:_ - - ```sh - pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/rocm5.6 - ``` - - _For non-GPU systems:_ - ```terminal - pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/cpu - ``` - - _For Macintoshes, either Intel or M1/M2/M3:_ - - ```sh - pip install InvokeAI --use-pep517 - ``` - -6. Launch the web server (do it every time you run InvokeAI): - - ```terminal - invokeai-web - ``` - -7. Point your browser to http://localhost:9090 to bring up the web interface. - -8. Type `banana sushi` in the box on the top left and click `Invoke`. - -Be sure to activate the virtual environment each time before re-launching InvokeAI, -using `source .venv/bin/activate` or `.venv\Scripts\activate`. - -## Detailed Installation Instructions - -This fork is supported across Linux, Windows and Macintosh. Linux -users can use either an Nvidia-based card (with CUDA support) or an -AMD card (using the ROCm driver). For full installation and upgrade -instructions, please see: -[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALL_SOURCE/) - -## Hardware Requirements - -InvokeAI is supported across Linux, Windows and macOS. Linux -users can use either an Nvidia-based card (with CUDA support) or an -AMD card (using the ROCm driver). - -### System - -You will need one of the following: - -- An NVIDIA-based graphics card with 4 GB or more VRAM memory. 6-8 GB - of VRAM is highly recommended for rendering using the Stable - Diffusion XL models -- An Apple computer with an M1 chip. -- An AMD-based graphics card with 4GB or more VRAM memory (Linux - only), 6-8 GB for XL rendering. - -We do not recommend the GTX 1650 or 1660 series video cards. They are -unable to run in half-precision mode and do not have sufficient VRAM -to render 512x512 images. - -**Memory** - At least 12 GB Main Memory RAM. - -**Disk** - At least 12 GB of free disk space for the machine learning model, Python, and all its dependencies. - -## Features - -Feature documentation can be reviewed by navigating to [the InvokeAI Documentation page](https://invoke-ai.github.io/InvokeAI/features/) - -### *Web Server & UI* - -InvokeAI offers a locally hosted Web Server & React Frontend, with an industry leading user experience. The Web-based UI allows for simple and intuitive workflows, and is responsive for use on mobile devices and tablets accessing the web server. - -### *Unified Canvas* - -The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/outpainting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more. - -### *Workflows & Nodes* - -InvokeAI offers a fully featured workflow management solution, enabling users to combine the power of nodes based workflows with the easy of a UI. This allows for customizable generation pipelines to be developed and shared by users looking to create specific workflows to support their production use-cases. - -### *Board & Gallery Management* - -Invoke AI provides an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. 
Images can be dragged/dropped onto any Image-base UI element in the application, and rich metadata within the Image allows for easy recall of key prompts or settings used in your workflow. - -### Other features - -- *Support for both ckpt and diffusers models* -- *SD1.5, SD2.0, and SDXL support* -- *Upscaling Tools* -- *Embedding Manager & Support* -- *Model Manager & Support* -- *Workflow creation & management* -- *Node-Based Architecture* - - -### Latest Changes - -For our latest changes, view our [Release -Notes](https://github.com/invoke-ai/InvokeAI/releases) and the -[CHANGELOG](docs/CHANGELOG.md). - -### Troubleshooting / FAQ - -Please check out our **[FAQ](https://invoke-ai.github.io/InvokeAI/help/FAQ/)** to get solutions for common installation -problems and other issues. For more help, please join our [Discord][discord link] - -## Contributing - -Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code -cleanup, testing, or code reviews, is very much encouraged to do so. - -Get started with contributing by reading our [Contribution documentation](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/), joining the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) or the GitHub discussion board. - -If you are unfamiliar with how -to contribute to GitHub projects, we have a new contributor checklist you can follow to get started contributing: -[New Contributor Checklist](https://invoke-ai.github.io/InvokeAI/contributing/contribution_guides/newContributorChecklist/). - -We hope you enjoy using our software as much as we enjoy creating it, -and we hope that some of those of you who are reading this will elect -to become part of our community. - -Welcome to InvokeAI! - -### Contributors - -This fork is a combined effort of various people from across the world. -[Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for -their time, hard work and effort. - -### Support - -For support, please use this repository's GitHub Issues tracking service, or join the [Discord][discord link]. - -Original portions of the software are Copyright (c) 2024 by respective contributors. 
- +[translation status link]: https://hosted.weblate.org/engage/invokeai/ \ No newline at end of file From caa7c0f2bd3748980e251f7c506c3d3592e47d3d Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 25 Apr 2024 23:41:00 +1000 Subject: [PATCH 7/9] docs: more pruning and tidying readme --- README.md | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 6ce131493c..f540e7be75 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ # Invoke - Professional Creative AI Tools for Visual Media -## To learn more about Invoke, or implement our Business solutions, visit [invoke.com](https://www.invoke.com/about) - +#### To learn more about Invoke, or implement our Business solutions, visit [invoke.com] + [![discord badge]][discord link] [![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link] [![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link] [![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link] Invoke is a leading creative engine built to empower professionals and enthusiasts alike. Generate and create stunning visual media using the latest AI-driven technologies. Invoke offers an industry leading web-based UI, and serves as the foundation for multiple commercial products. -**Quick links**: [Installation](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/) - [Discord](https://discord.gg/ZmtBAhwWhy) - [Documentation and Tutorials](https://invoke-ai.github.io/InvokeAI) - [Bug Reports](https://github.com/invoke-ai/InvokeAI/issues) - [Contributing](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/) +[Installation][installation docs] - [Documentation and Tutorials][docs home] - [Bug Reports][github issues] - [Contributing][contributing docs]
@@ -22,24 +22,30 @@ Invoke is a leading creative engine built to empower professionals and enthusias ## Quick Start -1. Download and unzip the installer from the bottom of the [latest release](https://github.com/invoke-ai/InvokeAI/releases/latest). +1. Download and unzip the installer from the bottom of the [latest release][latest release link]. 2. Run the installer script. -- **Windows**: Double-click on the `install.bat` script. -- **macOS**: Open a Terminal window, drag the file `install.sh` from Finder into the Terminal, and press enter. -- **Linux**: Run `install.sh`. + - **Windows**: Double-click on the `install.bat` script. + - **macOS**: Open a Terminal window, drag the file `install.sh` from Finder into the Terminal, and press enter. + - **Linux**: Run `install.sh`. 3. When prompted, enter a location for the install and select your GPU type. 4. Once the install finishes, find the directory you selected during install. The default location is `C:\Users\Username\invokeai` for Windows or `~/invokeai` for Linux/macOS. -6. Run the launcher script (`invoke.bat` for Windows, `invoke.sh` for macOS and Linux) - the same way you ran the installer script in step 2. -7. Select option 1 to start the application. Once it starts up, open your browser and go to . -8. Open the model manager tab to install a starter model and then you'll be ready to generate. +5. Run the launcher script (`invoke.bat` for Windows, `invoke.sh` for macOS and Linux) the same way you ran the installer script in step 2. +6. Select option 1 to start the application. Once it starts up, open your browser and go to . +7. Open the model manager tab to install a starter model and then you'll be ready to generate. -More detail, including hardware requirements and manual install instructions, are available in the [installation documentation](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/). +More detail, including hardware requirements and manual install instructions, are available in the [installation documentation][installation docs]. + +## Troubleshooting, FAQ and Support + +Please review our [FAQ][faq] for solutions to common installation problems and other issues. + +For more help, please join our [Discord][discord link]. ## Features -Full details on features can be found in [our documentation](https://invoke-ai.github.io/InvokeAI/features/). +Full details on features can be found in [our documentation][features docs]. ### Web Server & UI @@ -67,28 +73,31 @@ Invoke features an organized gallery system for easily storing, accessing, and r - Workflow creation & management - Node-Based Architecture -## Troubleshooting, FAQ and Support - -Please review our **[FAQ](https://invoke-ai.github.io/InvokeAI/help/FAQ/)** for solutions to common installation problems and other issues. - -For more help, please join our [Discord][discord link]. - ## Contributing Anyone who wishes to contribute to this project - whether documentation, features, bug fixes, code cleanup, testing, or code reviews - is very much encouraged to do so. -Get started with contributing by reading our [Contribution documentation](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/), joining the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) or the GitHub discussion board. +Get started with contributing by reading our [contribution documentation][contributing docs], joining the [#dev-chat] or the GitHub discussion board. 
We hope you enjoy using Invoke as much as we enjoy creating it, and we hope you will elect to become part of our community. ## Thanks -Invoke is a combined effort of [passionate and talented people from across the world](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for their time, hard work and effort. +Invoke is a combined effort of [passionate and talented people from across the world][contributors]. We thank them for their time, hard work and effort. Original portions of the software are Copyright © 2024 by respective contributors. +[features docs]: https://invoke-ai.github.io/InvokeAI/features/ +[faq]: https://invoke-ai.github.io/InvokeAI/help/FAQ/ +[contributors]: https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/ +[invoke.com]: https://www.invoke.com/about +[github issues]: https://github.com/invoke-ai/InvokeAI/issues +[docs home]: https://invoke-ai.github.io/InvokeAI +[installation docs]: https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/ +[#dev-chat]: https://discord.com/channels/1020123559063990373/1049495067846524939 +[contributing docs]: https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/ [CI checks on main badge]: https://flat.badgen.net/github/checks/invoke-ai/InvokeAI/main?label=CI%20status%20on%20main&cache=900&icon=github -[CI checks on main link]:https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain +[CI checks on main link]: https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain [discord badge]: https://flat.badgen.net/discord/members/ZmtBAhwWhy?icon=discord [discord link]: https://discord.gg/ZmtBAhwWhy [github forks badge]: https://flat.badgen.net/github/forks/invoke-ai/InvokeAI?icon=github @@ -102,6 +111,6 @@ Original portions of the software are Copyright © 2024 by respective contributo [latest commit to main badge]: https://flat.badgen.net/github/last-commit/invoke-ai/InvokeAI/main?icon=github&color=yellow&label=last%20dev%20commit&cache=900 [latest commit to main link]: https://github.com/invoke-ai/InvokeAI/commits/main [latest release badge]: https://flat.badgen.net/github/release/invoke-ai/InvokeAI/development?icon=github -[latest release link]: https://github.com/invoke-ai/InvokeAI/releases +[latest release link]: https://github.com/invoke-ai/InvokeAI/releases/latest [translation status badge]: https://hosted.weblate.org/widgets/invokeai/-/svg-badge.svg -[translation status link]: https://hosted.weblate.org/engage/invokeai/ \ No newline at end of file +[translation status link]: https://hosted.weblate.org/engage/invokeai/ From 3595beac1e1453154787d5d56eefab0d9eafe064 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Fri, 26 Apr 2024 07:30:45 +1000 Subject: [PATCH 8/9] docs: remove references to config script in CONFIGURATION.md --- docs/features/CONFIGURATION.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/features/CONFIGURATION.md b/docs/features/CONFIGURATION.md index 41f7a3ced3..d6bfe44901 100644 --- a/docs/features/CONFIGURATION.md +++ b/docs/features/CONFIGURATION.md @@ -51,13 +51,11 @@ The settings in this file will override the defaults. You only need to change this file if the default for a particular setting doesn't work for you. +You'll find an example file next to `invokeai.yaml` that shows the default values. + Some settings, like [Model Marketplace API Keys], require the YAML to be formatted correctly. Here is a [basic guide to YAML files]. 
-You can fix a broken `invokeai.yaml` by deleting it and running the -configuration script again -- option [6] in the launcher, "Re-run the -configure script". - #### Custom Config File Location You can use any config file with the `--config` CLI arg. Pass in the path to the `invokeai.yaml` file you want to use. From 241a1fdb57ffb6f7d1b6dc9e0007dd0c523c808a Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Sat, 27 Apr 2024 19:58:46 +1000 Subject: [PATCH 9/9] feat(mm): support sdxl ckpt inpainting models There are only a couple SDXL inpainting models, and my tests indicate they are not as good as SD1.5 inpainting, but at least we support them now. - Add the config file. This matches what is used in A1111. The only difference from the non-inpainting SDXL config is the number of in-channels. - Update the legacy config maps to use this config file. --- invokeai/backend/model_manager/probe.py | 1 + .../stable-diffusion/sd_xl_inpaint.yaml | 98 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 invokeai/configs/stable-diffusion/sd_xl_inpaint.yaml diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py index bf21a7fe7b..8f33e4b49f 100644 --- a/invokeai/backend/model_manager/probe.py +++ b/invokeai/backend/model_manager/probe.py @@ -51,6 +51,7 @@ LEGACY_CONFIGS: Dict[BaseModelType, Dict[ModelVariantType, Union[str, Dict[Sched }, BaseModelType.StableDiffusionXL: { ModelVariantType.Normal: "sd_xl_base.yaml", + ModelVariantType.Inpaint: "sd_xl_inpaint.yaml", }, BaseModelType.StableDiffusionXLRefiner: { ModelVariantType.Normal: "sd_xl_refiner.yaml", diff --git a/invokeai/configs/stable-diffusion/sd_xl_inpaint.yaml b/invokeai/configs/stable-diffusion/sd_xl_inpaint.yaml new file mode 100644 index 0000000000..eea5c15a49 --- /dev/null +++ b/invokeai/configs/stable-diffusion/sd_xl_inpaint.yaml @@ -0,0 +1,98 @@ +model: + target: sgm.models.diffusion.DiffusionEngine + params: + scale_factor: 0.13025 + disable_first_stage_autocast: True + + denoiser_config: + target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser + params: + num_idx: 1000 + + weighting_config: + target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting + scaling_config: + target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling + discretization_config: + target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization + + network_config: + target: sgm.modules.diffusionmodules.openaimodel.UNetModel + params: + adm_in_channels: 2816 + num_classes: sequential + use_checkpoint: True + in_channels: 9 + out_channels: 4 + model_channels: 320 + attention_resolutions: [4, 2] + num_res_blocks: 2 + channel_mult: [1, 2, 4] + num_head_channels: 64 + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16 + context_dim: 2048 + spatial_transformer_attn_type: softmax-xformers + legacy: False + + conditioner_config: + target: sgm.modules.GeneralConditioner + params: + emb_models: + # crossattn cond + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: hidden + layer_idx: 11 + # crossattn and vector cond + - is_trainable: False + input_key: txt + target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2 + params: + arch: ViT-bigG-14 + version: laion2b_s39b_b160k + freeze: True + layer: penultimate + always_return_pooled: 
True + legacy: False + # vector cond + - is_trainable: False + input_key: original_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + # vector cond + - is_trainable: False + input_key: crop_coords_top_left + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + # vector cond + - is_trainable: False + input_key: target_size_as_tuple + target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND + params: + outdim: 256 # multiplied by two + + first_stage_config: + target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + attn_type: vanilla-xformers + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: [1, 2, 4, 4] + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity \ No newline at end of file
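
As a rough illustration of what PATCH 9/9 does in practice (this sketch is editorial and not part of the patches above): the probe's legacy-config map gains an SDXL inpaint entry, so SDXL inpainting checkpoints resolve to the new `sd_xl_inpaint.yaml`, which per the commit message differs from `sd_xl_base.yaml` only in the number of UNet input channels (9 instead of 4). The enums below are simplified stand-ins, not InvokeAI's real `BaseModelType`/`ModelVariantType` classes; only the mapping entries and the config filenames are taken from the diff.

```python
from enum import Enum


# Simplified stand-ins for the invokeai.backend.model_manager enums; the real
# classes have more members and may use different values.
class BaseModelType(str, Enum):
    StableDiffusionXL = "sdxl"
    StableDiffusionXLRefiner = "sdxl-refiner"


class ModelVariantType(str, Enum):
    Normal = "normal"
    Inpaint = "inpaint"


# Mirrors the LEGACY_CONFIGS entries shown in the probe.py hunk: SDXL inpaint
# checkpoints now map to the newly added sd_xl_inpaint.yaml config.
LEGACY_CONFIGS: dict[BaseModelType, dict[ModelVariantType, str]] = {
    BaseModelType.StableDiffusionXL: {
        ModelVariantType.Normal: "sd_xl_base.yaml",
        ModelVariantType.Inpaint: "sd_xl_inpaint.yaml",
    },
    BaseModelType.StableDiffusionXLRefiner: {
        ModelVariantType.Normal: "sd_xl_refiner.yaml",
    },
}


def legacy_config_for(base: BaseModelType, variant: ModelVariantType) -> str:
    """Return the legacy YAML config filename for a checkpoint base/variant pair."""
    return LEGACY_CONFIGS[base][variant]


if __name__ == "__main__":
    # An SDXL inpainting checkpoint now resolves to the new config file.
    print(legacy_config_for(BaseModelType.StableDiffusionXL, ModelVariantType.Inpaint))
```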