From 10c3753d7f9ed7cf2415091160f08ee45e30a681 Mon Sep 17 00:00:00 2001
From: user1
Date: Sun, 25 Jun 2023 11:16:39 -0700
Subject: [PATCH 01/10] Added SAM preprocessor

---
 .../app/invocations/controlnet_image_processors.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py
index baf558ac24..0fdfc12905 100644
--- a/invokeai/app/invocations/controlnet_image_processors.py
+++ b/invokeai/app/invocations/controlnet_image_processors.py
@@ -29,6 +29,7 @@ from controlnet_aux import (
     ContentShuffleDetector,
     ZoeDetector,
     MediapipeFaceDetector,
+    SamDetector,
 )

 from .image import ImageOutput, PILInvocationConfig
@@ -455,3 +456,15 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
         mediapipe_face_processor = MediapipeFaceDetector()
         processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence)
         return processed_image
+
+
+class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
+    """Applies segment anything processing to image"""
+    # fmt: off
+    type: Literal["segment_anything_processor"] = "segment_anything_processor"
+    # fmt: on
+
+    def run_processor(self, image):
+        segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
+        processed_image = segment_anything_processor(image)
+        return processed_image

From de4064bdac1d80ec3aac496b3727854094a42d8f Mon Sep 17 00:00:00 2001
From: user1
Date: Sun, 25 Jun 2023 12:38:17 -0700
Subject: [PATCH 02/10] Fixed problem with non-reproducible results from
 ControlNet SegmentAnything preprocessor. Cause was controlnet_aux
 randomization of segmentation coloring, which seems to lead to some
 randomization of resulting images using ControlNet seg model. Switched to
 using deterministic ADE20K color palette instead, which solved the problem.
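
A quick determinism check (a sketch, not part of the diff; "test.png" is a
hypothetical local image, and the detector is assumed to accept a uint8 RGB
array, as the node code does):

    import numpy as np
    from PIL import Image

    img = np.array(Image.open("test.png").convert("RGB"), dtype=np.uint8)
    proc = SamDetectorReproducibleColors.from_pretrained(
        "ybelkada/segment-anything", subfolder="checkpoints")
    # colors now come from the fixed ADE20K palette rather than a random
    # draw per annotation, so repeated runs produce identical maps
    assert (np.array(proc(img)) == np.array(proc(img))).all()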
--- .../controlnet_image_processors.py | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 0fdfc12905..c5777284a5 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -4,7 +4,7 @@ from builtins import float, bool import numpy as np -from typing import Literal, Optional, Union, List +from typing import Literal, Optional, Union, List, Dict from PIL import Image, ImageFilter, ImageOps from pydantic import BaseModel, Field, validator @@ -32,6 +32,9 @@ from controlnet_aux import ( SamDetector, ) +from controlnet_aux.util import ade_palette + + from .image import ImageOutput, PILInvocationConfig CONTROLNET_DEFAULT_MODELS = [ @@ -465,6 +468,35 @@ class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocation # fmt: on def run_processor(self, image): - segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints") + # segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints") + segment_anything_processor = SamDetectorReproducibleColors.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints") processed_image = segment_anything_processor(image) return processed_image + +class SamDetectorReproducibleColors(SamDetector): + + # overriding SamDetector.show_anns() method to use reproducible colors for segmentation image + # base class show_anns() method randomizes colors, + # which seems to also lead to non-reproducible image generation + # so using ADE20k color palette instead + def show_anns(self, anns: List[Dict]): + if len(anns) == 0: + return + sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) + h, w = anns[0]['segmentation'].shape + final_img = Image.fromarray(np.zeros((h, w, 3), dtype=np.uint8), mode="RGB") + print("number of annotations: ", len(sorted_anns)) + print("type of annotations: ", type(sorted_anns)) + palette = ade_palette() + for i, ann in enumerate(sorted_anns): + m = ann['segmentation'] + img = np.empty((m.shape[0], m.shape[1], 3), dtype=np.uint8) + # doing modulo just in case number of annotated regions exceeds number of colors in palette + ann_color = palette[i % len(palette)] + print(ann_color) + img[:, :, 0] = ann_color[0] + img[:, :, 1] = ann_color[1] + img[:, :, 2] = ann_color[2] + final_img.paste(Image.fromarray(img, mode="RGB"), (0, 0), Image.fromarray(np.uint8(m * 255))) + + return np.array(final_img, dtype=np.uint8) From b872e7a5e0ec7c1cb1c7ef5ca31065bce9cd74af Mon Sep 17 00:00:00 2001 From: user1 Date: Sun, 25 Jun 2023 12:54:48 -0700 Subject: [PATCH 03/10] Simplifying ControlNet SAM preprocessor segmentation color mapping. 
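
The one-line color fill below relies on NumPy broadcasting; a minimal
standalone sketch with illustrative values (not taken from the diff):

    import numpy as np

    h, w = 4, 4                    # stand-in mask dimensions
    img = np.empty((h, w, 3), dtype=np.uint8)
    ann_color = (120, 120, 120)    # one ADE20K palette entry

    # broadcasts the RGB triple over all H x W pixels, replacing the
    # three per-channel assignments removed in this patch
    img[:, :] = ann_color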
--- invokeai/app/invocations/controlnet_image_processors.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index c5777284a5..d7825111d3 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -485,18 +485,12 @@ class SamDetectorReproducibleColors(SamDetector): sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True) h, w = anns[0]['segmentation'].shape final_img = Image.fromarray(np.zeros((h, w, 3), dtype=np.uint8), mode="RGB") - print("number of annotations: ", len(sorted_anns)) - print("type of annotations: ", type(sorted_anns)) palette = ade_palette() for i, ann in enumerate(sorted_anns): m = ann['segmentation'] img = np.empty((m.shape[0], m.shape[1], 3), dtype=np.uint8) # doing modulo just in case number of annotated regions exceeds number of colors in palette ann_color = palette[i % len(palette)] - print(ann_color) - img[:, :, 0] = ann_color[0] - img[:, :, 1] = ann_color[1] - img[:, :, 2] = ann_color[2] + img[:, :] = ann_color final_img.paste(Image.fromarray(img, mode="RGB"), (0, 0), Image.fromarray(np.uint8(m * 255))) - return np.array(final_img, dtype=np.uint8) From 414a04774c79f980fb20a9a7da4d9d738737c36a Mon Sep 17 00:00:00 2001 From: user1 Date: Sun, 25 Jun 2023 14:19:55 -0700 Subject: [PATCH 04/10] Added LeReS ControlNet image preprocessor. --- .../controlnet_image_processors.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index d7825111d3..f573c17c0d 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -30,6 +30,7 @@ from controlnet_aux import ( ZoeDetector, MediapipeFaceDetector, SamDetector, + LeresDetector, ) from controlnet_aux.util import ade_palette @@ -460,6 +461,27 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence) return processed_image +class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): + """Applies leres processing to image""" + # fmt: off + type: Literal["leres_image_processor"] = "leres_image_processor" + # Inputs + thr_a: float = Field(default=0, description="Leres parameter `thr_a`") + thr_b: float = Field(default=0, description="Leres parameter `thr_b`") + boost: bool = Field(default=False, description="Whether to use boost mode") + detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection") + image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image") + # fmt: on + + def run_processor(self, image): + leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators") + processed_image = leres_processor(image, + thr_a=self.thr_a, + thr_b=self.thr_b, + boost=self.boost, + detect_resolution=self.detect_resolution, + image_resolution=self.image_resolution) + return processed_image class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): """Applies segment anything processing to image""" From 45aa338a9814a3df1a97efd02b5d45553c5b87ab Mon Sep 17 00:00:00 2001 From: user1 Date: Sun, 25 Jun 2023 14:22:34 -0700 Subject: [PATCH 05/10] 
Changed pyproject.toml to require controlnet_aux >= 0.0.5 (to enable use of SAM ControlNet preprocessor) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 03396312ac..d470b76937 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "click", "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel>=1.2.1", - "controlnet-aux>=0.0.4", + "controlnet-aux>=0.0.5", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "datasets", "diffusers[torch]~=0.17.1", From 10e8389fa4ab702899264c88c0613bed95e06cb0 Mon Sep 17 00:00:00 2001 From: user1 Date: Sun, 25 Jun 2023 14:25:14 -0700 Subject: [PATCH 06/10] Commenting out LeReS ControlNet image preprocessor until release of controlnet_aux v0.0.6 (supported on controlnet_aux current main, but not on latest release v0.0.5) --- .../controlnet_image_processors.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index f573c17c0d..2bd0a5cf04 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -30,7 +30,7 @@ from controlnet_aux import ( ZoeDetector, MediapipeFaceDetector, SamDetector, - LeresDetector, + # LeresDetector, ) from controlnet_aux.util import ade_palette @@ -461,27 +461,27 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence) return processed_image -class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): - """Applies leres processing to image""" - # fmt: off - type: Literal["leres_image_processor"] = "leres_image_processor" - # Inputs - thr_a: float = Field(default=0, description="Leres parameter `thr_a`") - thr_b: float = Field(default=0, description="Leres parameter `thr_b`") - boost: bool = Field(default=False, description="Whether to use boost mode") - detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection") - image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image") - # fmt: on - - def run_processor(self, image): - leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators") - processed_image = leres_processor(image, - thr_a=self.thr_a, - thr_b=self.thr_b, - boost=self.boost, - detect_resolution=self.detect_resolution, - image_resolution=self.image_resolution) - return processed_image +# class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): +# """Applies leres processing to image""" +# # fmt: off +# type: Literal["leres_image_processor"] = "leres_image_processor" +# # Inputs +# thr_a: float = Field(default=0, description="Leres parameter `thr_a`") +# thr_b: float = Field(default=0, description="Leres parameter `thr_b`") +# boost: bool = Field(default=False, description="Whether to use boost mode") +# detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection") +# image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image") +# # fmt: on +# +# def run_processor(self, image): +# leres_processor = 
LeresDetector.from_pretrained("lllyasviel/Annotators") +# processed_image = leres_processor(image, +# thr_a=self.thr_a, +# thr_b=self.thr_b, +# boost=self.boost, +# detect_resolution=self.detect_resolution, +# image_resolution=self.image_resolution) +# return processed_image class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): """Applies segment anything processing to image""" From 873c18bc4b6b1645e729bb920c9e966f6008042a Mon Sep 17 00:00:00 2001 From: user1 Date: Mon, 26 Jun 2023 04:27:26 -0700 Subject: [PATCH 07/10] Added TileResampler ControlNet preprocessor node. Also fixes to SegmentAnything ControlNet preprocessor node. --- .../controlnet_image_processors.py | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 2bd0a5cf04..870f14dc27 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -1,8 +1,9 @@ -# InvokeAI nodes for ControlNet image preprocessors +# Invocations for ControlNet image preprocessors # initial implementation by Gregg Helt, 2023 # heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux from builtins import float, bool +import cv2 import numpy as np from typing import Literal, Optional, Union, List, Dict from PIL import Image, ImageFilter, ImageOps @@ -33,7 +34,7 @@ from controlnet_aux import ( # LeresDetector, ) -from controlnet_aux.util import ade_palette +from controlnet_aux.util import HWC3, ade_palette from .image import ImageOutput, PILInvocationConfig @@ -483,6 +484,43 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo # image_resolution=self.image_resolution) # return processed_image + +class TileResamplerProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): + + # fmt: off + type: Literal["tile_image_processor"] = "tile_image_processor" + # Inputs + #res: int = Field(default=512, ge=0, le=1024, description="The pixel resolution for each tile") + down_sampling_rate: float = Field(default=1.0, ge=1.0, le=8.0, description="Down sampling rate") + # fmt: on + + # tile_resample copied from sd-webui-controlnet/scripts/processor.py + def tile_resample(self, + np_img: np.ndarray, + res=512, # never used? 
+ down_sampling_rate=1.0, + ): + np_img = HWC3(np_img) + if down_sampling_rate < 1.1: + return np_img + H, W, C = np_img.shape + H = int(float(H) / float(down_sampling_rate)) + W = int(float(W) / float(down_sampling_rate)) + np_img = cv2.resize(np_img, (W, H), interpolation=cv2.INTER_AREA) + return np_img + + def run_processor(self, img): + np_img = np.array(img, dtype=np.uint8) + processed_np_image = self.tile_resample(np_img, + #res=self.tile_size, + down_sampling_rate=self.down_sampling_rate + ) + processed_image = Image.fromarray(processed_np_image) + return processed_image + + + + class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): """Applies segment anything processing to image""" # fmt: off @@ -492,7 +530,8 @@ class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocation def run_processor(self, image): # segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints") segment_anything_processor = SamDetectorReproducibleColors.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints") - processed_image = segment_anything_processor(image) + np_img = np.array(image, dtype=np.uint8) + processed_image = segment_anything_processor(np_img) return processed_image class SamDetectorReproducibleColors(SamDetector): From af566adf566402b8522a92562b1e7a73891d1649 Mon Sep 17 00:00:00 2001 From: user1 Date: Mon, 26 Jun 2023 04:29:43 -0700 Subject: [PATCH 08/10] For MediapipeFace ControlNet preprocessor, if input image is RGBA format then convert to RGB (otherwise MediapipeFace image processing throws an error) --- invokeai/app/invocations/controlnet_image_processors.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 870f14dc27..8c354d9908 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -458,6 +458,10 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo # fmt: on def run_processor(self, image): + # MediaPipeFaceDetector throws an error if image has alpha channel + # so convert to RGB if needed + if image.mode == 'RGBA': + image = image.convert('RGB') mediapipe_face_processor = MediapipeFaceDetector() processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence) return processed_image From 10d2d85c83718ed79c1b933c539b09bfd072fe66 Mon Sep 17 00:00:00 2001 From: user1 Date: Mon, 26 Jun 2023 12:03:05 -0700 Subject: [PATCH 09/10] Started to add ControlNet resize_crop and resize_fill options, but commented out, not ready to deploy yet. 
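
For the record, the intended semantics (roughly mirroring the resize modes
in sd-webui-controlnet) would be: "just_resize" stretches to the target
size, "crop_resize" scales to cover then center-crops, and "fill_resize"
scales to fit then pads. A rough PIL sketch of the latter two, illustrative
only and not part of this patch:

    from PIL import Image

    def crop_resize(image: Image.Image, w: int, h: int) -> Image.Image:
        # scale so the image covers the target, then center-crop the overflow
        scale = max(w / image.width, h / image.height)
        image = image.resize((round(image.width * scale), round(image.height * scale)))
        left, top = (image.width - w) // 2, (image.height - h) // 2
        return image.crop((left, top, left + w, top + h))

    def fill_resize(image: Image.Image, w: int, h: int) -> Image.Image:
        # scale so the image fits inside the target, then pad the remainder
        scale = min(w / image.width, h / image.height)
        image = image.resize((round(image.width * scale), round(image.height * scale)))
        canvas = Image.new("RGB", (w, h))
        canvas.paste(image, ((w - image.width) // 2, (h - image.height) // 2))
        return canvas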
--- invokeai/app/invocations/controlnet_image_processors.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 8c354d9908..01deebc9fa 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -101,6 +101,9 @@ CONTROLNET_DEFAULT_MODELS = [ CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)] CONTROLNET_MODE_VALUES = Literal[tuple(["balanced", "more_prompt", "more_control", "unbalanced"])] +# crop and fill options not ready yet +# CONTROLNET_RESIZE_VALUES = Literal[tuple(["just_resize", "crop_resize", "fill_resize"])] + class ControlField(BaseModel): image: ImageField = Field(default=None, description="The control image") @@ -111,7 +114,8 @@ class ControlField(BaseModel): description="When the ControlNet is first applied (% of total steps)") end_step_percent: float = Field(default=1, ge=0, le=1, description="When the ControlNet is last applied (% of total steps)") - control_mode: CONTROLNET_MODE_VALUES = Field(default="balanced", description="The contorl mode to use") + control_mode: CONTROLNET_MODE_VALUES = Field(default="balanced", description="The control mode to use") + # resize_mode: CONTROLNET_RESIZE_VALUES = Field(default="just_resize", description="The resize mode to use") @validator("control_weight") def abs_le_one(cls, v): @@ -186,7 +190,7 @@ class ControlNetInvocation(BaseInvocation): ), ) -# TODO: move image processors to separate file (image_analysis.py + class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig): """Base class for invocations that preprocess images for ControlNet""" From fc322aa9f7f4a65b9533c2ba18eb67e2d0da859a Mon Sep 17 00:00:00 2001 From: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Date: Tue, 27 Jun 2023 23:45:47 -0400 Subject: [PATCH 10/10] Update controlnet-aux to 0.0.6 and add LeReS --- .../controlnet_image_processors.py | 44 +++++++++---------- pyproject.toml | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 01deebc9fa..8cfe35598d 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -31,7 +31,7 @@ from controlnet_aux import ( ZoeDetector, MediapipeFaceDetector, SamDetector, - # LeresDetector, + LeresDetector, ) from controlnet_aux.util import HWC3, ade_palette @@ -470,27 +470,27 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence) return processed_image -# class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): -# """Applies leres processing to image""" -# # fmt: off -# type: Literal["leres_image_processor"] = "leres_image_processor" -# # Inputs -# thr_a: float = Field(default=0, description="Leres parameter `thr_a`") -# thr_b: float = Field(default=0, description="Leres parameter `thr_b`") -# boost: bool = Field(default=False, description="Whether to use boost mode") -# detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection") -# image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image") -# # fmt: on -# -# def 
run_processor(self, image): -# leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators") -# processed_image = leres_processor(image, -# thr_a=self.thr_a, -# thr_b=self.thr_b, -# boost=self.boost, -# detect_resolution=self.detect_resolution, -# image_resolution=self.image_resolution) -# return processed_image +class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): + """Applies leres processing to image""" + # fmt: off + type: Literal["leres_image_processor"] = "leres_image_processor" + # Inputs + thr_a: float = Field(default=0, description="Leres parameter `thr_a`") + thr_b: float = Field(default=0, description="Leres parameter `thr_b`") + boost: bool = Field(default=False, description="Whether to use boost mode") + detect_resolution: int = Field(default=512, ge=0, description="The pixel resolution for detection") + image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image") + # fmt: on + + def run_processor(self, image): + leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators") + processed_image = leres_processor(image, + thr_a=self.thr_a, + thr_b=self.thr_b, + boost=self.boost, + detect_resolution=self.detect_resolution, + image_resolution=self.image_resolution) + return processed_image class TileResamplerProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig): diff --git a/pyproject.toml b/pyproject.toml index d470b76937..6e5b8f4e22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "click", "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel>=1.2.1", - "controlnet-aux>=0.0.5", + "controlnet-aux>=0.0.6", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "datasets", "diffusers[torch]~=0.17.1",
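
To exercise the new preprocessors outside an InvokeAI graph, a minimal
standalone sketch (assumes controlnet-aux>=0.0.6 is installed, the Hugging
Face checkpoints are reachable, and a hypothetical local "input.png"):

    import numpy as np
    from PIL import Image
    from controlnet_aux import LeresDetector, SamDetector

    image = Image.open("input.png").convert("RGB")

    # LeReS depth map, using the same defaults as LeresImageProcessorInvocation
    leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
    depth = leres(image, thr_a=0, thr_b=0, boost=False,
                  detect_resolution=512, image_resolution=512)
    depth.save("depth.png")

    # SAM segmentation map; the node feeds a uint8 NumPy array (see PATCH 07)
    sam = SamDetector.from_pretrained("ybelkada/segment-anything",
                                      subfolder="checkpoints")
    seg = sam(np.array(image, dtype=np.uint8))
    seg.save("seg.png")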