From 01d8ab04a56c932b68f59df5add0652f25a5c428 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 21 Mar 2024 18:17:53 +1100 Subject: [PATCH] feat(nodes): add missing detect_resolution to processors Some processors, like Canny, didn't use `detect_resolution`. The resultant control images were then resized by the processors from 512x512 to the desired dimensions. The result is that the control images are the right size, but very low quality. Using detect_resolution fixes this. --- .../controlnet_image_processors.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py index 8db1f09866..ed56344847 100644 --- a/invokeai/app/invocations/controlnet_image_processors.py +++ b/invokeai/app/invocations/controlnet_image_processors.py @@ -171,11 +171,12 @@ class ImageProcessorInvocation(BaseInvocation, WithMetadata, WithBoard): title="Canny Processor", tags=["controlnet", "canny"], category="controlnet", - version="1.3.1", + version="1.3.2", ) class CannyImageProcessorInvocation(ImageProcessorInvocation): """Canny edge detection for ControlNet""" + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) low_threshold: int = InputField( default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)" @@ -195,6 +196,7 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation): self.low_threshold, self.high_threshold, image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, ) return processed_image @@ -278,13 +280,14 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation): title="Midas Depth Processor", tags=["controlnet", "midas"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class MidasDepthImageProcessorInvocation(ImageProcessorInvocation): """Applies Midas depth processing to image""" a_mult: float = InputField(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)") bg_th: float = InputField(default=0.1, ge=0, description="Midas parameter `bg_th`") + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) # depth_and_normal not supported in controlnet_aux v0.0.3 # depth_and_normal: bool = InputField(default=False, description="whether to use depth and normal mode") @@ -296,6 +299,7 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation): a=np.pi * self.a_mult, bg_th=self.bg_th, image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, # dept_and_normal not supported in controlnet_aux v0.0.3 # depth_and_normal=self.depth_and_normal, ) @@ -420,19 +424,24 @@ class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation): title="Mediapipe Face Processor", tags=["controlnet", "mediapipe", "face"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class MediapipeFaceProcessorInvocation(ImageProcessorInvocation): """Applies mediapipe face processing to image""" max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect") min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection") + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) def run_processor(self, image): mediapipe_face_processor = MediapipeFaceDetector() processed_image = mediapipe_face_processor( - image, max_faces=self.max_faces, min_confidence=self.min_confidence, image_resolution=self.image_resolution + image, + max_faces=self.max_faces, + min_confidence=self.min_confidence, + image_resolution=self.image_resolution, + detect_resolution=self.detect_resolution, ) return processed_image @@ -511,11 +520,12 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation): title="Segment Anything Processor", tags=["controlnet", "segmentanything"], category="controlnet", - version="1.2.2", + version="1.2.3", ) class SegmentAnythingProcessorInvocation(ImageProcessorInvocation): """Applies segment anything processing to image""" + detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res) image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res) def run_processor(self, image): @@ -524,7 +534,9 @@ class SegmentAnythingProcessorInvocation(ImageProcessorInvocation): "ybelkada/segment-anything", subfolder="checkpoints" ) np_img = np.array(image, dtype=np.uint8) - processed_image = segment_anything_processor(np_img, image_resolution=self.image_resolution) + processed_image = segment_anything_processor( + np_img, image_resolution=self.image_resolution, detect_resolution=self.detect_resolution + ) return processed_image