From 8960ceb98b60fdeac8b38d1b17682de3080f4dc8 Mon Sep 17 00:00:00 2001
From: user1
Date: Tue, 23 May 2023 16:21:13 -0700
Subject: [PATCH] Added Mediapipe image processor for use as ControlNet
 preprocessor. Also hacked in ability to specify HF subfolder when loading
 ControlNet models from string.

---
 .../controlnet_image_processors.py | 20 ++++++++++++++++++--
 invokeai/app/invocations/latent.py | 15 +++++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/invokeai/app/invocations/controlnet_image_processors.py b/invokeai/app/invocations/controlnet_image_processors.py
index 1987381a7e..aeef02b944 100644
--- a/invokeai/app/invocations/controlnet_image_processors.py
+++ b/invokeai/app/invocations/controlnet_image_processors.py
@@ -84,9 +84,11 @@ CONTROLNET_DEFAULT_MODELS = [
     ##############################################
     # ControlNetMediaPipeface, ControlNet v1.1
     ##############################################
-    "CrucibleAI/ControlNetMediaPipeFace",# SD 2.1?
-    # diffusion_sd15 needs to be passed to from_pretrained() as subfolder arg
     # ["CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"],  # SD 1.5
+    # diffusion_sd15 needs to be passed to from_pretrained() as subfolder arg
+    # hacked t2l to split to model & subfolder if format is "model,subfolder"
+    "CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15",  # SD 1.5
+    "CrucibleAI/ControlNetMediaPipeFace",  # SD 2.1?
 ]
 
 CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)]
@@ -403,3 +405,17 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvoca
 #     processed_image = zoe_depth_processor(image)
 #     return processed_image
 
+
+class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
+    """Applies mediapipe face processing to image"""
+    # fmt: off
+    type: Literal["mediapipe_face_processor"] = "mediapipe_face_processor"
+    # Inputs
+    max_faces: int = Field(default=1, ge=1, description="maximum number of faces to detect")
+    min_confidence: float = Field(default=0.5, ge=0, le=1, description="minimum confidence for face detection")
+    # fmt: on
+
+    def run_processor(self, image):
+        mediapipe_face_processor = MediapipeFaceDetector()
+        processed_image = mediapipe_face_processor(image)
+        return processed_image
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 98d56f575e..0bda8fb5d7 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -277,8 +277,19 @@ class TextToLatentsInvocation(BaseInvocation):
         control_models = []
         for control_info in control_list:
             # handle control models
-            control_model = ControlNetModel.from_pretrained(control_info.control_model,
-                                                            torch_dtype=model.unet.dtype).to(model.device)
+            if ("," in control_info.control_model):
+                control_model_split = control_info.control_model.split(",")
+                control_name = control_model_split[0]
+                control_subfolder = control_model_split[1]
+                print("Using HF model subfolders")
+                print("    control_name: ", control_name)
+                print("    control_subfolder: ", control_subfolder)
+                control_model = ControlNetModel.from_pretrained(control_name,
+                                                                subfolder=control_subfolder,
+                                                                torch_dtype=model.unet.dtype).to(model.device)
+            else:
+                control_model = ControlNetModel.from_pretrained(control_info.control_model,
+                                                                torch_dtype=model.unet.dtype).to(model.device)
             control_models.append(control_model)
             control_image_field = control_info.image
             input_image = context.services.images.get(control_image_field.image_type, control_image_field.image_name)
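
Reviewer note: below is a minimal standalone sketch of the "model,subfolder"
convention this patch introduces, showing how a spec string splits into the HF
repo id and the subfolder argument that diffusers' ControlNetModel.from_pretrained()
accepts. The spec value and the torch.float16 dtype are illustrative stand-ins
for control_info.control_model and model.unet.dtype from the patch; the split
uses maxsplit=1 where the patch indexes split(",") directly, which behaves the
same for well-formed specs.

    import torch
    from diffusers import ControlNetModel

    # "model,subfolder" spec, as listed in CONTROLNET_DEFAULT_MODELS above
    spec = "CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15"

    if "," in spec:
        # Split into HF repo id and subfolder within that repo
        control_name, control_subfolder = spec.split(",", 1)
        control_model = ControlNetModel.from_pretrained(
            control_name,
            subfolder=control_subfolder,  # e.g. the SD 1.5 weights
            torch_dtype=torch.float16,    # patch uses model.unet.dtype here
        )
    else:
        # Plain repo id, no subfolder
        control_model = ControlNetModel.from_pretrained(spec, torch_dtype=torch.float16)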