Merge branch 'main' into release/make-web-dist-startable

This commit is contained in:
Lincoln Stein 2023-05-28 19:46:09 -04:00 committed by GitHub
commit d37b08a7dd
53 changed files with 1985 additions and 41 deletions

View File

@ -96,6 +96,7 @@ class UIConfig(TypedDict, total=False):
"image",
"latents",
"model",
"control",
],
]
tags: List[str]

View File

@ -22,6 +22,14 @@ class IntCollectionOutput(BaseInvocationOutput):
# Outputs
collection: list[int] = Field(default=[], description="The int collection")
class FloatCollectionOutput(BaseInvocationOutput):
"""A collection of floats"""
type: Literal["float_collection"] = "float_collection"
# Outputs
collection: list[float] = Field(default=[], description="The float collection")
class RangeInvocation(BaseInvocation):
"""Creates a range of numbers from start to stop with step"""

View File

@ -0,0 +1,428 @@
# InvokeAI nodes for ControlNet image preprocessors
# initial implementation by Gregg Helt, 2023
# heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux
import numpy as np
from typing import Literal, Optional, Union, List
from PIL import Image, ImageFilter, ImageOps
from pydantic import BaseModel, Field
from ..models.image import ImageField, ImageType, ImageCategory
from .baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
InvocationContext,
InvocationConfig,
)
from controlnet_aux import (
CannyDetector,
HEDdetector,
LineartDetector,
LineartAnimeDetector,
MidasDetector,
MLSDdetector,
NormalBaeDetector,
OpenposeDetector,
PidiNetDetector,
ContentShuffleDetector,
ZoeDetector,
MediapipeFaceDetector,
)
from .image import ImageOutput, PILInvocationConfig
CONTROLNET_DEFAULT_MODELS = [
###########################################
# lllyasviel sd v1.5, ControlNet v1.0 models
##############################################
"lllyasviel/sd-controlnet-canny",
"lllyasviel/sd-controlnet-depth",
"lllyasviel/sd-controlnet-hed",
"lllyasviel/sd-controlnet-seg",
"lllyasviel/sd-controlnet-openpose",
"lllyasviel/sd-controlnet-scribble",
"lllyasviel/sd-controlnet-normal",
"lllyasviel/sd-controlnet-mlsd",
#############################################
# lllyasviel sd v1.5, ControlNet v1.1 models
#############################################
"lllyasviel/control_v11p_sd15_canny",
"lllyasviel/control_v11p_sd15_openpose",
"lllyasviel/control_v11p_sd15_seg",
# "lllyasviel/control_v11p_sd15_depth", # broken
"lllyasviel/control_v11f1p_sd15_depth",
"lllyasviel/control_v11p_sd15_normalbae",
"lllyasviel/control_v11p_sd15_scribble",
"lllyasviel/control_v11p_sd15_mlsd",
"lllyasviel/control_v11p_sd15_softedge",
"lllyasviel/control_v11p_sd15s2_lineart_anime",
"lllyasviel/control_v11p_sd15_lineart",
"lllyasviel/control_v11p_sd15_inpaint",
# "lllyasviel/control_v11u_sd15_tile",
# problem (temporary?) with huffingface "lllyasviel/control_v11u_sd15_tile",
# so for now replace "lllyasviel/control_v11f1e_sd15_tile",
"lllyasviel/control_v11e_sd15_shuffle",
"lllyasviel/control_v11e_sd15_ip2p",
"lllyasviel/control_v11f1e_sd15_tile",
#################################################
# thibaud sd v2.1 models (ControlNet v1.0? or v1.1?)
##################################################
"thibaud/controlnet-sd21-openpose-diffusers",
"thibaud/controlnet-sd21-canny-diffusers",
"thibaud/controlnet-sd21-depth-diffusers",
"thibaud/controlnet-sd21-scribble-diffusers",
"thibaud/controlnet-sd21-hed-diffusers",
"thibaud/controlnet-sd21-zoedepth-diffusers",
"thibaud/controlnet-sd21-color-diffusers",
"thibaud/controlnet-sd21-openposev2-diffusers",
"thibaud/controlnet-sd21-lineart-diffusers",
"thibaud/controlnet-sd21-normalbae-diffusers",
"thibaud/controlnet-sd21-ade20k-diffusers",
##############################################
# ControlNetMediaPipeface, ControlNet v1.1
##############################################
# ["CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"], # SD 1.5
# diffusion_sd15 needs to be passed to from_pretrained() as subfolder arg
# hacked t2l to split to model & subfolder if format is "model,subfolder"
"CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15", # SD 1.5
"CrucibleAI/ControlNetMediaPipeFace", # SD 2.1?
]
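# Illustrative sketch (an assumption, not part of this commit) of the
# "model,subfolder" convention documented above: splitting on the comma yields
# the HF repo id plus the subfolder to hand to ControlNetModel.from_pretrained(),
# which is what prep_control_data() in latent.py does.
#
#   spec = "CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15"
#   repo_id, _, subfolder = spec.partition(",")
#   # repo_id == "CrucibleAI/ControlNetMediaPipeFace"
#   # subfolder == "diffusion_sd15" (empty string when no comma is present)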
CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)]
class ControlField(BaseModel):
image: ImageField = Field(default=None, description="processed image")
control_model: Optional[str] = Field(default=None, description="control model used")
control_weight: Optional[float] = Field(default=1, description="weight given to controlnet")
begin_step_percent: float = Field(default=0, ge=0, le=1,
description="% of total steps at which controlnet is first applied")
end_step_percent: float = Field(default=1, ge=0, le=1,
description="% of total steps at which controlnet is last applied")
class Config:
schema_extra = {
"required": ["image", "control_model", "control_weight", "begin_step_percent", "end_step_percent"]
}
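# A hedged example of constructing and serializing a ControlField (image names
# here are hypothetical):
#
#   field = ControlField(
#       image=ImageField(image_name="canny_edges.png", image_type=ImageType.RESULT),
#       control_model="lllyasviel/sd-controlnet-canny",
#       control_weight=0.75,
#       begin_step_percent=0.0,
#       end_step_percent=0.5,
#   )
#   field.dict()  # -> JSON-serializable dict matching the schema above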
class ControlOutput(BaseInvocationOutput):
"""node output for ControlNet info"""
# fmt: off
type: Literal["control_output"] = "control_output"
control: ControlField = Field(default=None, description="The control info dict")
# fmt: on
class ControlNetInvocation(BaseInvocation):
"""Collects ControlNet info to pass to other nodes"""
# fmt: off
type: Literal["controlnet"] = "controlnet"
# Inputs
image: ImageField = Field(default=None, description="image to process")
control_model: CONTROLNET_NAME_VALUES = Field(default="lllyasviel/sd-controlnet-canny",
description="control model used")
control_weight: float = Field(default=1.0, ge=0, le=1, description="weight given to controlnet")
# TODO: add support in backend core for begin_step_percent, end_step_percent, guess_mode
begin_step_percent: float = Field(default=0, ge=0, le=1,
description="% of total steps at which controlnet is first applied")
end_step_percent: float = Field(default=1, ge=0, le=1,
description="% of total steps at which controlnet is last applied")
# fmt: on
def invoke(self, context: InvocationContext) -> ControlOutput:
return ControlOutput(
control=ControlField(
image=self.image,
control_model=self.control_model,
control_weight=self.control_weight,
begin_step_percent=self.begin_step_percent,
end_step_percent=self.end_step_percent,
),
)
# TODO: move image processors to separate file (image_analysis.py?)
class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig):
"""Base class for invocations that preprocess images for ControlNet"""
# fmt: off
type: Literal["image_processor"] = "image_processor"
# Inputs
image: ImageField = Field(default=None, description="image to process")
# fmt: on
def run_processor(self, image):
# superclass just passes through image without processing
return image
def invoke(self, context: InvocationContext) -> ImageOutput:
raw_image = context.services.images.get_pil_image(
self.image.image_type, self.image.image_name
)
# image type should be PIL.PngImagePlugin.PngImageFile ?
processed_image = self.run_processor(raw_image)
# FIXME: what happened to image metadata?
# metadata = context.services.metadata.build_metadata(
# session_id=context.graph_execution_state_id, node=self
# )
# currently can't see processed image in node UI without a showImage node,
# so for now set image_type to RESULT instead of INTERMEDIATE so it gets saved in the gallery
image_dto = context.services.images.create(
image=processed_image,
image_type=ImageType.RESULT,
image_category=ImageCategory.GENERAL,
session_id=context.graph_execution_state_id,
node_id=self.id,
is_intermediate=self.is_intermediate
)
"""Builds an ImageOutput and its ImageField"""
processed_image_field = ImageField(
image_name=image_dto.image_name,
image_type=image_dto.image_type,
)
return ImageOutput(
image=processed_image_field,
# width=processed_image.width,
width=image_dto.width,
# height=processed_image.height,
height=image_dto.height,
# mode=processed_image.mode,
)
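# A sketch of how new preprocessors plug in: subclass ImageProcessorInvocation
# and override run_processor(). This hypothetical blur node is illustrative
# only and is not part of this commit.
#
#   class BlurImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
#       """Applies a Gaussian blur to the image (illustrative example)"""
#       type: Literal["blur_image_processor"] = "blur_image_processor"
#       radius: float = Field(default=2.0, ge=0, description="blur radius in pixels")
#       def run_processor(self, image):
#           return image.filter(ImageFilter.GaussianBlur(radius=self.radius))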
class CannyImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Canny edge detection for ControlNet"""
# fmt: off
type: Literal["canny_image_processor"] = "canny_image_processor"
# Input
low_threshold: float = Field(default=100, ge=0, description="low threshold of Canny pixel gradient")
high_threshold: float = Field(default=200, ge=0, description="high threshold of Canny pixel gradient")
# fmt: on
def run_processor(self, image):
canny_processor = CannyDetector()
processed_image = canny_processor(image, self.low_threshold, self.high_threshold)
return processed_image
class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies HED edge detection to image"""
# fmt: off
type: Literal["hed_image_processor"] = "hed_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
# safe not supported in controlnet_aux v0.0.3
# safe: bool = Field(default=False, description="whether to use safe mode")
scribble: bool = Field(default=False, description="whether to use scribble mode")
# fmt: on
def run_processor(self, image):
hed_processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
processed_image = hed_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
# safe not supported in controlnet_aux v0.0.3
# safe=self.safe,
scribble=self.scribble,
)
return processed_image
class LineartImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies line art processing to image"""
# fmt: off
type: Literal["lineart_image_processor"] = "lineart_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
coarse: bool = Field(default=False, description="whether to use coarse mode")
# fmt: on
def run_processor(self, image):
lineart_processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
processed_image = lineart_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
coarse=self.coarse)
return processed_image
class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies line art anime processing to image"""
# fmt: off
type: Literal["lineart_anime_image_processor"] = "lineart_anime_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
# fmt: on
def run_processor(self, image):
processor = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
)
return processed_image
class OpenposeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies Openpose processing to image"""
# fmt: off
type: Literal["openpose_image_processor"] = "openpose_image_processor"
# Inputs
hand_and_face: bool = Field(default=False, description="whether to use hands and face mode")
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
# fmt: on
def run_processor(self, image):
openpose_processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = openpose_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
hand_and_face=self.hand_and_face,
)
return processed_image
class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies Midas depth processing to image"""
# fmt: off
type: Literal["midas_depth_image_processor"] = "midas_depth_image_processor"
# Inputs
a_mult: float = Field(default=2.0, ge=0, description="Midas parameter a = a_mult * PI")
bg_th: float = Field(default=0.1, ge=0, description="Midas parameter bg_th")
# depth_and_normal not supported in controlnet_aux v0.0.3
# depth_and_normal: bool = Field(default=False, description="whether to use depth and normal mode")
# fmt: on
def run_processor(self, image):
midas_processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
processed_image = midas_processor(image,
a=np.pi * self.a_mult,
bg_th=self.bg_th,
# depth_and_normal not supported in controlnet_aux v0.0.3
# depth_and_normal=self.depth_and_normal,
)
return processed_image
class NormalbaeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies NormalBae processing to image"""
# fmt: off
type: Literal["normalbae_image_processor"] = "normalbae_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
# fmt: on
def run_processor(self, image):
normalbae_processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = normalbae_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution)
return processed_image
class MlsdImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies MLSD processing to image"""
# fmt: off
type: Literal["mlsd_image_processor"] = "mlsd_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
thr_v: float = Field(default=0.1, ge=0, description="MLSD parameter thr_v")
thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter thr_d")
# fmt: on
def run_processor(self, image):
mlsd_processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
processed_image = mlsd_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
thr_v=self.thr_v,
thr_d=self.thr_d)
return processed_image
class PidiImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies PIDI processing to image"""
# fmt: off
type: Literal["pidi_image_processor"] = "pidi_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
safe: bool = Field(default=False, description="whether to use safe mode")
scribble: bool = Field(default=False, description="whether to use scribble mode")
# fmt: on
def run_processor(self, image):
pidi_processor = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
processed_image = pidi_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
safe=self.safe,
scribble=self.scribble)
return processed_image
class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies content shuffle processing to image"""
# fmt: off
type: Literal["content_shuffle_image_processor"] = "content_shuffle_image_processor"
# Inputs
detect_resolution: int = Field(default=512, ge=0, description="pixel resolution for edge detection")
image_resolution: int = Field(default=512, ge=0, description="pixel resolution for output image")
h: Optional[int] = Field(default=512, ge=0, description="content shuffle h parameter")
w: Optional[int] = Field(default=512, ge=0, description="content shuffle w parameter")
f: Optional[int] = Field(default=256, ge=0, description="content shuffle f parameter")
# fmt: on
def run_processor(self, image):
content_shuffle_processor = ContentShuffleDetector()
processed_image = content_shuffle_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
h=self.h,
w=self.w,
f=self.f
)
return processed_image
# should work with controlnet_aux >= 0.0.4 and timm <= 0.6.13
class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies Zoe depth processing to image"""
# fmt: off
type: Literal["zoe_depth_image_processor"] = "zoe_depth_image_processor"
# fmt: on
def run_processor(self, image):
zoe_depth_processor = ZoeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = zoe_depth_processor(image)
return processed_image
class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
"""Applies mediapipe face processing to image"""
# fmt: off
type: Literal["mediapipe_face_processor"] = "mediapipe_face_processor"
# Inputs
max_faces: int = Field(default=1, ge=1, description="maximum number of faces to detect")
min_confidence: float = Field(default=0.5, ge=0, le=1, description="minimum confidence for face detection")
# fmt: on
def run_processor(self, image):
mediapipe_face_processor = MediapipeFaceDetector()
processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence)
return processed_image
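# Usage sketch (illustrative, not part of this commit): the invocations above
# are thin wrappers around controlnet_aux detectors, which can also be
# exercised directly. File paths here are hypothetical.
#
#   from PIL import Image
#   from controlnet_aux import CannyDetector
#
#   img = Image.open("input.png").convert("RGB")
#   edges = CannyDetector()(img, low_threshold=100, high_threshold=200)
#   edges.save("canny_control.png")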

View File

@ -4,7 +4,9 @@ from functools import partial
from typing import Literal, Optional, Union, get_args
import numpy as np
from diffusers import ControlNetModel
from torch import Tensor
import torch
from pydantic import BaseModel, Field
@ -58,6 +60,9 @@ class TextToImageInvocation(BaseInvocation, SDImageInvocation):
cfg_scale: float = Field(default=7.5, ge=1, description="The Classifier-Free Guidance scale; higher values may give results closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
model: str = Field(default="", description="The model to use (currently ignored)")
progress_images: bool = Field(default=False, description="Whether or not to produce progress images during generation", )
control_model: Optional[str] = Field(default=None, description="The control model to use")
control_image: Optional[ImageField] = Field(default=None, description="The processed control image")
# fmt: on
# TODO: pass this an emitter method or something? or a session for dispatching?
@ -78,17 +83,35 @@ class TextToImageInvocation(BaseInvocation, SDImageInvocation):
# Handle invalid model parameter
model = choose_model(context.services.model_manager, self.model)
# loading controlnet image (currently requires pre-processed image)
control_image = (
None if self.control_image is None
else context.services.images.get(
self.control_image.image_type, self.control_image.image_name
)
)
# loading controlnet model
if self.control_model is None or self.control_model == '':
control_model = None
else:
# FIXME: change this to dropdown menu?
# FIXME: generalize so don't have to hardcode torch_dtype and device
control_model = ControlNetModel.from_pretrained(self.control_model,
torch_dtype=torch.float16).to("cuda")
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(
context.graph_execution_state_id
)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
outputs = Txt2Img(model).generate(
txt2img = Txt2Img(model, control_model=control_model)
outputs = txt2img.generate(
prompt=self.prompt,
step_callback=partial(self.dispatch_progress, context, source_node_id),
control_image=control_image,
**self.dict(
exclude={"prompt"}
exclude={"prompt", "control_image" }
), # Shorthand for passing all of the parameters above manually
)
# Outputs is an infinite iterator that will return a new InvokeAIGeneratorOutput object
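# Sketch of the generalization the FIXME above asks for (an assumption, not
# part of this commit): derive dtype and device from the loaded pipeline the
# way prep_control_data() in latent.py does, rather than hardcoding
# float16 / "cuda".
#
#   control_model = ControlNetModel.from_pretrained(
#       self.control_model,
#       torch_dtype=pipeline.unet.dtype,  # follow the main UNet's dtype
#   ).to(pipeline.device)                 # ...and its device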

View File

@ -1,8 +1,11 @@
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
import random
from typing import Literal, Optional, Union
import einops
from typing import Literal, Optional, Union, List
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
from pydantic import BaseModel, Field, validator
import torch
@ -11,14 +14,18 @@ from invokeai.app.models.image import ImageCategory
from invokeai.app.util.misc import SEED_MAX, get_random_seed
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from .controlnet_image_processors import ControlField
from ...backend.model_management.model_manager import ModelManager
from ...backend.util.devices import choose_torch_device, torch_dtype
from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings
from ...backend.image_util.seamless import configure_model_padding
from ...backend.prompting.conditioning import get_uc_and_c_and_ec
from ...backend.stable_diffusion.diffusers_pipeline import ConditioningData, StableDiffusionGeneratorPipeline, image_resized_to_grid_as_tensor
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.stable_diffusion.diffusers_pipeline import ControlNetData
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
import numpy as np
from ..services.image_file_storage import ImageType
@ -28,7 +35,7 @@ from .compel import ConditioningField
from ...backend.stable_diffusion import PipelineIntermediateState
from diffusers.schedulers import SchedulerMixin as Scheduler
import diffusers
from diffusers import DiffusionPipeline
from diffusers import DiffusionPipeline, ControlNetModel
class LatentsField(BaseModel):
@ -84,13 +91,13 @@ SAMPLER_NAME_VALUES = Literal[
def get_scheduler(scheduler_name:str, model: StableDiffusionGeneratorPipeline)->Scheduler:
scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP['ddim'])
scheduler_config = model.scheduler.config
if "_backup" in scheduler_config:
scheduler_config = scheduler_config["_backup"]
scheduler_config = {**scheduler_config, **scheduler_extra_config, "_backup": scheduler_config}
scheduler = scheduler_class.from_config(scheduler_config)
# hack copied over from generate.py
if not hasattr(scheduler, 'uses_inpainting_model'):
scheduler.uses_inpainting_model = lambda: False
@ -169,6 +176,7 @@ class TextToLatentsInvocation(BaseInvocation):
cfg_scale: float = Field(default=7.5, gt=0, description="The Classifier-Free Guidance scale; higher values may give results closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
model: str = Field(default="", description="The model to use (currently ignored)")
control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
# seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
# seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
# fmt: on
@ -179,7 +187,8 @@ class TextToLatentsInvocation(BaseInvocation):
"ui": {
"tags": ["latents", "image"],
"type_hints": {
"model": "model"
"model": "model",
"control": "control",
}
},
}
@ -238,6 +247,81 @@ class TextToLatentsInvocation(BaseInvocation):
).add_scheduler_args_if_applicable(model.scheduler, eta=0.0)#ddim_eta)
return conditioning_data
def prep_control_data(self,
context: InvocationContext,
model: StableDiffusionGeneratorPipeline, # really only need model for dtype and device
control_input: List[ControlField],
latents_shape: List[int],
do_classifier_free_guidance: bool = True,
) -> List[ControlNetData]:
# assuming fixed dimensional scaling of 8:1 for image:latents
control_height_resize = latents_shape[2] * 8
control_width_resize = latents_shape[3] * 8
if control_input is None:
# print("control input is None")
control_list = None
elif isinstance(control_input, list) and len(control_input) == 0:
# print("control input is empty list")
control_list = None
elif isinstance(control_input, ControlField):
# print("control input is ControlField")
control_list = [control_input]
elif isinstance(control_input, list) and len(control_input) > 0 and isinstance(control_input[0], ControlField):
# print("control input is list[ControlField]")
control_list = control_input
else:
# print("input control is unrecognized:", type(self.control))
control_list = None
if control_list is None:
control_data = None
# from above handling, any control that is not None should now be of type list[ControlField]
else:
# FIXME: add checks to skip entry if model or image is None
# and if weight is None, populate with default 1.0?
control_data = []
control_models = []
for control_info in control_list:
# handle control models
if ("," in control_info.control_model):
control_model_split = control_info.control_model.split(",")
control_name = control_model_split[0]
control_subfolder = control_model_split[1]
print("Using HF model subfolders")
print(" control_name: ", control_name)
print(" control_subfolder: ", control_subfolder)
control_model = ControlNetModel.from_pretrained(control_name,
subfolder=control_subfolder,
torch_dtype=model.unet.dtype).to(model.device)
else:
control_model = ControlNetModel.from_pretrained(control_info.control_model,
torch_dtype=model.unet.dtype).to(model.device)
control_models.append(control_model)
control_image_field = control_info.image
input_image = context.services.images.get_pil_image(control_image_field.image_type,
control_image_field.image_name)
# self.image.image_type, self.image.image_name
# FIXME: still need to test with different widths, heights, devices, dtypes
# and add in batch_size, num_images_per_prompt?
# and do real check for classifier_free_guidance?
# prepare_control_image should return torch.Tensor of shape(batch_size, 3, height, width)
control_image = model.prepare_control_image(
image=input_image,
do_classifier_free_guidance=do_classifier_free_guidance,
width=control_width_resize,
height=control_height_resize,
# batch_size=batch_size * num_images_per_prompt,
# num_images_per_prompt=num_images_per_prompt,
device=control_model.device,
dtype=control_model.dtype,
)
control_item = ControlNetData(model=control_model,
image_tensor=control_image,
weight=control_info.control_weight,
begin_step_percent=control_info.begin_step_percent,
end_step_percent=control_info.end_step_percent)
control_data.append(control_item)
# MultiControlNetModel has been refactored out, just need list[ControlNetData]
return control_data
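# The input normalization at the top of prep_control_data(), extracted as a
# standalone sketch for clarity: anything that is not a ControlField or a
# non-empty list of ControlFields normalizes to None.
#
#   def _normalize_control_input(control_input):
#       if control_input is None:
#           return None
#       if isinstance(control_input, ControlField):
#           return [control_input]
#       if (isinstance(control_input, list) and control_input
#               and isinstance(control_input[0], ControlField)):
#           return control_input
#       return None  # empty list or unrecognized input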
def invoke(self, context: InvocationContext) -> LatentsOutput:
noise = context.services.latents.get(self.noise.latents_name)
@ -252,14 +336,19 @@ class TextToLatentsInvocation(BaseInvocation):
model = self.get_model(context.services.model_manager)
conditioning_data = self.get_conditioning_data(context, model)
# TODO: Verify the noise is the right size
print("type of control input: ", type(self.control))
control_data = self.prep_control_data(model=model, context=context, control_input=self.control,
latents_shape=noise.shape,
do_classifier_free_guidance=(self.cfg_scale >= 1.0))
# TODO: Verify the noise is the right size
result_latents, result_attention_map_saver = model.latents_from_embeddings(
latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)),
noise=noise,
num_inference_steps=self.steps,
conditioning_data=conditioning_data,
callback=step_callback
control_data=control_data, # list[ControlNetData]
callback=step_callback,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
@ -285,7 +374,8 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
"ui": {
"tags": ["latents"],
"type_hints": {
"model": "model"
"model": "model",
"control": "control",
}
},
}
@ -304,6 +394,11 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
model = self.get_model(context.services.model_manager)
conditioning_data = self.get_conditioning_data(context, model)
print("type of control input: ", type(self.control))
control_data = self.prep_control_data(model=model, context=context, control_input=self.control,
latents_shape=noise.shape,
do_classifier_free_guidance=(self.cfg_scale >= 1.0))
# TODO: Verify the noise is the right size
initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
@ -318,6 +413,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
noise=noise,
num_inference_steps=self.steps,
conditioning_data=conditioning_data,
control_data=control_data, # list[ControlNetData]
callback=step_callback
)
@ -362,8 +458,14 @@ class LatentsToImageInvocation(BaseInvocation):
np_image = model.decode_latents(latents)
image = model.numpy_to_pil(np_image)[0]
# what happened to metadata?
# metadata = context.services.metadata.build_metadata(
# session_id=context.graph_execution_state_id, node=self
# )
torch.cuda.empty_cache()
# new (post Image service refactor) way of using services to save image
# and generate unique image_name
image_dto = context.services.images.create(
image=image,
image_type=ImageType.RESULT,
@ -414,6 +516,7 @@ class ResizeLatentsInvocation(BaseInvocation):
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, resized_latents)
context.services.latents.save(name, resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents)
@ -444,6 +547,7 @@ class ScaleLatentsInvocation(BaseInvocation):
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, resized_latents)
context.services.latents.save(name, resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents)
@ -468,6 +572,9 @@ class ImageToLatentsInvocation(BaseInvocation):
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
# image = context.services.images.get(
# self.image.image_type, self.image.image_name
# )
image = context.services.images.get_pil_image(
self.image.image_type, self.image.image_name
)
@ -488,6 +595,6 @@ class ImageToLatentsInvocation(BaseInvocation):
)
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, latents)
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=latents)

View File

@ -34,6 +34,15 @@ class IntOutput(BaseInvocationOutput):
# fmt: on
class FloatOutput(BaseInvocationOutput):
"""A float output"""
# fmt: off
type: Literal["float_output"] = "float_output"
param: float = Field(default=None, description="The output float")
# fmt: on
class AddInvocation(BaseInvocation, MathInvocationConfig):
"""Adds two numbers"""

View File

@ -3,7 +3,7 @@
from typing import Literal
from pydantic import Field
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext
from .math import IntOutput
from .math import IntOutput, FloatOutput
# Pass-through parameter nodes - used by subgraphs
@ -16,3 +16,13 @@ class ParamIntInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=self.a)
class ParamFloatInvocation(BaseInvocation):
"""A float parameter"""
#fmt: off
type: Literal["param_float"] = "param_float"
param: float = Field(default=0.0, description="The float value")
#fmt: on
def invoke(self, context: InvocationContext) -> FloatOutput:
return FloatOutput(param=self.param)

View File

@ -60,6 +60,35 @@ def get_input_field(node: BaseInvocation, field: str) -> Any:
node_input_field = node_inputs.get(field) or None
return node_input_field
from typing import Optional, Union, List, get_args, get_origin
def is_union_subtype(t1, t2):
t1_args = get_args(t1)
t2_args = get_args(t2)
if not t1_args:
# t1 is a single type
return t1 in t2_args
else:
# t1 is a Union, check that all of its types are in t2_args
return all(arg in t2_args for arg in t1_args)
def is_list_or_contains_list(t):
t_args = get_args(t)
# If the type is a List
if get_origin(t) is list:
return True
# If the type is a Union
elif t_args:
# Check if any of the types in the Union is a List
for arg in t_args:
if get_origin(arg) is list:
return True
return False
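# Quick sanity checks for the two helpers above (illustrative):
#
#   assert is_union_subtype(int, Union[int, str])             # single type in a Union
#   assert is_union_subtype(Union[int, str], Union[int, str, float])
#   assert not is_union_subtype(Union[int, bytes], Union[int, str])
#   assert is_list_or_contains_list(List[int])                # plain list
#   assert is_list_or_contains_list(Union[int, List[int]])    # Union containing a list
#   assert not is_list_or_contains_list(int)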
def are_connection_types_compatible(from_type: Any, to_type: Any) -> bool:
if not from_type:
@ -85,7 +114,8 @@ def are_connection_types_compatible(from_type: Any, to_type: Any) -> bool:
if to_type in get_args(from_type):
return True
if not issubclass(from_type, to_type):
# if not issubclass(from_type, to_type):
if not is_union_subtype(from_type, to_type):
return False
else:
return False
@ -694,7 +724,11 @@ class Graph(BaseModel):
input_root_type = next(t[0] for t in type_degrees if t[1] == 0) # type: ignore
# Verify that all outputs are lists
if not all((get_origin(f) == list for f in output_fields)):
# if not all((get_origin(f) == list for f in output_fields)):
# return False
# Verify that all outputs are lists
if not all(is_list_or_contains_list(f) for f in output_fields):
return False
# Verify that all outputs match the input type (are a base class or the same class)

View File

@ -75,9 +75,11 @@ class InvokeAIGenerator(metaclass=ABCMeta):
def __init__(self,
model_info: dict,
params: InvokeAIGeneratorBasicParams=InvokeAIGeneratorBasicParams(),
**kwargs,
):
self.model_info=model_info
self.params=params
self.kwargs = kwargs
def generate(self,
prompt: str='',
@ -118,9 +120,12 @@ class InvokeAIGenerator(metaclass=ABCMeta):
model=model,
scheduler_name=generator_args.get('scheduler')
)
uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt,model=model)
# get conditioning from prompt via Compel package
uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt, model=model)
gen_class = self._generator_class()
generator = gen_class(model, self.params.precision)
generator = gen_class(model, self.params.precision, **self.kwargs)
if self.params.variation_amount > 0:
generator.set_variation(generator_args.get('seed'),
generator_args.get('variation_amount'),
@ -276,7 +281,7 @@ class Generator:
precision: str
model: DiffusionPipeline
def __init__(self, model: DiffusionPipeline, precision: str):
def __init__(self, model: DiffusionPipeline, precision: str, **kwargs):
self.model = model
self.precision = precision
self.seed = None

View File

@ -4,6 +4,10 @@ invokeai.backend.generator.txt2img inherits from invokeai.backend.generator
import PIL.Image
import torch
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from diffusers.models.controlnet import ControlNetModel, ControlNetOutput
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
from ..stable_diffusion import (
ConditioningData,
PostprocessingSettings,
@ -13,8 +17,13 @@ from .base import Generator
class Txt2Img(Generator):
def __init__(self, model, precision):
super().__init__(model, precision)
def __init__(self, model, precision,
control_model: Optional[Union[ControlNetModel, List[ControlNetModel]]] = None,
**kwargs):
self.control_model = control_model
if isinstance(self.control_model, list):
self.control_model = MultiControlNetModel(self.control_model)
super().__init__(model, precision, **kwargs)
@torch.no_grad()
def get_make_image(
@ -42,9 +51,12 @@ class Txt2Img(Generator):
kwargs are 'width' and 'height'
"""
self.perlin = perlin
control_image = kwargs.get("control_image", None)
do_classifier_free_guidance = cfg_scale > 1.0
# noinspection PyTypeChecker
pipeline: StableDiffusionGeneratorPipeline = self.model
pipeline.control_model = self.control_model
pipeline.scheduler = sampler
uc, c, extra_conditioning_info = conditioning
@ -61,6 +73,37 @@ class Txt2Img(Generator):
),
).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)
# FIXME: still need to test with different widths, heights, devices, dtypes
# and add in batch_size, num_images_per_prompt?
if control_image is not None:
if isinstance(self.control_model, ControlNetModel):
control_image = pipeline.prepare_control_image(
image=control_image,
do_classifier_free_guidance=do_classifier_free_guidance,
width=width,
height=height,
# batch_size=batch_size * num_images_per_prompt,
# num_images_per_prompt=num_images_per_prompt,
device=self.control_model.device,
dtype=self.control_model.dtype,
)
elif isinstance(self.control_model, MultiControlNetModel):
images = []
for image_ in control_image:
image_ = self.model.prepare_control_image(
image=image_,
do_classifier_free_guidance=do_classifier_free_guidance,
width=width,
height=height,
# batch_size=batch_size * num_images_per_prompt,
# num_images_per_prompt=num_images_per_prompt,
device=self.control_model.device,
dtype=self.control_model.dtype,
)
images.append(image_)
control_image = images
kwargs["control_image"] = control_image
def make_image(x_T: torch.Tensor, _: int) -> PIL.Image.Image:
pipeline_output = pipeline.image_from_embeddings(
latents=torch.zeros_like(x_T, dtype=self.torch_dtype()),
@ -68,6 +111,7 @@ class Txt2Img(Generator):
num_inference_steps=steps,
conditioning_data=conditioning_data,
callback=step_callback,
**kwargs,
)
if (

View File

@ -2,23 +2,29 @@ from __future__ import annotations
import dataclasses
import inspect
import math
import secrets
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, Union
from pydantic import BaseModel, Field
import einops
import PIL.Image
import numpy as np
from accelerate.utils import set_seed
import psutil
import torch
import torchvision.transforms as T
from compel import EmbeddingsProvider
from diffusers.models import AutoencoderKL, UNet2DConditionModel
from diffusers.models.controlnet import ControlNetModel, ControlNetOutput
from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
StableDiffusionPipeline,
)
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import (
StableDiffusionImg2ImgPipeline,
)
@ -27,6 +33,7 @@ from diffusers.pipelines.stable_diffusion.safety_checker import (
)
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput
from diffusers.utils import PIL_INTERPOLATION
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.outputs import BaseOutput
from torchvision.transforms.functional import resize as tv_resize
@ -207,6 +214,13 @@ class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]):
raise AssertionError("why was that an empty generator?")
return result
@dataclass
class ControlNetData:
model: ControlNetModel = Field(default=None)
image_tensor: torch.Tensor= Field(default=None)
weight: float = Field(default=1.0)
begin_step_percent: float = Field(default=0.0)
end_step_percent: float = Field(default=1.0)
@dataclass(frozen=True)
class ConditioningData:
@ -302,6 +316,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
feature_extractor: Optional[CLIPFeatureExtractor],
requires_safety_checker: bool = False,
precision: str = "float32",
control_model: ControlNetModel = None,
):
super().__init__(
vae,
@ -322,6 +337,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
scheduler=scheduler,
safety_checker=safety_checker,
feature_extractor=feature_extractor,
# FIXME: can't currently register control module
# control_model=control_model,
)
self.invokeai_diffuser = InvokeAIDiffuserComponent(
self.unet, self._unet_forward, is_running_diffusers=True
@ -341,6 +358,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
self._model_group = FullyLoadedModelGroup(self.unet.device)
self._model_group.install(*self._submodels)
self.control_model = control_model
def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
"""
@ -463,6 +481,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
noise: torch.Tensor,
callback: Callable[[PipelineIntermediateState], None] = None,
run_id=None,
**kwargs,
) -> InvokeAIStableDiffusionPipelineOutput:
r"""
Function invoked when calling the pipeline for generation.
@ -483,6 +502,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
noise=noise,
run_id=run_id,
callback=callback,
**kwargs,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
torch.cuda.empty_cache()
@ -507,6 +527,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
additional_guidance: List[Callable] = None,
run_id=None,
callback: Callable[[PipelineIntermediateState], None] = None,
control_data: List[ControlNetData] = None,
**kwargs,
) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
if self.scheduler.config.get("cpu_only", False):
scheduler_device = torch.device('cpu')
@ -527,6 +549,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
additional_guidance=additional_guidance,
run_id=run_id,
callback=callback,
control_data=control_data,
**kwargs,
)
return result.latents, result.attention_map_saver
@ -539,6 +563,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
noise: torch.Tensor,
run_id: str = None,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
**kwargs,
):
self._adjust_memory_efficient_attention(latents)
if run_id is None:
@ -568,7 +594,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
latents = self.scheduler.add_noise(latents, noise, batched_t)
attention_map_saver: Optional[AttentionMapSaver] = None
# print("timesteps:", timesteps)
for i, t in enumerate(self.progress_bar(timesteps)):
batched_t.fill_(t)
step_output = self.step(
@ -578,6 +604,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
step_index=i,
total_step_count=len(timesteps),
additional_guidance=additional_guidance,
control_data=control_data,
**kwargs,
)
latents = step_output.prev_sample
@ -618,10 +646,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
step_index: int,
total_step_count: int,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
**kwargs,
):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0]
if additional_guidance is None:
additional_guidance = []
@ -629,6 +658,48 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# i.e. before or after passing it to InvokeAIDiffuserComponent
latent_model_input = self.scheduler.scale_model_input(latents, timestep)
# default is no controlnet, so set controlnet processing output to None
down_block_res_samples, mid_block_res_sample = None, None
if control_data is not None:
if conditioning_data.guidance_scale > 1.0:
# expand the latents input to control model if doing classifier free guidance
# (which for now is probably always true; a conditional elsewhere stops
# execution if classifier-free guidance is <= 1.0?)
latent_control_input = torch.cat([latent_model_input] * 2)
else:
latent_control_input = latent_model_input
# control_data should be type List[ControlNetData]
# this loop covers both ControlNet (one ControlNetData in list)
# and MultiControlNet (multiple ControlNetData in list)
for i, control_datum in enumerate(control_data):
# print("controlnet", i, "==>", type(control_datum))
first_control_step = math.floor(control_datum.begin_step_percent * total_step_count)
last_control_step = math.ceil(control_datum.end_step_percent * total_step_count)
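# e.g. with total_step_count=30, begin_step_percent=0.2, end_step_percent=0.8:
#   first_control_step = floor(0.2 * 30) = 6
#   last_control_step  = ceil(0.8 * 30)  = 24
# so this ControlNet contributes residuals only on steps 6..24 (inclusive)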
# only apply controlnet if current step is within the controlnet's begin/end step range
if first_control_step <= step_index <= last_control_step:
# print("running controlnet", i, "for step", step_index)
down_samples, mid_sample = control_datum.model(
sample=latent_control_input,
timestep=timestep,
encoder_hidden_states=torch.cat([conditioning_data.unconditioned_embeddings,
conditioning_data.text_embeddings]),
controlnet_cond=control_datum.image_tensor,
conditioning_scale=control_datum.weight,
# cross_attention_kwargs,
guess_mode=False,
return_dict=False,
)
if down_block_res_samples is None and mid_block_res_sample is None:
down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
else:
# add controlnet outputs together if have multiple controlnets
down_block_res_samples = [
samples_prev + samples_curr
for samples_prev, samples_curr in zip(down_block_res_samples, down_samples)
]
mid_block_res_sample += mid_sample
# predict the noise residual
noise_pred = self.invokeai_diffuser.do_diffusion_step(
latent_model_input,
@ -638,6 +709,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
conditioning_data.guidance_scale,
step_index=step_index,
total_step_count=total_step_count,
down_block_additional_residuals=down_block_res_samples, # from controlnet(s)
mid_block_additional_residual=mid_block_res_sample, # from controlnet(s)
)
# compute the previous noisy sample x_t -> x_t-1
@ -659,6 +732,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
t,
text_embeddings,
cross_attention_kwargs: Optional[dict[str, Any]] = None,
**kwargs,
):
"""predict the noise residual"""
if is_inpainting_model(self.unet) and latents.size(1) == 4:
@ -678,7 +752,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# First three args should be positional, not keywords, so torch hooks can see them.
return self.unet(
latents, t, text_embeddings, cross_attention_kwargs=cross_attention_kwargs
latents, t, text_embeddings, cross_attention_kwargs=cross_attention_kwargs,
**kwargs,
).sample
def img2img_from_embeddings(
@ -728,7 +803,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
noise: torch.Tensor,
run_id=None,
callback=None,
) -> InvokeAIStableDiffusionPipelineOutput:
) -> InvokeAIStableDiffusionPipelineOutput:
timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
result_latents, result_attention_maps = self.latents_from_embeddings(
latents=initial_latents if strength < 1.0 else torch.zeros_like(
@ -940,3 +1015,51 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
debug_image(
img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True
)
# Copied from diffusers pipeline_stable_diffusion_controlnet.py
# Returns torch.Tensor of shape (batch_size, 3, height, width)
def prepare_control_image(
self,
image,
# FIXME: need to fix hardwiring of width and height; base them on latents dimensions instead?
# latents,
width=512, # should be 8 * latent.shape[3]
height=512, # should be 8 * latent.shape[2]
batch_size=1,
num_images_per_prompt=1,
device="cuda",
dtype=torch.float16,
do_classifier_free_guidance=True,
):
if not isinstance(image, torch.Tensor):
if isinstance(image, PIL.Image.Image):
image = [image]
if isinstance(image[0], PIL.Image.Image):
images = []
for image_ in image:
image_ = image_.convert("RGB")
image_ = image_.resize((width, height), resample=PIL_INTERPOLATION["lanczos"])
image_ = np.array(image_)
image_ = image_[None, :]
images.append(image_)
image = images
image = np.concatenate(image, axis=0)
image = np.array(image).astype(np.float32) / 255.0
image = image.transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
elif isinstance(image[0], torch.Tensor):
image = torch.cat(image, dim=0)
image_batch_size = image.shape[0]
if image_batch_size == 1:
repeat_by = batch_size
else:
# image batch size is the same as prompt batch size
repeat_by = num_images_per_prompt
image = image.repeat_interleave(repeat_by, dim=0)
image = image.to(device=device, dtype=dtype)
if do_classifier_free_guidance:
image = torch.cat([image] * 2)
return image
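# Usage sketch (illustrative, not part of this commit): for a single 512x512
# PIL image with classifier-free guidance enabled, the result is duplicated
# along the batch dimension for the unconditioned/conditioned halves.
#
#   from PIL import Image
#   tensor = pipeline.prepare_control_image(
#       image=Image.open("pose.png"),  # hypothetical path
#       width=512,
#       height=512,
#       do_classifier_free_guidance=True,
#   )
#   # tensor.shape == torch.Size([2, 3, 512, 512]); values scaled to [0, 1]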

View File

@ -181,6 +181,7 @@ class InvokeAIDiffuserComponent:
unconditional_guidance_scale: float,
step_index: Optional[int] = None,
total_step_count: Optional[int] = None,
**kwargs,
):
"""
:param x: current latents
@ -209,7 +210,7 @@ class InvokeAIDiffuserComponent:
if wants_hybrid_conditioning:
unconditioned_next_x, conditioned_next_x = self._apply_hybrid_conditioning(
x, sigma, unconditioning, conditioning
x, sigma, unconditioning, conditioning, **kwargs,
)
elif wants_cross_attention_control:
(
@ -221,13 +222,14 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
)
elif self.sequential_guidance:
(
unconditioned_next_x,
conditioned_next_x,
) = self._apply_standard_conditioning_sequentially(
x, sigma, unconditioning, conditioning
x, sigma, unconditioning, conditioning, **kwargs,
)
else:
@ -235,7 +237,7 @@ class InvokeAIDiffuserComponent:
unconditioned_next_x,
conditioned_next_x,
) = self._apply_standard_conditioning(
x, sigma, unconditioning, conditioning
x, sigma, unconditioning, conditioning, **kwargs,
)
combined_next_x = self._combine(
@ -282,13 +284,13 @@ class InvokeAIDiffuserComponent:
# methods below are called from do_diffusion_step and should be considered private to this class.
def _apply_standard_conditioning(self, x, sigma, unconditioning, conditioning):
def _apply_standard_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs):
# fast batched path
x_twice = torch.cat([x] * 2)
sigma_twice = torch.cat([sigma] * 2)
both_conditionings = torch.cat([unconditioning, conditioning])
both_results = self.model_forward_callback(
x_twice, sigma_twice, both_conditionings
x_twice, sigma_twice, both_conditionings, **kwargs,
)
unconditioned_next_x, conditioned_next_x = both_results.chunk(2)
if conditioned_next_x.device.type == "mps":
@ -302,16 +304,17 @@ class InvokeAIDiffuserComponent:
sigma,
unconditioning: torch.Tensor,
conditioning: torch.Tensor,
**kwargs,
):
# low-memory sequential path
unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning)
conditioned_next_x = self.model_forward_callback(x, sigma, conditioning)
unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning, **kwargs)
conditioned_next_x = self.model_forward_callback(x, sigma, conditioning, **kwargs)
if conditioned_next_x.device.type == "mps":
# prevent a result filled with zeros. seems to be a torch bug.
conditioned_next_x = conditioned_next_x.clone()
return unconditioned_next_x, conditioned_next_x
def _apply_hybrid_conditioning(self, x, sigma, unconditioning, conditioning):
def _apply_hybrid_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs):
assert isinstance(conditioning, dict)
assert isinstance(unconditioning, dict)
x_twice = torch.cat([x] * 2)
@ -326,7 +329,7 @@ class InvokeAIDiffuserComponent:
else:
both_conditionings[k] = torch.cat([unconditioning[k], conditioning[k]])
unconditioned_next_x, conditioned_next_x = self.model_forward_callback(
x_twice, sigma_twice, both_conditionings
x_twice, sigma_twice, both_conditionings, **kwargs,
).chunk(2)
return unconditioned_next_x, conditioned_next_x
@ -337,6 +340,7 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
):
if self.is_running_diffusers:
return self._apply_cross_attention_controlled_conditioning__diffusers(
@ -345,6 +349,7 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
)
else:
return self._apply_cross_attention_controlled_conditioning__compvis(
@ -353,6 +358,7 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
)
def _apply_cross_attention_controlled_conditioning__diffusers(
@ -362,6 +368,7 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
):
context: Context = self.cross_attention_control_context
@ -377,6 +384,7 @@ class InvokeAIDiffuserComponent:
sigma,
unconditioning,
{"swap_cross_attn_context": cross_attn_processor_context},
**kwargs,
)
# do requested cross attention types for conditioning (positive prompt)
@ -388,6 +396,7 @@ class InvokeAIDiffuserComponent:
sigma,
conditioning,
{"swap_cross_attn_context": cross_attn_processor_context},
**kwargs,
)
return unconditioned_next_x, conditioned_next_x
@ -398,6 +407,7 @@ class InvokeAIDiffuserComponent:
unconditioning,
conditioning,
cross_attention_control_types_to_do,
**kwargs,
):
# print('pct', percent_through, ': doing cross attention control on', cross_attention_control_types_to_do)
# slower non-batched path (20% slower on mac MPS)
@ -411,13 +421,13 @@ class InvokeAIDiffuserComponent:
context: Context = self.cross_attention_control_context
try:
unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning)
unconditioned_next_x = self.model_forward_callback(x, sigma, unconditioning, **kwargs)
# process x using the original prompt, saving the attention maps
# print("saving attention maps for", cross_attention_control_types_to_do)
for ca_type in cross_attention_control_types_to_do:
context.request_save_attention_maps(ca_type)
_ = self.model_forward_callback(x, sigma, conditioning)
_ = self.model_forward_callback(x, sigma, conditioning, **kwargs)
context.clear_requests(cleanup=False)
# process x again, using the saved attention maps to control where self.edited_conditioning will be applied
@ -428,7 +438,7 @@ class InvokeAIDiffuserComponent:
self.conditioning.cross_attention_control_args.edited_conditioning
)
conditioned_next_x = self.model_forward_callback(
x, sigma, edited_conditioning
x, sigma, edited_conditioning, **kwargs,
)
context.clear_requests(cleanup=True)
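# A sketch of the **kwargs plumbing added in this file (illustrative): extra
# keyword arguments given to do_diffusion_step() flow unchanged through every
# conditioning path and land in model_forward_callback -- which is how the
# ControlNet residuals (down_block_additional_residuals /
# mid_block_additional_residual) reach the UNet forward pass.
#
#   def fake_forward(x, sigma, conditioning, **kwargs):
#       assert "down_block_additional_residuals" in kwargs  # arrived intact
#       return x  # stand-in for unet(...).sample
#
#   component = InvokeAIDiffuserComponent(unet, fake_forward, is_running_diffusers=True)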

View File

@ -1,5 +1,10 @@
import { forEach, size } from 'lodash-es';
import { ImageField, LatentsField, ConditioningField } from 'services/api';
import {
ImageField,
LatentsField,
ConditioningField,
ControlField,
} from 'services/api';
const OBJECT_TYPESTRING = '[object Object]';
const STRING_TYPESTRING = '[object String]';
@ -98,6 +103,24 @@ const parseConditioningField = (
};
};
const parseControlField = (controlField: unknown): ControlField | undefined => {
// Must be an object
if (!isObject(controlField)) {
return;
}
// A ControlField must have a `control`
if (!('control' in controlField)) {
return;
}
// console.log(typeof controlField.control);
// Build a valid ControlField
return {
control: controlField.control,
};
};
type NodeMetadata = {
[key: string]:
| string
@ -105,7 +128,8 @@ type NodeMetadata = {
| boolean
| ImageField
| LatentsField
| ConditioningField;
| ConditioningField
| ControlField;
};
type InvokeAIMetadata = {
@ -131,7 +155,7 @@ export const parseNodeMetadata = (
return;
}
// the only valid object types are ImageField, LatentsField and ConditioningField
// the only valid object types are ImageField, LatentsField, ConditioningField, ControlField
if (isObject(nodeItem)) {
if ('image_name' in nodeItem || 'image_type' in nodeItem) {
const imageField = parseImageField(nodeItem);
@ -156,6 +180,14 @@ export const parseNodeMetadata = (
}
return;
}
if ('control' in nodeItem) {
const controlField = parseControlField(nodeItem);
if (controlField) {
parsed[nodeKey] = controlField;
}
return;
}
}
// otherwise we accept any string, number or boolean

View File

@ -7,6 +7,7 @@ import EnumInputFieldComponent from './fields/EnumInputFieldComponent';
import ImageInputFieldComponent from './fields/ImageInputFieldComponent';
import LatentsInputFieldComponent from './fields/LatentsInputFieldComponent';
import ConditioningInputFieldComponent from './fields/ConditioningInputFieldComponent';
import ControlInputFieldComponent from './fields/ControlInputFieldComponent';
import ModelInputFieldComponent from './fields/ModelInputFieldComponent';
import NumberInputFieldComponent from './fields/NumberInputFieldComponent';
import StringInputFieldComponent from './fields/StringInputFieldComponent';
@ -97,6 +98,16 @@ const InputFieldComponent = (props: InputFieldComponentProps) => {
);
}
if (type === 'control' && template.type === 'control') {
return (
<ControlInputFieldComponent
nodeId={nodeId}
field={field}
template={template}
/>
);
}
if (type === 'model' && template.type === 'model') {
return (
<ModelInputFieldComponent

View File

@ -0,0 +1,16 @@
import {
ControlInputFieldTemplate,
ControlInputFieldValue,
} from 'features/nodes/types/types';
import { memo } from 'react';
import { FieldComponentProps } from './types';
const ControlInputFieldComponent = (
props: FieldComponentProps<ControlInputFieldValue, ControlInputFieldTemplate>
) => {
const { nodeId, field } = props;
return null;
};
export default memo(ControlInputFieldComponent);

View File

@ -4,6 +4,7 @@ export const HANDLE_TOOLTIP_OPEN_DELAY = 500;
export const FIELD_TYPE_MAP: Record<string, FieldType> = {
integer: 'integer',
float: 'float',
number: 'float',
string: 'string',
boolean: 'boolean',
@ -15,6 +16,8 @@ export const FIELD_TYPE_MAP: Record<string, FieldType> = {
array: 'array',
item: 'item',
ColorField: 'color',
ControlField: 'control',
control: 'control',
};
const COLOR_TOKEN_VALUE = 500;
@ -22,6 +25,9 @@ const COLOR_TOKEN_VALUE = 500;
const getColorTokenCssVariable = (color: string) =>
`var(--invokeai-colors-${color}-${COLOR_TOKEN_VALUE})`;
// @ts-ignore
export const FIELDS: Record<FieldType, FieldUIConfig> = {
integer: {
color: 'red',
@ -71,6 +77,12 @@ export const FIELDS: Record<FieldType, FieldUIConfig> = {
title: 'Conditioning',
description: 'Conditioning may be passed between nodes.',
},
control: {
color: 'cyan',
colorCssVar: getColorTokenCssVariable('cyan'), // TODO: no free color left
title: 'Control',
description: 'Control info passed between nodes.',
},
model: {
color: 'teal',
colorCssVar: getColorTokenCssVariable('teal'),

View File

@ -61,6 +61,7 @@ export type FieldType =
| 'image'
| 'latents'
| 'conditioning'
| 'control'
| 'model'
| 'array'
| 'item'
@ -82,6 +83,7 @@ export type InputFieldValue =
| ImageInputFieldValue
| LatentsInputFieldValue
| ConditioningInputFieldValue
| ControlInputFieldValue
| EnumInputFieldValue
| ModelInputFieldValue
| ArrayInputFieldValue
@ -102,6 +104,7 @@ export type InputFieldTemplate =
| ImageInputFieldTemplate
| LatentsInputFieldTemplate
| ConditioningInputFieldTemplate
| ControlInputFieldTemplate
| EnumInputFieldTemplate
| ModelInputFieldTemplate
| ArrayInputFieldTemplate
@ -177,6 +180,11 @@ export type LatentsInputFieldValue = FieldValueBase & {
export type ConditioningInputFieldValue = FieldValueBase & {
type: 'conditioning';
value?: string;
};
export type ControlInputFieldValue = FieldValueBase & {
type: 'control';
value?: undefined;
};
@ -262,6 +270,11 @@ export type ConditioningInputFieldTemplate = InputFieldTemplateBase & {
type: 'conditioning';
};
export type ControlInputFieldTemplate = InputFieldTemplateBase & {
default: undefined;
type: 'control';
};
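A hedged example of instantiating the new value shape; FieldValueBase is assumed here to carry at least an id and a name, so the local types below are a minimal stand-in rather than the module's real definitions.

type FieldValueBase = { id: string; name: string }; // assumed minimal base
type ControlValue = FieldValueBase & { type: 'control'; value?: undefined };

// control fields are connection-only, so `value` is never populated
const controlValue: ControlValue = {
  id: 'control-1',
  name: 'control',
  type: 'control',
};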
export type EnumInputFieldTemplate = InputFieldTemplateBase & {
default: string | number;
type: 'enum';

View File

@ -10,6 +10,7 @@ import {
IntegerInputFieldTemplate,
LatentsInputFieldTemplate,
ConditioningInputFieldTemplate,
ControlInputFieldTemplate,
StringInputFieldTemplate,
ModelInputFieldTemplate,
ArrayInputFieldTemplate,
@ -215,6 +216,21 @@ const buildConditioningInputFieldTemplate = ({
return template;
};
const buildControlInputFieldTemplate = ({
schemaObject,
baseField,
}: BuildInputFieldArg): ControlInputFieldTemplate => {
const template: ControlInputFieldTemplate = {
...baseField,
type: 'control',
inputRequirement: 'always',
inputKind: 'connection',
default: schemaObject.default ?? undefined,
};
return template;
};
const buildEnumInputFieldTemplate = ({
schemaObject,
baseField,
@ -286,9 +302,20 @@ export const getFieldType = (
if (typeHints && name in typeHints) {
rawFieldType = typeHints[name];
} else if (!schemaObject.type) {
rawFieldType = refObjectToFieldType(
schemaObject.allOf![0] as OpenAPIV3.ReferenceObject
);
// if schemaObject has no type, then it should have one of allOf, anyOf, oneOf
if (schemaObject.allOf) {
rawFieldType = refObjectToFieldType(
schemaObject.allOf![0] as OpenAPIV3.ReferenceObject
);
} else if (schemaObject.anyOf) {
rawFieldType = refObjectToFieldType(
schemaObject.anyOf![0] as OpenAPIV3.ReferenceObject
);
} else if (schemaObject.oneOf) {
rawFieldType = refObjectToFieldType(
schemaObject.oneOf![0] as OpenAPIV3.ReferenceObject
);
}
} else if (schemaObject.enum) {
rawFieldType = 'enum';
} else if (schemaObject.type) {
@ -331,6 +358,9 @@ export const buildInputFieldTemplate = (
if (['conditioning'].includes(fieldType)) {
return buildConditioningInputFieldTemplate({ schemaObject, baseField });
}
if (['control'].includes(fieldType)) {
return buildControlInputFieldTemplate({ schemaObject, baseField });
}
if (['model'].includes(fieldType)) {
return buildModelInputFieldTemplate({ schemaObject, baseField });
}
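The widened fallback above no longer assumes allOf is present. A self-contained sketch of the same resolution order (allOf, then anyOf, then oneOf), using the openapi-types shapes the file already imports; refObjectToName is a hypothetical stand-in for refObjectToFieldType.

import { OpenAPIV3 } from 'openapi-types';

// take the schema name from the $ref,
// e.g. '#/components/schemas/ControlField' -> 'ControlField'
const refObjectToName = (ref: OpenAPIV3.ReferenceObject): string =>
  ref.$ref.split('/').slice(-1)[0];

const firstRefName = (schema: OpenAPIV3.SchemaObject): string | undefined => {
  const composite = schema.allOf ?? schema.anyOf ?? schema.oneOf;
  return composite && composite.length > 0
    ? refObjectToName(composite[0] as OpenAPIV3.ReferenceObject)
    : undefined;
};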

View File

@ -52,6 +52,10 @@ export const buildInputFieldValue = (
fieldValue.value = undefined;
}
if (template.type === 'control') {
fieldValue.value = undefined;
}
if (template.type === 'model') {
fieldValue.value = undefined;
}

View File

@ -0,0 +1,29 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Canny edge detection for ControlNet
*/
export type CannyImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'canny_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* low threshold of Canny pixel gradient
*/
low_threshold?: number;
/**
* high threshold of Canny pixel gradient
*/
high_threshold?: number;
};
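A hedged usage example of the generated type; the 100/200 thresholds mirror the ones exercised in the standalone test script at the end of this commit, while the id and image values are made up.

import type { CannyImageProcessorInvocation } from './CannyImageProcessorInvocation';

const cannyNode: CannyImageProcessorInvocation = {
  id: 'canny-1',
  type: 'canny_image_processor',
  // ImageField member names follow the parser earlier in this diff
  image: { image_name: 'input.png', image_type: 'results' },
  low_threshold: 100,  // gradients below this are dropped
  high_threshold: 200, // gradients above this count as strong edges
};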

View File

@ -0,0 +1,41 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies content shuffle processing to image
*/
export type ContentShuffleImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'content_shuffle_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
/**
* content shuffle h parameter
*/
'h'?: number;
/**
* content shuffle w parameter
*/
'w'?: number;
/**
* content shuffle f parameter
*/
'f'?: number;
};

View File

@ -0,0 +1,29 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
export type ControlField = {
/**
* processed image
*/
image: ImageField;
/**
* control model used
*/
control_model: string;
/**
* weight given to controlnet
*/
control_weight: number;
/**
* % of total steps at which controlnet is first applied
*/
begin_step_percent: number;
/**
* % of total steps at which controlnet is last applied
*/
end_step_percent: number;
};
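Read together, begin_step_percent and end_step_percent bracket the span of denoising steps during which the controlnet is active. A hedged example value (the image names are invented):

import type { ControlField } from './ControlField';

const control: ControlField = {
  image: { image_name: 'edges.png', image_type: 'results' }, // the processed image
  control_model: 'lllyasviel/sd-controlnet-canny',
  control_weight: 1.0,
  begin_step_percent: 0.0, // active from the first denoising step...
  end_step_percent: 1.0,   // ...through the last
};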

View File

@ -0,0 +1,37 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Collects ControlNet info to pass to other nodes
*/
export type ControlNetInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'controlnet';
/**
* image to process
*/
image?: ImageField;
/**
* control model used
*/
control_model?: 'lllyasviel/sd-controlnet-canny' | 'lllyasviel/sd-controlnet-depth' | 'lllyasviel/sd-controlnet-hed' | 'lllyasviel/sd-controlnet-seg' | 'lllyasviel/sd-controlnet-openpose' | 'lllyasviel/sd-controlnet-scribble' | 'lllyasviel/sd-controlnet-normal' | 'lllyasviel/sd-controlnet-mlsd' | 'lllyasviel/control_v11p_sd15_canny' | 'lllyasviel/control_v11p_sd15_openpose' | 'lllyasviel/control_v11p_sd15_seg' | 'lllyasviel/control_v11f1p_sd15_depth' | 'lllyasviel/control_v11p_sd15_normalbae' | 'lllyasviel/control_v11p_sd15_scribble' | 'lllyasviel/control_v11p_sd15_mlsd' | 'lllyasviel/control_v11p_sd15_softedge' | 'lllyasviel/control_v11p_sd15s2_lineart_anime' | 'lllyasviel/control_v11p_sd15_lineart' | 'lllyasviel/control_v11p_sd15_inpaint' | 'lllyasviel/control_v11e_sd15_shuffle' | 'lllyasviel/control_v11e_sd15_ip2p' | 'lllyasviel/control_v11f1e_sd15_tile' | 'thibaud/controlnet-sd21-openpose-diffusers' | 'thibaud/controlnet-sd21-canny-diffusers' | 'thibaud/controlnet-sd21-depth-diffusers' | 'thibaud/controlnet-sd21-scribble-diffusers' | 'thibaud/controlnet-sd21-hed-diffusers' | 'thibaud/controlnet-sd21-zoedepth-diffusers' | 'thibaud/controlnet-sd21-color-diffusers' | 'thibaud/controlnet-sd21-openposev2-diffusers' | 'thibaud/controlnet-sd21-lineart-diffusers' | 'thibaud/controlnet-sd21-normalbae-diffusers' | 'thibaud/controlnet-sd21-ade20k-diffusers' | 'CrucibleAI/ControlNetMediaPipeFace';
/**
* weight given to controlnet
*/
control_weight?: number;
/**
* % of total steps at which controlnet is first applied
*/
begin_step_percent?: number;
/**
* % of total steps at which controlnet is last applied
*/
end_step_percent?: number;
};
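In a node graph, a processor's image output is typically wired into this collector's image input. A hedged sketch of two nodes and the connecting edge; the edge shape here is an assumption for illustration, not taken from the generated client.

const processor = { id: 'canny-1', type: 'canny_image_processor' as const };

const controlnet = {
  id: 'controlnet-1',
  type: 'controlnet' as const,
  control_model: 'lllyasviel/sd-controlnet-canny' as const,
  control_weight: 0.8,
  begin_step_percent: 0,
  end_step_percent: 1,
};

// hypothetical edge: route canny-1's processed image into controlnet-1
const edge = {
  source: { node_id: processor.id, field: 'image' },
  destination: { node_id: controlnet.id, field: 'image' },
};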

View File

@ -0,0 +1,17 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ControlField } from './ControlField';
/**
* node output for ControlNet info
*/
export type ControlOutput = {
type?: 'control_output';
/**
* The control info dict
*/
control?: ControlField;
};

View File

@ -0,0 +1,33 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies HED edge detection to image
*/
export type HedImageprocessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'hed_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
/**
* whether to use scribble mode
*/
scribble?: boolean;
};

View File

@ -0,0 +1,21 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Base class for invocations that preprocess images for ControlNet
*/
export type ImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'image_processor';
/**
* image to process
*/
image?: ImageField;
};

View File

@ -49,6 +49,18 @@ export type ImageToImageInvocation = {
* The model to use (currently ignored)
*/
model?: string;
/**
* Whether or not to produce progress images during generation
*/
progress_images?: boolean;
/**
* The control model to use
*/
control_model?: string;
/**
* The processed control image
*/
control_image?: ImageField;
/**
* The input image
*/

View File

@ -50,6 +50,18 @@ export type InpaintInvocation = {
* The model to use (currently ignored)
*/
model?: string;
/**
* Whether or not to produce progress images during generation
*/
progress_images?: boolean;
/**
* The control model to use
*/
control_model?: string;
/**
* The processed control image
*/
control_image?: ImageField;
/**
* The input image
*/

View File

@ -0,0 +1,29 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies line art anime processing to image
*/
export type LineartAnimeImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'lineart_anime_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
};

View File

@ -0,0 +1,33 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies line art processing to image
*/
export type LineartImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'lineart_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
/**
* whether to use coarse mode
*/
coarse?: boolean;
};

View File

@ -0,0 +1,29 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies Midas depth processing to image
*/
export type MidasDepthImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'midas_depth_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* Midas parameter a = amult * PI
*/
a_mult?: number;
/**
* Midas parameter bg_th
*/
bg_th?: number;
};

View File

@ -0,0 +1,37 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies MLSD processing to image
*/
export type MlsdImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'mlsd_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
/**
* MLSD parameter thr_v
*/
thr_v?: number;
/**
* MLSD parameter thr_d
*/
thr_d?: number;
};

View File

@ -0,0 +1,29 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies NormalBae processing to image
*/
export type NormalbaeImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'normalbae_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
};

View File

@ -0,0 +1,33 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies Openpose processing to image
*/
export type OpenposeImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'openpose_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* whether to use hands and face mode
*/
hand_and_face?: boolean;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
};

View File

@ -0,0 +1,37 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Applies PIDI processing to image
*/
export type PidiImageProcessorInvocation = {
/**
* The id of this node. Must be unique among all nodes.
*/
id: string;
type?: 'pidi_image_processor';
/**
* image to process
*/
image?: ImageField;
/**
* pixel resolution for edge detection
*/
detect_resolution?: number;
/**
* pixel resolution for output image
*/
image_resolution?: number;
/**
* whether to use safe mode
*/
safe?: boolean;
/**
* whether to use scribble mode
*/
scribble?: boolean;
};

View File

@ -2,6 +2,8 @@
/* tslint:disable */
/* eslint-disable */
import type { ImageField } from './ImageField';
/**
* Generates an image using text2img.
*/
@ -47,5 +49,17 @@ export type TextToImageInvocation = {
* The model to use (currently ignored)
*/
model?: string;
/**
* Whether or not to produce progress images during generation
*/
progress_images?: boolean;
/**
* The control model to use
*/
control_model?: string;
/**
* The processed control image
*/
control_image?: ImageField;
};

View File

@ -0,0 +1,31 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $CannyImageProcessorInvocation = {
description: `Canny edge detection for ControlNet`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
low_threshold: {
type: 'number',
description: `low threshold of Canny pixel gradient`,
},
high_threshold: {
type: 'number',
description: `high threshold of Canny pixel gradient`,
},
},
} as const;

View File

@ -0,0 +1,43 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $ContentShuffleImageProcessorInvocation = {
description: `Applies content shuffle processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
'h': {
type: 'number',
description: `content shuffle h parameter`,
},
'w': {
type: 'number',
description: `content shuffle w parameter`,
},
'f': {
type: 'number',
description: `content shuffle f parameter`,
},
},
} as const;

View File

@ -0,0 +1,37 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $ControlField = {
properties: {
image: {
type: 'all-of',
description: `processed image`,
contains: [{
type: 'ImageField',
}],
isRequired: true,
},
control_model: {
type: 'string',
description: `control model used`,
isRequired: true,
},
control_weight: {
type: 'number',
description: `weight given to controlnet`,
isRequired: true,
},
begin_step_percent: {
type: 'number',
description: `% of total steps at which controlnet is first applied`,
isRequired: true,
maximum: 1,
},
end_step_percent: {
type: 'number',
description: `% of total steps at which controlnet is last applied`,
isRequired: true,
maximum: 1,
},
},
} as const;

View File

@ -0,0 +1,41 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $ControlNetInvocation = {
description: `Collects ControlNet info to pass to other nodes`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
control_model: {
type: 'Enum',
},
control_weight: {
type: 'number',
description: `weight given to controlnet`,
maximum: 1,
},
begin_step_percent: {
type: 'number',
description: `% of total steps at which controlnet is first applied`,
maximum: 1,
},
end_step_percent: {
type: 'number',
description: `% of total steps at which controlnet is last applied`,
maximum: 1,
},
},
} as const;

View File

@ -0,0 +1,28 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $ControlOutput = {
description: `node output for ControlNet info`,
properties: {
type: {
type: 'Enum',
},
control: {
type: 'all-of',
description: `The control info dict`,
contains: [{
type: 'ControlField',
}],
},
width: {
type: 'number',
description: `The width of the noise in pixels`,
isRequired: true,
},
height: {
type: 'number',
description: `The height of the noise in pixels`,
isRequired: true,
},
},
} as const;

View File

@ -0,0 +1,35 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $HedImageprocessorInvocation = {
description: `Applies HED edge detection to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
scribble: {
type: 'boolean',
description: `whether to use scribble mode`,
},
},
} as const;

View File

@ -0,0 +1,23 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $ImageProcessorInvocation = {
description: `Base class for invocations that preprocess images for ControlNet`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
},
} as const;

View File

@ -0,0 +1,31 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $LineartAnimeImageProcessorInvocation = {
description: `Applies line art anime processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
},
} as const;

View File

@ -0,0 +1,35 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $LineartImageProcessorInvocation = {
description: `Applies line art processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
coarse: {
type: 'boolean',
description: `whether to use coarse mode`,
},
},
} as const;

View File

@ -0,0 +1,31 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $MidasDepthImageProcessorInvocation = {
description: `Applies Midas depth processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
a_mult: {
type: 'number',
description: `Midas parameter a = amult * PI`,
},
bg_th: {
type: 'number',
description: `Midas parameter bg_th`,
},
},
} as const;

View File

@ -0,0 +1,39 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $MlsdImageProcessorInvocation = {
description: `Applies MLSD processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
thr_v: {
type: 'number',
description: `MLSD parameter thr_v`,
},
thr_d: {
type: 'number',
description: `MLSD parameter thr_d`,
},
},
} as const;

View File

@ -0,0 +1,31 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $NormalbaeImageProcessorInvocation = {
description: `Applies NormalBae processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
},
} as const;

View File

@ -0,0 +1,35 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $OpenposeImageProcessorInvocation = {
description: `Applies Openpose processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
hand_and_face: {
type: 'boolean',
description: `whether to use hands and face mode`,
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
},
} as const;

View File

@ -0,0 +1,39 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $PidiImageProcessorInvocation = {
description: `Applies PIDI processing to image`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
image: {
type: 'all-of',
description: `image to process`,
contains: [{
type: 'ImageField',
}],
},
detect_resolution: {
type: 'number',
description: `pixel resolution for edge detection`,
},
image_resolution: {
type: 'number',
description: `pixel resolution for output image`,
},
safe: {
type: 'boolean',
description: `whether to use safe mode`,
},
scribble: {
type: 'boolean',
description: `whether to use scribble mode`,
},
},
} as const;

View File

@ -0,0 +1,16 @@
/* istanbul ignore file */
/* tslint:disable */
/* eslint-disable */
export const $RandomIntInvocation = {
description: `Outputs a single random integer.`,
properties: {
id: {
type: 'string',
description: `The id of this node. Must be unique among all nodes.`,
isRequired: true,
},
type: {
type: 'Enum',
},
},
} as const;

View File

@ -39,6 +39,8 @@ dependencies = [
"click",
"clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
"compel~=1.1.5",
"controlnet-aux>=0.0.4",
"timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26
"datasets",
"diffusers[torch]~=0.16.1",
"dnspython==2.2.1",
@ -54,6 +56,7 @@ dependencies = [
"flaskwebgui==1.0.3",
"gfpgan==1.3.8",
"huggingface-hub>=0.11.1",
"mediapipe", # needed for "mediapipeface" controlnet model
"npyscreen",
"numpy<1.24",
"omegaconf",

View File

@ -0,0 +1,54 @@
import os
import torch
import cv2
import numpy as np
from PIL import Image
from diffusers.utils import load_image
from diffusers.models.controlnet import ControlNetModel
from invokeai.backend.generator import Txt2Img
from invokeai.backend.model_management import ModelManager
print("loading 'Girl with a Pearl Earring' image")
image = load_image(
"https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png"
)
image.show()
print("preprocessing image with Canny edge detection")
image_np = np.array(image)
low_threshold = 100
high_threshold = 200
canny_np = cv2.Canny(image_np, low_threshold, high_threshold)
canny_image = Image.fromarray(canny_np)
canny_image.show()
# using invokeai model management for base model
print("loading base model stable-diffusion-1.5")
model_config_path = os.getcwd() + "/../configs/models.yaml"
model_manager = ModelManager(model_config_path)
model = model_manager.get_model('stable-diffusion-1.5')
print("loading control model lllyasviel/sd-controlnet-canny")
canny_controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny",
torch_dtype=torch.float16).to("cuda")
print("testing Txt2Img() constructor with control_model arg")
txt2img_canny = Txt2Img(model, control_model=canny_controlnet)
print("testing Txt2Img.generate() with control_image arg")
outputs = txt2img_canny.generate(
prompt="old man",
control_image=canny_image,
control_weight=1.0,
seed=0,
num_steps=30,
precision="float16",
)
generate_output = next(outputs)
out_image = generate_output.image
out_image.show()