Added support for ControlNet and MultiControlNet to legacy non-nodal Txt2Img in backend/generator. Although backend/generator will likely disappear by v3.x, right now they are very useful for testing core ControlNet and MultiControlNet functionality while node codebase is rapidly evolving.

2024-08-30 20:32:17 +00:00 · 2023-04-29 00:51:04 -07:00 · 2023-04-29 00:51:04 -07:00 · a91dee87d0
commit a91dee87d0
parent 5ff98a4179
2 changed files with 54 additions and 5 deletions
--- a/invokeai/backend/generator/base.py
+++ b/invokeai/backend/generator/base.py
@ -75,9 +75,11 @@ class InvokeAIGenerator(metaclass=ABCMeta):
    def __init__(self,
                 model_info: dict,
                 params: InvokeAIGeneratorBasicParams=InvokeAIGeneratorBasicParams(),
                 **kwargs,
                 ):
        self.model_info=model_info
        self.params=params
        self.kwargs = kwargs
    def generate(self,
                 prompt: str='',
@ -118,9 +120,12 @@ class InvokeAIGenerator(metaclass=ABCMeta):
            model=model,
            scheduler_name=generator_args.get('scheduler')
        )
-        uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt,model=model)
+
        # get conditioning from prompt via Compel package
        uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt, model=model)
        gen_class = self._generator_class()
-        generator = gen_class(model, self.params.precision)
+        generator = gen_class(model, self.params.precision, **self.kwargs)
        if self.params.variation_amount > 0:
            generator.set_variation(generator_args.get('seed'),
                                    generator_args.get('variation_amount'),
@ -276,7 +281,7 @@ class Generator:
    precision: str
    model: DiffusionPipeline
-    def __init__(self, model: DiffusionPipeline, precision: str):
+    def __init__(self, model: DiffusionPipeline, precision: str, **kwargs):
        self.model = model
        self.precision = precision
        self.seed = None
--- a/invokeai/backend/generator/txt2img.py
+++ b/invokeai/backend/generator/txt2img.py
@ -4,6 +4,10 @@ invokeai.backend.generator.txt2img inherits from invokeai.backend.generator
 import PIL.Image
 import torch
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from diffusers.models.controlnet import ControlNetModel, ControlNetOutput
 from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_controlnet import MultiControlNetModel
 from ..stable_diffusion import (
    ConditioningData,
    PostprocessingSettings,
@ -13,8 +17,13 @@ from .base import Generator
 class Txt2Img(Generator):
-    def __init__(self, model, precision):
+    def __init__(self, model, precision,
-        super().__init__(model, precision)
+                 control_model: Optional[Union[ControlNetModel, List[ControlNetModel]]] = None,
                 **kwargs):
        self.control_model = control_model
        if isinstance(self.control_model, list):
            self.control_model = MultiControlNetModel(self.control_model)
        super().__init__(model, precision, **kwargs)
    @torch.no_grad()
    def get_make_image(
@ -42,9 +51,12 @@ class Txt2Img(Generator):
        kwargs are 'width' and 'height'
        """
        self.perlin = perlin
        control_image = kwargs.get("control_image", None)
        do_classifier_free_guidance = cfg_scale > 1.0
        # noinspection PyTypeChecker
        pipeline: StableDiffusionGeneratorPipeline = self.model
        pipeline.control_model = self.control_model
        pipeline.scheduler = sampler
        uc, c, extra_conditioning_info = conditioning
@ -61,6 +73,37 @@ class Txt2Img(Generator):
            ),
        ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)
        # FIXME: still need to test with different widths, heights, devices, dtypes
        #        and add in batch_size, num_images_per_prompt?
        if control_image is not None:
            if isinstance(self.control_model, ControlNetModel):
                control_image = pipeline.prepare_control_image(
                    image=control_image,
                    do_classifier_free_guidance=do_classifier_free_guidance,
                    width=width,
                    height=height,
                    # batch_size=batch_size * num_images_per_prompt,
                    # num_images_per_prompt=num_images_per_prompt,
                    device=self.control_model.device,
                    dtype=self.control_model.dtype,
                )
            elif isinstance(self.control_model, MultiControlNetModel):
                images = []
                for image_ in control_image:
                    image_ = self.model.prepare_control_image(
                        image=image_,
                        do_classifier_free_guidance=do_classifier_free_guidance,
                        width=width,
                        height=height,
                        # batch_size=batch_size * num_images_per_prompt,
                        # num_images_per_prompt=num_images_per_prompt,
                        device=self.control_model.device,
                        dtype=self.control_model.dtype,
                    )
                    images.append(image_)
                control_image = images
            kwargs["control_image"] = control_image
        def make_image(x_T: torch.Tensor, _: int) -> PIL.Image.Image:
            pipeline_output = pipeline.image_from_embeddings(
                latents=torch.zeros_like(x_T, dtype=self.torch_dtype()),
@ -68,6 +111,7 @@ class Txt2Img(Generator):
                num_inference_steps=steps,
                conditioning_data=conditioning_data,
                callback=step_callback,
                **kwargs,
            )
            if (