diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 7c3ce7a819..86565366d9 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -16,7 +16,7 @@ from ...backend.util.devices import torch_dtype from ...backend.model_management import ModelType from ...backend.model_management.models import ModelNotFoundException from ...backend.model_management.lora import ModelPatcher -from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent +from ...backend.stable_diffusion import InvokeAIDiffuserComponent, BasicConditioningInfo, SDXLConditioningInfo from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext from .model import ClipField from dataclasses import dataclass @@ -29,28 +29,9 @@ class ConditioningField(BaseModel): schema_extra = {"required": ["conditioning_name"]} -@dataclass -class BasicConditioningInfo: - # type: Literal["basic_conditioning"] = "basic_conditioning" - embeds: torch.Tensor - extra_conditioning: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] - # weight: float - # mode: ConditioningAlgo - - -@dataclass -class SDXLConditioningInfo(BasicConditioningInfo): - # type: Literal["sdxl_conditioning"] = "sdxl_conditioning" - pooled_embeds: torch.Tensor - add_time_ids: torch.Tensor - - -ConditioningInfoType = Annotated[Union[BasicConditioningInfo, SDXLConditioningInfo], Field(discriminator="type")] - - @dataclass class ConditioningFieldData: - conditionings: List[Union[BasicConditioningInfo, SDXLConditioningInfo]] + conditionings: List[BasicConditioningInfo] # unconditioned: Optional[torch.Tensor] @@ -176,7 +157,15 @@ class CompelInvocation(BaseInvocation): class SDXLPromptInvocationBase: - def run_clip_raw(self, context, clip_field, prompt, get_pooled, lora_prefix): + def run_clip_compel( + self, + context: InvocationContext, + clip_field: ClipField, + prompt: str, + get_pooled: bool, + lora_prefix: str, + zero_on_empty: bool, + ): tokenizer_info = context.services.model_manager.get_model( **clip_field.tokenizer.dict(), context=context, @@ -186,82 +175,21 @@ class SDXLPromptInvocationBase: context=context, ) - def _lora_loader(): - for lora in clip_field.loras: - lora_info = context.services.model_manager.get_model(**lora.dict(exclude={"weight"}), context=context) - yield (lora_info.context.model, lora.weight) - del lora_info - return - - # loras = [(context.services.model_manager.get_model(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras] - - ti_list = [] - for trigger in re.findall(r"<[a-zA-Z0-9., _-]+>", prompt): - name = trigger[1:-1] - try: - ti_list.append( - ( - name, - context.services.model_manager.get_model( - model_name=name, - base_model=clip_field.text_encoder.base_model, - model_type=ModelType.TextualInversion, - context=context, - ).context.model, - ) - ) - except ModelNotFoundException: - # print(e) - # import traceback - # print(traceback.format_exc()) - print(f'Warn: trigger: "{trigger}" not found') - - with ModelPatcher.apply_lora( - text_encoder_info.context.model, _lora_loader(), lora_prefix - ), ModelPatcher.apply_ti(tokenizer_info.context.model, text_encoder_info.context.model, ti_list) as ( - tokenizer, - ti_manager, - ), ModelPatcher.apply_clip_skip( - text_encoder_info.context.model, clip_field.skipped_layers - ), text_encoder_info as text_encoder: - text_inputs = tokenizer( - prompt, - padding="max_length", - max_length=tokenizer.model_max_length, - truncation=True, - 
return_tensors="pt", - ) - text_input_ids = text_inputs.input_ids - prompt_embeds = text_encoder( - text_input_ids.to(text_encoder.device), - output_hidden_states=True, + # return zero on empty + if prompt == "" and zero_on_empty: + cpu_text_encoder = text_encoder_info.context.model + c = torch.zeros( + (1, cpu_text_encoder.config.max_position_embeddings, cpu_text_encoder.config.hidden_size), + dtype=text_encoder_info.context.cache.precision, ) if get_pooled: - c_pooled = prompt_embeds[0] + c_pooled = torch.zeros( + (1, cpu_text_encoder.config.hidden_size), + dtype=c.dtype, + ) else: c_pooled = None - c = prompt_embeds.hidden_states[-2] - - del tokenizer - del text_encoder - del tokenizer_info - del text_encoder_info - - c = c.detach().to("cpu") - if c_pooled is not None: - c_pooled = c_pooled.detach().to("cpu") - - return c, c_pooled, None - - def run_clip_compel(self, context, clip_field, prompt, get_pooled, lora_prefix): - tokenizer_info = context.services.model_manager.get_model( - **clip_field.tokenizer.dict(), - context=context, - ) - text_encoder_info = context.services.model_manager.get_model( - **clip_field.text_encoder.dict(), - context=context, - ) + return c, c_pooled, None def _lora_loader(): for lora in clip_field.loras: @@ -366,11 +294,17 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase): @torch.no_grad() def invoke(self, context: InvocationContext) -> CompelOutput: - c1, c1_pooled, ec1 = self.run_clip_compel(context, self.clip, self.prompt, False, "lora_te1_") + c1, c1_pooled, ec1 = self.run_clip_compel( + context, self.clip, self.prompt, False, "lora_te1_", zero_on_empty=True + ) if self.style.strip() == "": - c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.prompt, True, "lora_te2_") + c2, c2_pooled, ec2 = self.run_clip_compel( + context, self.clip2, self.prompt, True, "lora_te2_", zero_on_empty=True + ) else: - c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True, "lora_te2_") + c2, c2_pooled, ec2 = self.run_clip_compel( + context, self.clip2, self.style, True, "lora_te2_", zero_on_empty=True + ) original_size = (self.original_height, self.original_width) crop_coords = (self.crop_top, self.crop_left) @@ -425,118 +359,7 @@ class SDXLRefinerCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase @torch.no_grad() def invoke(self, context: InvocationContext) -> CompelOutput: # TODO: if there will appear lora for refiner - write proper prefix - c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True, "") - - original_size = (self.original_height, self.original_width) - crop_coords = (self.crop_top, self.crop_left) - - add_time_ids = torch.tensor([original_size + crop_coords + (self.aesthetic_score,)]) - - conditioning_data = ConditioningFieldData( - conditionings=[ - SDXLConditioningInfo( - embeds=c2, - pooled_embeds=c2_pooled, - add_time_ids=add_time_ids, - extra_conditioning=ec2, # or None - ) - ] - ) - - conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning" - context.services.latents.save(conditioning_name, conditioning_data) - - return CompelOutput( - conditioning=ConditioningField( - conditioning_name=conditioning_name, - ), - ) - - -class SDXLRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase): - """Pass unmodified prompt to conditioning without compel processing.""" - - type: Literal["sdxl_raw_prompt"] = "sdxl_raw_prompt" - - prompt: str = Field(default="", description="Prompt") - style: str = Field(default="", 
description="Style prompt") - original_width: int = Field(1024, description="") - original_height: int = Field(1024, description="") - crop_top: int = Field(0, description="") - crop_left: int = Field(0, description="") - target_width: int = Field(1024, description="") - target_height: int = Field(1024, description="") - clip: ClipField = Field(None, description="Clip to use") - clip2: ClipField = Field(None, description="Clip2 to use") - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": {"title": "SDXL Prompt (Raw)", "tags": ["prompt", "compel"], "type_hints": {"model": "model"}}, - } - - @torch.no_grad() - def invoke(self, context: InvocationContext) -> CompelOutput: - c1, c1_pooled, ec1 = self.run_clip_raw(context, self.clip, self.prompt, False, "lora_te1_") - if self.style.strip() == "": - c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.prompt, True, "lora_te2_") - else: - c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.style, True, "lora_te2_") - - original_size = (self.original_height, self.original_width) - crop_coords = (self.crop_top, self.crop_left) - target_size = (self.target_height, self.target_width) - - add_time_ids = torch.tensor([original_size + crop_coords + target_size]) - - conditioning_data = ConditioningFieldData( - conditionings=[ - SDXLConditioningInfo( - embeds=torch.cat([c1, c2], dim=-1), - pooled_embeds=c2_pooled, - add_time_ids=add_time_ids, - extra_conditioning=ec1, - ) - ] - ) - - conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning" - context.services.latents.save(conditioning_name, conditioning_data) - - return CompelOutput( - conditioning=ConditioningField( - conditioning_name=conditioning_name, - ), - ) - - -class SDXLRefinerRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase): - """Parse prompt using compel package to conditioning.""" - - type: Literal["sdxl_refiner_raw_prompt"] = "sdxl_refiner_raw_prompt" - - style: str = Field(default="", description="Style prompt") # TODO: ? 
- original_width: int = Field(1024, description="") - original_height: int = Field(1024, description="") - crop_top: int = Field(0, description="") - crop_left: int = Field(0, description="") - aesthetic_score: float = Field(6.0, description="") - clip2: ClipField = Field(None, description="Clip to use") - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": { - "title": "SDXL Refiner Prompt (Raw)", - "tags": ["prompt", "compel"], - "type_hints": {"model": "model"}, - }, - } - - @torch.no_grad() - def invoke(self, context: InvocationContext) -> CompelOutput: - # TODO: if there will appear lora for refiner - write proper prefix - c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.style, True, "") + c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True, "", zero_on_empty=False) original_size = (self.original_height, self.original_width) crop_coords = (self.crop_top, self.crop_left) diff --git a/invokeai/app/invocations/generate.py b/invokeai/app/invocations/generate.py deleted file mode 100644 index c9d82ae8de..0000000000 --- a/invokeai/app/invocations/generate.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654) - -from contextlib import contextmanager, ContextDecorator -from functools import partial -from typing import Literal, Optional, get_args - -from pydantic import Field - -from invokeai.app.models.image import ColorField, ImageCategory, ImageField, ResourceOrigin -from invokeai.app.util.misc import SEED_MAX, get_random_seed -from invokeai.backend.generator.inpaint import infill_methods -from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext -from .compel import ConditioningField -from .image import ImageOutput -from .model import UNetField, VaeField -from ..util.step_callback import stable_diffusion_step_callback -from ...backend.generator import Inpaint, InvokeAIGenerator -from ...backend.model_management.lora import ModelPatcher -from ...backend.stable_diffusion import PipelineIntermediateState -from ...backend.stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline - -SAMPLER_NAME_VALUES = Literal[tuple(InvokeAIGenerator.schedulers())] -INFILL_METHODS = Literal[tuple(infill_methods())] -DEFAULT_INFILL_METHOD = "patchmatch" if "patchmatch" in get_args(INFILL_METHODS) else "tile" - - -from .latent import get_scheduler - - -class OldModelContext(ContextDecorator): - model: StableDiffusionGeneratorPipeline - - def __init__(self, model): - self.model = model - - def __enter__(self): - return self.model - - def __exit__(self, *exc): - return False - - -class OldModelInfo: - name: str - hash: str - context: OldModelContext - - def __init__(self, name: str, hash: str, model: StableDiffusionGeneratorPipeline): - self.name = name - self.hash = hash - self.context = OldModelContext( - model=model, - ) - - -class InpaintInvocation(BaseInvocation): - """Generates an image using inpaint.""" - - type: Literal["inpaint"] = "inpaint" - - positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation") - negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation") - seed: int = Field( - ge=0, le=SEED_MAX, description="The seed to use (omit for random)", default_factory=get_random_seed - ) - steps: int = Field(default=30, gt=0, description="The number of steps to use to generate the image") - width: int = Field( - default=512, - 
multiple_of=8, - gt=0, - description="The width of the resulting image", - ) - height: int = Field( - default=512, - multiple_of=8, - gt=0, - description="The height of the resulting image", - ) - cfg_scale: float = Field( - default=7.5, - ge=1, - description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", - ) - scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use") - unet: UNetField = Field(default=None, description="UNet model") - vae: VaeField = Field(default=None, description="Vae model") - - # Inputs - image: Optional[ImageField] = Field(description="The input image") - strength: float = Field(default=0.75, gt=0, le=1, description="The strength of the original image") - fit: bool = Field( - default=True, - description="Whether or not the result should be fit to the aspect ratio of the input image", - ) - - # Inputs - mask: Optional[ImageField] = Field(description="The mask") - seam_size: int = Field(default=96, ge=1, description="The seam inpaint size (px)") - seam_blur: int = Field(default=16, ge=0, description="The seam inpaint blur radius (px)") - seam_strength: float = Field(default=0.75, gt=0, le=1, description="The seam inpaint strength") - seam_steps: int = Field(default=30, ge=1, description="The number of steps to use for seam inpaint") - tile_size: int = Field(default=32, ge=1, description="The tile infill method size (px)") - infill_method: INFILL_METHODS = Field( - default=DEFAULT_INFILL_METHOD, - description="The method used to infill empty regions (px)", - ) - inpaint_width: Optional[int] = Field( - default=None, - multiple_of=8, - gt=0, - description="The width of the inpaint region (px)", - ) - inpaint_height: Optional[int] = Field( - default=None, - multiple_of=8, - gt=0, - description="The height of the inpaint region (px)", - ) - inpaint_fill: Optional[ColorField] = Field( - default=ColorField(r=127, g=127, b=127, a=255), - description="The solid infill method color", - ) - inpaint_replace: float = Field( - default=0.0, - ge=0.0, - le=1.0, - description="The amount by which to replace masked areas with latent noise", - ) - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": {"tags": ["stable-diffusion", "image"], "title": "Inpaint"}, - } - - def dispatch_progress( - self, - context: InvocationContext, - source_node_id: str, - intermediate_state: PipelineIntermediateState, - ) -> None: - stable_diffusion_step_callback( - context=context, - intermediate_state=intermediate_state, - node=self.dict(), - source_node_id=source_node_id, - ) - - def get_conditioning(self, context, unet): - positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name) - c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype) - extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning - - negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name) - uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype) - - return (uc, c, extra_conditioning_info) - - @contextmanager - def load_model_old_way(self, context, scheduler): - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), - context=context, - ) - yield (lora_info.context.model, lora.weight) - del lora_info - return - - unet_info = context.services.model_manager.get_model( - 
**self.unet.unet.dict(), - context=context, - ) - vae_info = context.services.model_manager.get_model( - **self.vae.vae.dict(), - context=context, - ) - - with vae_info as vae, ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()), unet_info as unet: - device = context.services.model_manager.mgr.cache.execution_device - dtype = context.services.model_manager.mgr.cache.precision - - vae.to(dtype=unet.dtype) - - pipeline = StableDiffusionGeneratorPipeline( - vae=vae, - text_encoder=None, - tokenizer=None, - unet=unet, - scheduler=scheduler, - safety_checker=None, - feature_extractor=None, - requires_safety_checker=False, - ) - - yield OldModelInfo( - name=self.unet.unet.model_name, - hash="", - model=pipeline, - ) - - def invoke(self, context: InvocationContext) -> ImageOutput: - image = None if self.image is None else context.services.images.get_pil_image(self.image.image_name) - mask = None if self.mask is None else context.services.images.get_pil_image(self.mask.image_name) - - # Get the source node id (we are invoking the prepared node) - graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] - - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) - - with self.load_model_old_way(context, scheduler) as model: - conditioning = self.get_conditioning(context, model.context.model.unet) - - outputs = Inpaint(model).generate( - conditioning=conditioning, - scheduler=scheduler, - init_image=image, - mask_image=mask, - step_callback=partial(self.dispatch_progress, context, source_node_id), - **self.dict( - exclude={"positive_conditioning", "negative_conditioning", "scheduler", "image", "mask"} - ), # Shorthand for passing all of the parameters above manually - ) - - # Outputs is an infinite iterator that will return a new InvokeAIGeneratorOutput object - # each time it is called. We only need the first one. 
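# --- Annotation (not part of the patch) ---------------------------------------
# For context on the comment above: a hypothetical stand-in for how the removed
# InpaintInvocation consumed Inpaint(model).generate(...) — an endless generator
# from which only the first output was taken. Names and values are illustrative.

from itertools import count

def generate(conditioning=None, **kwargs):
    # stand-in for Inpaint(model).generate(...): yields a fresh output per next()
    for i in count():
        yield {"image": f"result-{i}", "seed": 1234 + i}

outputs = generate()
generator_output = next(outputs)  # only the first output is used, as the node did
# ------------------------------------------------------------------------------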
- generator_output = next(outputs) - - image_dto = context.services.images.create( - image=generator_output.image, - image_origin=ResourceOrigin.INTERNAL, - image_category=ImageCategory.GENERAL, - session_id=context.graph_execution_state_id, - node_id=self.id, - is_intermediate=self.is_intermediate, - ) - - return ImageOutput( - image=ImageField(image_name=image_dto.image_name), - width=image_dto.width, - height=image_dto.height, - ) diff --git a/invokeai/app/invocations/image.py b/invokeai/app/invocations/image.py index 10efaf776d..2c47020207 100644 --- a/invokeai/app/invocations/image.py +++ b/invokeai/app/invocations/image.py @@ -1,29 +1,19 @@ # Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654) -from typing import Literal, Optional - -import numpy -import cv2 -from PIL import Image, ImageFilter, ImageOps, ImageChops -from pydantic import Field from pathlib import Path -from typing import Union +from typing import Literal, Optional, Union + +import cv2 +import numpy +from PIL import Image, ImageChops, ImageFilter, ImageOps +from pydantic import Field + from invokeai.app.invocations.metadata import CoreMetadata -from ..models.image import ( - ImageCategory, - ImageField, - ResourceOrigin, - PILInvocationConfig, - ImageOutput, - MaskOutput, -) -from .baseinvocation import ( - BaseInvocation, - InvocationContext, - InvocationConfig, -) -from invokeai.backend.image_util.safety_checker import SafetyChecker from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark +from invokeai.backend.image_util.safety_checker import SafetyChecker + +from ..models.image import ImageCategory, ImageField, ImageOutput, MaskOutput, PILInvocationConfig, ResourceOrigin +from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext class LoadImageInvocation(BaseInvocation): @@ -143,9 +133,10 @@ class ImagePasteInvocation(BaseInvocation, PILInvocationConfig): def invoke(self, context: InvocationContext) -> ImageOutput: base_image = context.services.images.get_pil_image(self.base_image.image_name) image = context.services.images.get_pil_image(self.image.image_name) - mask = ( - None if self.mask is None else ImageOps.invert(context.services.images.get_pil_image(self.mask.image_name)) - ) + mask = None + if self.mask is not None: + mask = context.services.images.get_pil_image(self.mask.image_name) + mask = ImageOps.invert(mask.convert("L")) # TODO: probably shouldn't invert mask here... should user be required to do it? 
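# --- Annotation (not part of the patch) ---------------------------------------
# A small hedged sketch of the mask handling rewritten above in ImagePasteInvocation:
# converting the incoming mask to single-channel "L" before ImageOps.invert keeps the
# call from failing on RGBA/palette masks and yields a greyscale mask that
# Image.paste() accepts. The images below are synthetic and purely illustrative.

from PIL import Image, ImageOps

def prepare_paste_mask(mask: Image.Image) -> Image.Image:
    return ImageOps.invert(mask.convert("L"))

base = Image.new("RGB", (64, 64), "blue")
overlay = Image.new("RGB", (64, 64), "red")
mask = Image.new("RGB", (64, 64), "white")              # white = masked region
# after inversion the white region becomes 0, so the overlay is NOT pasted there
base.paste(overlay, (0, 0), prepare_paste_mask(mask))
# ------------------------------------------------------------------------------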
min_x = min(0, self.x) @@ -653,6 +644,195 @@ class ImageWatermarkInvocation(BaseInvocation, PILInvocationConfig): ) +class MaskEdgeInvocation(BaseInvocation, PILInvocationConfig): + """Applies an edge mask to an image""" + + # fmt: off + type: Literal["mask_edge"] = "mask_edge" + + # Inputs + image: Optional[ImageField] = Field(default=None, description="The image to apply the mask to") + edge_size: int = Field(description="The size of the edge") + edge_blur: int = Field(description="The amount of blur on the edge") + low_threshold: int = Field(description="First threshold for the hysteresis procedure in Canny edge detection") + high_threshold: int = Field(description="Second threshold for the hysteresis procedure in Canny edge detection") + # fmt: on + + def invoke(self, context: InvocationContext) -> MaskOutput: + mask = context.services.images.get_pil_image(self.image.image_name) + + npimg = numpy.asarray(mask, dtype=numpy.uint8) + npgradient = numpy.uint8(255 * (1.0 - numpy.floor(numpy.abs(0.5 - numpy.float32(npimg) / 255.0) * 2.0))) + npedge = cv2.Canny(npimg, threshold1=self.low_threshold, threshold2=self.high_threshold) + npmask = npgradient + npedge + npmask = cv2.dilate(npmask, numpy.ones((3, 3), numpy.uint8), iterations=int(self.edge_size / 2)) + + new_mask = Image.fromarray(npmask) + + if self.edge_blur > 0: + new_mask = new_mask.filter(ImageFilter.BoxBlur(self.edge_blur)) + + new_mask = ImageOps.invert(new_mask) + + image_dto = context.services.images.create( + image=new_mask, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.MASK, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + ) + + return MaskOutput( + mask=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) + + +class MaskCombineInvocation(BaseInvocation, PILInvocationConfig): + """Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`.""" + + # fmt: off + type: Literal["mask_combine"] = "mask_combine" + + # Inputs + mask1: ImageField = Field(default=None, description="The first mask to combine") + mask2: ImageField = Field(default=None, description="The second image to combine") + # fmt: on + + class Config(InvocationConfig): + schema_extra = { + "ui": {"title": "Mask Combine", "tags": ["mask", "combine"]}, + } + + def invoke(self, context: InvocationContext) -> ImageOutput: + mask1 = context.services.images.get_pil_image(self.mask1.image_name).convert("L") + mask2 = context.services.images.get_pil_image(self.mask2.image_name).convert("L") + + combined_mask = ImageChops.multiply(mask1, mask2) + + image_dto = context.services.images.create( + image=combined_mask, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + ) + + return ImageOutput( + image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) + + +class ColorCorrectInvocation(BaseInvocation, PILInvocationConfig): + """ + Shifts the colors of a target image to match the reference image, optionally + using a mask to only color-correct certain regions of the target image. 
+ """ + + type: Literal["color_correct"] = "color_correct" + + image: Optional[ImageField] = Field(default=None, description="The image to color-correct") + reference: Optional[ImageField] = Field(default=None, description="Reference image for color-correction") + mask: Optional[ImageField] = Field(default=None, description="Mask to use when applying color-correction") + mask_blur_radius: float = Field(default=8, description="Mask blur radius") + + def invoke(self, context: InvocationContext) -> ImageOutput: + pil_init_mask = None + if self.mask is not None: + pil_init_mask = context.services.images.get_pil_image(self.mask.image_name).convert("L") + + init_image = context.services.images.get_pil_image(self.reference.image_name) + + result = context.services.images.get_pil_image(self.image.image_name).convert("RGBA") + + # if init_image is None or init_mask is None: + # return result + + # Get the original alpha channel of the mask if there is one. + # Otherwise it is some other black/white image format ('1', 'L' or 'RGB') + # pil_init_mask = ( + # init_mask.getchannel("A") + # if init_mask.mode == "RGBA" + # else init_mask.convert("L") + # ) + pil_init_image = init_image.convert("RGBA") # Add an alpha channel if one doesn't exist + + # Build an image with only visible pixels from source to use as reference for color-matching. + init_rgb_pixels = numpy.asarray(init_image.convert("RGB"), dtype=numpy.uint8) + init_a_pixels = numpy.asarray(pil_init_image.getchannel("A"), dtype=numpy.uint8) + init_mask_pixels = numpy.asarray(pil_init_mask, dtype=numpy.uint8) + + # Get numpy version of result + np_image = numpy.asarray(result.convert("RGB"), dtype=numpy.uint8) + + # Mask and calculate mean and standard deviation + mask_pixels = init_a_pixels * init_mask_pixels > 0 + np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :] + np_image_masked = np_image[mask_pixels, :] + + if np_init_rgb_pixels_masked.size > 0: + init_means = np_init_rgb_pixels_masked.mean(axis=0) + init_std = np_init_rgb_pixels_masked.std(axis=0) + gen_means = np_image_masked.mean(axis=0) + gen_std = np_image_masked.std(axis=0) + + # Color correct + np_matched_result = np_image.copy() + np_matched_result[:, :, :] = ( + ( + ( + (np_matched_result[:, :, :].astype(numpy.float32) - gen_means[None, None, :]) + / gen_std[None, None, :] + ) + * init_std[None, None, :] + + init_means[None, None, :] + ) + .clip(0, 255) + .astype(numpy.uint8) + ) + matched_result = Image.fromarray(np_matched_result, mode="RGB") + else: + matched_result = Image.fromarray(np_image, mode="RGB") + + # Blur the mask out (into init image) by specified amount + if self.mask_blur_radius > 0: + nm = numpy.asarray(pil_init_mask, dtype=numpy.uint8) + nmd = cv2.erode( + nm, + kernel=numpy.ones((3, 3), dtype=numpy.uint8), + iterations=int(self.mask_blur_radius / 2), + ) + pmd = Image.fromarray(nmd, mode="L") + blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(self.mask_blur_radius)) + else: + blurred_init_mask = pil_init_mask + + multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, result.split()[-1]) + + # Paste original on color-corrected generation (using blurred mask) + matched_result.paste(init_image, (0, 0), mask=multiplied_blurred_init_mask) + + image_dto = context.services.images.create( + image=matched_result, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + ) + + return ImageOutput( + 
image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) + + class ImageHueAdjustmentInvocation(BaseInvocation): """Adjusts the Hue of an image.""" diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index c15f84ddd0..c66c9c6214 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -5,6 +5,7 @@ from typing import List, Literal, Optional, Union import einops import torch +import torchvision.transforms as T from diffusers.image_processor import VaeImageProcessor from diffusers.models.attention_processor import ( AttnProcessor2_0, @@ -12,20 +13,16 @@ from diffusers.models.attention_processor import ( LoRAXFormersAttnProcessor, XFormersAttnProcessor, ) -from diffusers.schedulers import SchedulerMixin as Scheduler +from diffusers.schedulers import DPMSolverSDEScheduler, SchedulerMixin as Scheduler from pydantic import BaseModel, Field, validator +from torchvision.transforms.functional import resize as tv_resize from invokeai.app.invocations.metadata import CoreMetadata from invokeai.app.util.controlnet_utils import prepare_control_image from invokeai.app.util.step_callback import stable_diffusion_step_callback from invokeai.backend.model_management.models import ModelType, SilenceWarnings -from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext -from .compel import ConditioningField -from .controlnet_image_processors import ControlField -from .image import ImageOutput -from .model import ModelInfo, UNetField, VaeField -from ..models.image import ImageCategory, ImageField, ResourceOrigin -from ...backend.model_management import ModelPatcher + +from ...backend.model_management import BaseModelType, ModelPatcher from ...backend.stable_diffusion import PipelineIntermediateState from ...backend.stable_diffusion.diffusers_pipeline import ( ConditioningData, @@ -35,7 +32,13 @@ from ...backend.stable_diffusion.diffusers_pipeline import ( ) from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP -from ...backend.util.devices import choose_torch_device, torch_dtype, choose_precision +from ...backend.util.devices import choose_precision, choose_torch_device, torch_dtype +from ..models.image import ImageCategory, ImageField, ResourceOrigin +from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext +from .compel import ConditioningField +from .controlnet_image_processors import ControlField +from .image import ImageOutput +from .model import ModelInfo, UNetField, VaeField DEFAULT_PRECISION = choose_precision(choose_torch_device()) @@ -44,6 +47,7 @@ class LatentsField(BaseModel): """A latents field used for passing latents between invocations""" latents_name: Optional[str] = Field(default=None, description="The name of the latents") + seed: Optional[int] = Field(description="Seed used to generate this latents") class Config: schema_extra = {"required": ["latents_name"]} @@ -62,9 +66,9 @@ class LatentsOutput(BaseInvocationOutput): # fmt: on -def build_latents_output(latents_name: str, latents: torch.Tensor): +def build_latents_output(latents_name: str, latents: torch.Tensor, seed: Optional[int]): return LatentsOutput( - latents=LatentsField(latents_name=latents_name), + latents=LatentsField(latents_name=latents_name, seed=seed), width=latents.size()[3] * 8, height=latents.size()[2] * 8, ) @@ 
-77,6 +81,7 @@ def get_scheduler( context: InvocationContext, scheduler_info: ModelInfo, scheduler_name: str, + seed: int, ) -> Scheduler: scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP["ddim"]) orig_scheduler_info = context.services.model_manager.get_model( @@ -93,6 +98,11 @@ def get_scheduler( **scheduler_extra_config, "_backup": scheduler_config, } + + # make dpmpp_sde reproducable(seed can be passed only in initializer) + if scheduler_class is DPMSolverSDEScheduler: + scheduler_config["noise_sampler_seed"] = seed + scheduler = scheduler_class.from_config(scheduler_config) # hack copied over from generate.py @@ -101,25 +111,31 @@ def get_scheduler( return scheduler -# Text to image -class TextToLatentsInvocation(BaseInvocation): - """Generates latents from conditionings.""" +class DenoiseLatentsInvocation(BaseInvocation): + """Denoises noisy latents to decodable images""" - type: Literal["t2l"] = "t2l" + type: Literal["denoise_latents"] = "denoise_latents" # Inputs - # fmt: off positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation") negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation") noise: Optional[LatentsField] = Field(description="The noise to use") - steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") - cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) - scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" ) + steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") + cfg_scale: Union[float, List[float]] = Field( + default=7.5, + ge=1, + description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", + ) + denoising_start: float = Field(default=0.0, ge=0, le=1, description="") + denoising_end: float = Field(default=1.0, ge=0, le=1, description="") + scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use") unet: UNetField = Field(default=None, description="UNet submodel") control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use") - # seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", ) - # seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'") - # fmt: on + latents: Optional[LatentsField] = Field(description="The latents to use as a base image") + mask: Optional[ImageField] = Field( + None, + description="Mask", + ) @validator("cfg_scale") def ge_one(cls, v): @@ -137,12 +153,11 @@ class TextToLatentsInvocation(BaseInvocation): class Config(InvocationConfig): schema_extra = { "ui": { - "title": "Text To Latents", - "tags": ["latents"], + "title": "Denoise Latents", + "tags": ["denoise", "latents"], "type_hints": { "model": "model", "control": "control", - # "cfg_scale": "float", "cfg_scale": "number", }, }, @@ -154,12 +169,14 @@ class TextToLatentsInvocation(BaseInvocation): context: InvocationContext, source_node_id: str, intermediate_state: PipelineIntermediateState, + base_model: BaseModelType, ) -> None: stable_diffusion_step_callback( context=context, intermediate_state=intermediate_state, node=self.dict(), source_node_id=source_node_id, + 
base_model=base_model, ) def get_conditioning_data( @@ -167,13 +184,14 @@ class TextToLatentsInvocation(BaseInvocation): context: InvocationContext, scheduler, unet, + seed, ) -> ConditioningData: positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name) - c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype) - extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning + c = positive_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype) + extra_conditioning_info = c.extra_conditioning negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name) - uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype) + uc = negative_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype) conditioning_data = ConditioningData( unconditioned_embeddings=uc, @@ -193,7 +211,8 @@ class TextToLatentsInvocation(BaseInvocation): # for ddim scheduler eta=0.0, # ddim_eta # for ancestral and sde schedulers - generator=torch.Generator(device=unet.device).manual_seed(0), + # flip all bits to have noise different from initial + generator=torch.Generator(device=unet.device).manual_seed(seed ^ 0xFFFFFFFF), ) return conditioning_data @@ -304,110 +323,83 @@ class TextToLatentsInvocation(BaseInvocation): # MultiControlNetModel has been refactored out, just need list[ControlNetData] return control_data - @torch.no_grad() - def invoke(self, context: InvocationContext) -> LatentsOutput: - with SilenceWarnings(): - noise = context.services.latents.get(self.noise.latents_name) + # original idea by https://github.com/AmericanPresidentJimmyCarter + # TODO: research more for second order schedulers timesteps + def init_scheduler(self, scheduler, device, steps, denoising_start, denoising_end): + num_inference_steps = steps + if scheduler.config.get("cpu_only", False): + scheduler.set_timesteps(num_inference_steps, device="cpu") + timesteps = scheduler.timesteps.to(device=device) + else: + scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = scheduler.timesteps - # Get the source node id (we are invoking the prepared node) - graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] + # apply denoising_start + t_start_val = int(round(scheduler.config.num_train_timesteps * (1 - denoising_start))) + t_start_idx = len(list(filter(lambda ts: ts >= t_start_val, timesteps))) + timesteps = timesteps[t_start_idx:] + if scheduler.order == 2 and t_start_idx > 0: + timesteps = timesteps[1:] - def step_callback(state: PipelineIntermediateState): - self.dispatch_progress(context, source_node_id, state) + # save start timestep to apply noise + init_timestep = timesteps[:1] - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), - context=context, - ) - yield (lora_info.context.model, lora.weight) - del lora_info - return + # apply denoising_end + t_end_val = int(round(scheduler.config.num_train_timesteps * (1 - denoising_end))) + t_end_idx = len(list(filter(lambda ts: ts >= t_end_val, timesteps))) + if scheduler.order == 2 and t_end_idx > 0: + t_end_idx += 1 + timesteps = timesteps[:t_end_idx] - unet_info = context.services.model_manager.get_model( - **self.unet.unet.dict(), - context=context, - ) - with ExitStack() as exit_stack, 
ModelPatcher.apply_lora_unet( - unet_info.context.model, _lora_loader() - ), unet_info as unet: - noise = noise.to(device=unet.device, dtype=unet.dtype) + # calculate step count based on scheduler order + num_inference_steps = len(timesteps) + if scheduler.order == 2: + num_inference_steps += num_inference_steps % 2 + num_inference_steps = num_inference_steps // 2 - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) + return num_inference_steps, timesteps, init_timestep - pipeline = self.create_pipeline(unet, scheduler) - conditioning_data = self.get_conditioning_data(context, scheduler, unet) + def prep_mask_tensor(self, mask, context, lantents): + if mask is None: + return None - control_data = self.prep_control_data( - model=pipeline, - context=context, - control_input=self.control, - latents_shape=noise.shape, - # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) - do_classifier_free_guidance=True, - exit_stack=exit_stack, - ) - - # TODO: Verify the noise is the right size - result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( - latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)), - noise=noise, - num_inference_steps=self.steps, - conditioning_data=conditioning_data, - control_data=control_data, # list[ControlNetData] - callback=step_callback, - ) - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - result_latents = result_latents.to("cpu") - torch.cuda.empty_cache() - - name = f"{context.graph_execution_state_id}__{self.id}" - context.services.latents.save(name, result_latents) - return build_latents_output(latents_name=name, latents=result_latents) - - -class LatentsToLatentsInvocation(TextToLatentsInvocation): - """Generates latents using latents as base image.""" - - type: Literal["l2l"] = "l2l" - - # Inputs - latents: Optional[LatentsField] = Field(description="The latents to use as a base image") - strength: float = Field(default=0.7, ge=0, le=1, description="The strength of the latents to use") - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": { - "title": "Latent To Latents", - "tags": ["latents"], - "type_hints": { - "model": "model", - "control": "control", - "cfg_scale": "number", - }, - }, - } + mask_image = context.services.images.get_pil_image(mask.image_name) + if mask_image.mode != "L": + # FIXME: why do we get passed an RGB image here? We can only use single-channel. 
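# --- Annotation (not part of the patch) ---------------------------------------
# A hedged worked example of the init_scheduler() slicing introduced above, shown
# on a synthetic, evenly spaced timestep list. Real schedulers space their
# timesteps differently, and second-order schedulers get the extra index
# adjustments the node applies; the numbers here are illustrative only.

num_train_timesteps = 1000
timesteps = list(range(950, -1, -100))        # [950, 850, ..., 150, 50]: a 10-step schedule

denoising_start, denoising_end = 0.3, 0.8

# drop the timesteps that denoising_start treats as already denoised
t_start_val = int(round(num_train_timesteps * (1 - denoising_start)))   # 700
t_start_idx = len([t for t in timesteps if t >= t_start_val])           # 3 (950, 850, 750)
timesteps = timesteps[t_start_idx:]                                     # [650, 550, ..., 50]

init_timestep = timesteps[:1]                                           # [650]: noise is added here

# cut off the tail that denoising_end leaves for a later node (e.g. the refiner)
t_end_val = int(round(num_train_timesteps * (1 - denoising_end)))       # 200
t_end_idx = len([t for t in timesteps if t >= t_end_val])               # 5
timesteps = timesteps[:t_end_idx]                                       # [650, 550, 450, 350, 250]

num_inference_steps = len(timesteps)                                    # 5 for a first-order scheduler
# ------------------------------------------------------------------------------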
+ mask_image = mask_image.convert("L") + mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False) + if mask_tensor.dim() == 3: + mask_tensor = mask_tensor.unsqueeze(0) + mask_tensor = tv_resize(mask_tensor, lantents.shape[-2:], T.InterpolationMode.BILINEAR) + return 1 - mask_tensor @torch.no_grad() def invoke(self, context: InvocationContext) -> LatentsOutput: with SilenceWarnings(): # this quenches NSFW nag from diffusers - noise = context.services.latents.get(self.noise.latents_name) - latent = context.services.latents.get(self.latents.latents_name) + seed = None + noise = None + if self.noise is not None: + noise = context.services.latents.get(self.noise.latents_name) + seed = self.noise.seed + + if self.latents is not None: + latents = context.services.latents.get(self.latents.latents_name) + if seed is None: + seed = self.latents.seed + else: + latents = torch.zeros_like(noise) + + if seed is None: + seed = 0 + + mask = self.prep_mask_tensor(self.mask, context, latents) # Get the source node id (we are invoking the prepared node) graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id) source_node_id = graph_execution_state.prepared_source_mapping[self.id] def step_callback(state: PipelineIntermediateState): - self.dispatch_progress(context, source_node_id, state) + self.dispatch_progress(context, source_node_id, state, self.unet.unet.base_model) def _lora_loader(): for lora in self.unet.loras: @@ -426,44 +418,48 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation): with ExitStack() as exit_stack, ModelPatcher.apply_lora_unet( unet_info.context.model, _lora_loader() ), unet_info as unet: - noise = noise.to(device=unet.device, dtype=unet.dtype) - latent = latent.to(device=unet.device, dtype=unet.dtype) + latents = latents.to(device=unet.device, dtype=unet.dtype) + if noise is not None: + noise = noise.to(device=unet.device, dtype=unet.dtype) + if mask is not None: + mask = mask.to(device=unet.device, dtype=unet.dtype) scheduler = get_scheduler( context=context, scheduler_info=self.unet.scheduler, scheduler_name=self.scheduler, + seed=seed, ) pipeline = self.create_pipeline(unet, scheduler) - conditioning_data = self.get_conditioning_data(context, scheduler, unet) + conditioning_data = self.get_conditioning_data(context, scheduler, unet, seed) control_data = self.prep_control_data( model=pipeline, context=context, control_input=self.control, - latents_shape=noise.shape, + latents_shape=latents.shape, # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) do_classifier_free_guidance=True, exit_stack=exit_stack, ) - # TODO: Verify the noise is the right size - initial_latents = ( - latent if self.strength < 1.0 else torch.zeros_like(latent, device=unet.device, dtype=latent.dtype) - ) - - timesteps, _ = pipeline.get_img2img_timesteps( - self.steps, - self.strength, + num_inference_steps, timesteps, init_timestep = self.init_scheduler( + scheduler, device=unet.device, + steps=self.steps, + denoising_start=self.denoising_start, + denoising_end=self.denoising_end, ) result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( - latents=initial_latents, + latents=latents, timesteps=timesteps, + init_timestep=init_timestep, noise=noise, - num_inference_steps=self.steps, + seed=seed, + mask=mask, + num_inference_steps=num_inference_steps, conditioning_data=conditioning_data, control_data=control_data, # list[ControlNetData] callback=step_callback, @@ -475,7 +471,7 @@ class 
LatentsToLatentsInvocation(TextToLatentsInvocation): name = f"{context.graph_execution_state_id}__{self.id}" context.services.latents.save(name, result_latents) - return build_latents_output(latents_name=name, latents=result_latents) + return build_latents_output(latents_name=name, latents=result_latents, seed=seed) # Latent to image @@ -617,7 +613,7 @@ class ResizeLatentsInvocation(BaseInvocation): name = f"{context.graph_execution_state_id}__{self.id}" # context.services.latents.set(name, resized_latents) context.services.latents.save(name, resized_latents) - return build_latents_output(latents_name=name, latents=resized_latents) + return build_latents_output(latents_name=name, latents=resized_latents, seed=self.latents.seed) class ScaleLatentsInvocation(BaseInvocation): @@ -659,7 +655,7 @@ class ScaleLatentsInvocation(BaseInvocation): name = f"{context.graph_execution_state_id}__{self.id}" # context.services.latents.set(name, resized_latents) context.services.latents.save(name, resized_latents) - return build_latents_output(latents_name=name, latents=resized_latents) + return build_latents_output(latents_name=name, latents=resized_latents, seed=self.latents.seed) class ImageToLatentsInvocation(BaseInvocation): @@ -740,4 +736,4 @@ class ImageToLatentsInvocation(BaseInvocation): name = f"{context.graph_execution_state_id}__{self.id}" latents = latents.to("cpu") context.services.latents.save(name, latents) - return build_latents_output(latents_name=name, latents=latents) + return build_latents_output(latents_name=name, latents=latents, seed=None) diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index 4f51bf10b8..d0549f8539 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -67,7 +67,10 @@ class CoreMetadata(BaseModelExcludeNull): ) refiner_steps: Union[int, None] = Field(default=None, description="The number of steps used for the refiner") refiner_scheduler: Union[str, None] = Field(default=None, description="The scheduler used for the refiner") - refiner_aesthetic_store: Union[float, None] = Field( + refiner_positive_aesthetic_store: Union[float, None] = Field( + default=None, description="The aesthetic score used for the refiner" + ) + refiner_negative_aesthetic_store: Union[float, None] = Field( default=None, description="The aesthetic score used for the refiner" ) refiner_start: Union[float, None] = Field(default=None, description="The start value used for refiner denoising") @@ -136,7 +139,10 @@ class MetadataAccumulatorInvocation(BaseInvocation): ) refiner_steps: Union[int, None] = Field(default=None, description="The number of steps used for the refiner") refiner_scheduler: Union[str, None] = Field(default=None, description="The scheduler used for the refiner") - refiner_aesthetic_store: Union[float, None] = Field( + refiner_positive_aesthetic_score: Union[float, None] = Field( + default=None, description="The aesthetic score used for the refiner" + ) + refiner_negative_aesthetic_score: Union[float, None] = Field( default=None, description="The aesthetic score used for the refiner" ) refiner_start: Union[float, None] = Field(default=None, description="The start value used for refiner denoising") diff --git a/invokeai/app/invocations/noise.py b/invokeai/app/invocations/noise.py index fff0f29f14..db64e5b6e5 100644 --- a/invokeai/app/invocations/noise.py +++ b/invokeai/app/invocations/noise.py @@ -71,9 +71,9 @@ class NoiseOutput(BaseInvocationOutput): # fmt: on -def build_noise_output(latents_name: 
str, latents: torch.Tensor): +def build_noise_output(latents_name: str, latents: torch.Tensor, seed: int): return NoiseOutput( - noise=LatentsField(latents_name=latents_name), + noise=LatentsField(latents_name=latents_name, seed=seed), width=latents.size()[3] * 8, height=latents.size()[2] * 8, ) @@ -132,4 +132,4 @@ class NoiseInvocation(BaseInvocation): ) name = f"{context.graph_execution_state_id}__{self.id}" context.services.latents.save(name, noise) - return build_noise_output(latents_name=name, latents=noise) + return build_noise_output(latents_name=name, latents=noise, seed=self.seed) diff --git a/invokeai/app/invocations/onnx.py b/invokeai/app/invocations/onnx.py index fe9a64552e..4f04a4f023 100644 --- a/invokeai/app/invocations/onnx.py +++ b/invokeai/app/invocations/onnx.py @@ -212,6 +212,7 @@ class ONNXTextToLatentsInvocation(BaseInvocation): context=context, scheduler_info=self.unet.scheduler, scheduler_name=self.scheduler, + seed=0, # TODO: refactor this node ) def torch2numpy(latent: torch.Tensor): diff --git a/invokeai/app/invocations/sdxl.py b/invokeai/app/invocations/sdxl.py index 5bcd85db28..a5a1c2c641 100644 --- a/invokeai/app/invocations/sdxl.py +++ b/invokeai/app/invocations/sdxl.py @@ -1,17 +1,10 @@ import torch -import inspect -from tqdm import tqdm -from typing import List, Literal, Optional, Union +from typing import Literal +from pydantic import Field -from pydantic import Field, validator - -from ...backend.model_management import ModelType, SubModelType, ModelPatcher -from invokeai.app.util.step_callback import stable_diffusion_xl_step_callback +from ...backend.model_management import ModelType, SubModelType from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext - from .model import UNetField, ClipField, VaeField, MainModelField, ModelInfo -from .compel import ConditioningField -from .latent import LatentsField, SAMPLER_NAME_VALUES, LatentsOutput, get_scheduler, build_latents_output class SDXLModelLoaderOutput(BaseInvocationOutput): @@ -201,526 +194,3 @@ class SDXLRefinerModelLoaderInvocation(BaseInvocation): ), ), ) - - -# Text to image -class SDXLTextToLatentsInvocation(BaseInvocation): - """Generates latents from conditionings.""" - - type: Literal["t2l_sdxl"] = "t2l_sdxl" - - # Inputs - # fmt: off - positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation") - negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation") - noise: Optional[LatentsField] = Field(description="The noise to use") - steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") - cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) - scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" ) - unet: UNetField = Field(default=None, description="UNet submodel") - denoising_end: float = Field(default=1.0, gt=0, le=1, description="") - # control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use") - # seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", ) - # seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'") - # fmt: on - - @validator("cfg_scale") - def ge_one(cls, v): - """validate that all cfg_scale 
values are >= 1""" - if isinstance(v, list): - for i in v: - if i < 1: - raise ValueError("cfg_scale must be greater than 1") - else: - if v < 1: - raise ValueError("cfg_scale must be greater than 1") - return v - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": { - "title": "SDXL Text To Latents", - "tags": ["latents"], - "type_hints": { - "model": "model", - # "cfg_scale": "float", - "cfg_scale": "number", - }, - }, - } - - def dispatch_progress( - self, - context: InvocationContext, - source_node_id: str, - sample, - step, - total_steps, - ) -> None: - stable_diffusion_xl_step_callback( - context=context, - node=self.dict(), - source_node_id=source_node_id, - sample=sample, - step=step, - total_steps=total_steps, - ) - - # based on - # https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L375 - @torch.no_grad() - def invoke(self, context: InvocationContext) -> LatentsOutput: - graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] - latents = context.services.latents.get(self.noise.latents_name) - - positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name) - prompt_embeds = positive_cond_data.conditionings[0].embeds - pooled_prompt_embeds = positive_cond_data.conditionings[0].pooled_embeds - add_time_ids = positive_cond_data.conditionings[0].add_time_ids - - negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name) - negative_prompt_embeds = negative_cond_data.conditionings[0].embeds - negative_pooled_prompt_embeds = negative_cond_data.conditionings[0].pooled_embeds - add_neg_time_ids = negative_cond_data.conditionings[0].add_time_ids - - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) - - num_inference_steps = self.steps - - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), - context=context, - ) - yield (lora_info.context.model, lora.weight) - del lora_info - return - - unet_info = context.services.model_manager.get_model(**self.unet.unet.dict(), context=context) - do_classifier_free_guidance = True - cross_attention_kwargs = None - with ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()), unet_info as unet: - scheduler.set_timesteps(num_inference_steps, device=unet.device) - timesteps = scheduler.timesteps - - latents = latents.to(device=unet.device, dtype=unet.dtype) * scheduler.init_noise_sigma - - extra_step_kwargs = dict() - if "eta" in set(inspect.signature(scheduler.step).parameters.keys()): - extra_step_kwargs.update( - eta=0.0, - ) - if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): - extra_step_kwargs.update( - generator=torch.Generator(device=unet.device).manual_seed(0), - ) - - num_warmup_steps = len(timesteps) - self.steps * scheduler.order - - # apply denoising_end - skipped_final_steps = int(round((1 - self.denoising_end) * self.steps)) - num_inference_steps = num_inference_steps - skipped_final_steps - timesteps = timesteps[: num_warmup_steps + scheduler.order * num_inference_steps] - - if not context.services.configuration.sequential_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) - 
add_text_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0) - add_time_ids = torch.cat([add_neg_time_ids, add_time_ids], dim=0) - - prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype) - add_text_embeds = add_text_embeds.to(device=unet.device, dtype=unet.dtype) - add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype) - latents = latents.to(device=unet.device, dtype=unet.dtype) - - with tqdm(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - latent_model_input = scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} - noise_pred = unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond) - # del noise_pred_uncond - # del noise_pred_text - - # if do_classifier_free_guidance and guidance_rescale > 0.0: - # # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - # noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0): - progress_bar.update() - self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps) - # if callback is not None and i % callback_steps == 0: - # callback(i, t, latents) - else: - negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - negative_prompt_embeds = negative_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - add_neg_time_ids = add_neg_time_ids.to(device=unet.device, dtype=unet.dtype) - pooled_prompt_embeds = pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype) - add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype) - latents = latents.to(device=unet.device, dtype=unet.dtype) - - with tqdm(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - # latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - latent_model_input = scheduler.scale_model_input(latents, t) - - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # predict the noise residual - - added_cond_kwargs = {"text_embeds": negative_pooled_prompt_embeds, "time_ids": add_neg_time_ids} - noise_pred_uncond = unet( - latent_model_input, - t, - encoder_hidden_states=negative_prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - added_cond_kwargs = {"text_embeds": pooled_prompt_embeds, "time_ids": add_time_ids} - noise_pred_text = unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - 
cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - # perform guidance - noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond) - - # del noise_pred_text - # del noise_pred_uncond - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # if do_classifier_free_guidance and guidance_rescale > 0.0: - # # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - # noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # del noise_pred - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0): - progress_bar.update() - self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps) - # if callback is not None and i % callback_steps == 0: - # callback(i, t, latents) - - ################# - - latents = latents.to("cpu") - torch.cuda.empty_cache() - - name = f"{context.graph_execution_state_id}__{self.id}" - context.services.latents.save(name, latents) - return build_latents_output(latents_name=name, latents=latents) - - -class SDXLLatentsToLatentsInvocation(BaseInvocation): - """Generates latents from conditionings.""" - - type: Literal["l2l_sdxl"] = "l2l_sdxl" - - # Inputs - # fmt: off - positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation") - negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation") - noise: Optional[LatentsField] = Field(description="The noise to use") - steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image") - cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", ) - scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" ) - unet: UNetField = Field(default=None, description="UNet submodel") - latents: Optional[LatentsField] = Field(description="Initial latents") - - denoising_start: float = Field(default=0.0, ge=0, le=1, description="") - denoising_end: float = Field(default=1.0, ge=0, le=1, description="") - - # control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use") - # seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", ) - # seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'") - # fmt: on - - @validator("cfg_scale") - def ge_one(cls, v): - """validate that all cfg_scale values are >= 1""" - if isinstance(v, list): - for i in v: - if i < 1: - raise ValueError("cfg_scale must be greater than 1") - else: - if v < 1: - raise ValueError("cfg_scale must be greater than 1") - return v - - # Schema customisation - class Config(InvocationConfig): - schema_extra = { - "ui": { - "title": "SDXL Latents to Latents", - "tags": ["latents"], - "type_hints": { - "model": "model", - # "cfg_scale": "float", - "cfg_scale": "number", - }, - }, - } - - def dispatch_progress( - self, - context: InvocationContext, - source_node_id: str, - sample, - step, - total_steps, - ) -> None: - 
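
Editor's note: `cfg_scale` accepts either a single float or a per-step list, so the `ge_one` validator has to handle both shapes. A minimal pydantic v1 sketch of the same scalar-or-list pattern (the `GuidanceSettings` model name is hypothetical, and the error message is worded to match the `< 1` check, i.e. "greater than or equal to 1"):

```python
from typing import List, Union
from pydantic import BaseModel, validator

class GuidanceSettings(BaseModel):
    cfg_scale: Union[float, List[float]] = 7.5

    @validator("cfg_scale")
    def ge_one(cls, v):
        """Validate that every guidance value is >= 1, whether scalar or per-step list."""
        values = v if isinstance(v, list) else [v]
        if any(i < 1 for i in values):
            raise ValueError("cfg_scale must be greater than or equal to 1")
        return v

GuidanceSettings(cfg_scale=[7.5, 6.0])   # ok
# GuidanceSettings(cfg_scale=0.5)        # raises ValidationError
```
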
stable_diffusion_xl_step_callback( - context=context, - node=self.dict(), - source_node_id=source_node_id, - sample=sample, - step=step, - total_steps=total_steps, - ) - - # based on - # https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L375 - @torch.no_grad() - def invoke(self, context: InvocationContext) -> LatentsOutput: - graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] - latents = context.services.latents.get(self.latents.latents_name) - - positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name) - prompt_embeds = positive_cond_data.conditionings[0].embeds - pooled_prompt_embeds = positive_cond_data.conditionings[0].pooled_embeds - add_time_ids = positive_cond_data.conditionings[0].add_time_ids - - negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name) - negative_prompt_embeds = negative_cond_data.conditionings[0].embeds - negative_pooled_prompt_embeds = negative_cond_data.conditionings[0].pooled_embeds - add_neg_time_ids = negative_cond_data.conditionings[0].add_time_ids - - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) - - unet_info = context.services.model_manager.get_model( - **self.unet.unet.dict(), - context=context, - ) - - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), - context=context, - ) - yield (lora_info.context.model, lora.weight) - del lora_info - return - - do_classifier_free_guidance = True - cross_attention_kwargs = None - with ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()), unet_info as unet: - # apply denoising_start - num_inference_steps = self.steps - scheduler.set_timesteps(num_inference_steps, device=unet.device) - - t_start = int(round(self.denoising_start * num_inference_steps)) - timesteps = scheduler.timesteps[t_start * scheduler.order :] - num_inference_steps = num_inference_steps - t_start - - # apply noise(if provided) - if self.noise is not None and timesteps.shape[0] > 0: - noise = context.services.latents.get(self.noise.latents_name) - latents = scheduler.add_noise(latents, noise, timesteps[:1]) - del noise - - # apply scheduler extra args - extra_step_kwargs = dict() - if "eta" in set(inspect.signature(scheduler.step).parameters.keys()): - extra_step_kwargs.update( - eta=0.0, - ) - if "generator" in set(inspect.signature(scheduler.step).parameters.keys()): - extra_step_kwargs.update( - generator=torch.Generator(device=unet.device).manual_seed(0), - ) - - num_warmup_steps = max(len(timesteps) - num_inference_steps * scheduler.order, 0) - - # apply denoising_end - skipped_final_steps = int(round((1 - self.denoising_end) * self.steps)) - num_inference_steps = num_inference_steps - skipped_final_steps - timesteps = timesteps[: num_warmup_steps + scheduler.order * num_inference_steps] - - if not context.services.configuration.sequential_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) - add_text_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0) - add_time_ids = torch.cat([add_neg_time_ids, add_time_ids], dim=0) - - prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype) 
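
Editor's note: the latents-to-latents invocation above combines an img2img-style `denoising_start` (skip the head of the schedule and noise the latents at the first kept timestep) with a `denoising_end` that leaves the tail of the schedule for a refiner. A standalone sketch of that windowing arithmetic; the `denoising_window` helper is ad hoc and assumes the scheduler's `order` is known:

```python
import torch

def denoising_window(timesteps: torch.Tensor, steps: int, order: int,
                     denoising_start: float, denoising_end: float):
    # skip the first portion of the schedule (as img2img "strength" would)
    t_start = int(round(denoising_start * steps))
    timesteps = timesteps[t_start * order:]
    num_inference_steps = steps - t_start
    num_warmup_steps = max(len(timesteps) - num_inference_steps * order, 0)
    # drop the tail so a refiner can take over from `denoising_end`
    skipped_final_steps = int(round((1 - denoising_end) * steps))
    num_inference_steps -= skipped_final_steps
    timesteps = timesteps[: num_warmup_steps + order * num_inference_steps]
    return timesteps, num_inference_steps

full = torch.linspace(999, 0, steps=30).long()   # stand-in for scheduler.timesteps
window, n = denoising_window(full, steps=30, order=1,
                             denoising_start=0.0, denoising_end=0.8)
print(len(window), n)   # 24 steps; the last 20% of denoising is left to the refiner
```
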
- add_text_embeds = add_text_embeds.to(device=unet.device, dtype=unet.dtype) - add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype) - latents = latents.to(device=unet.device, dtype=unet.dtype) - - with tqdm(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - latent_model_input = scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} - noise_pred = unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond) - # del noise_pred_uncond - # del noise_pred_text - - # if do_classifier_free_guidance and guidance_rescale > 0.0: - # # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - # noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0): - progress_bar.update() - self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps) - # if callback is not None and i % callback_steps == 0: - # callback(i, t, latents) - else: - negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - negative_prompt_embeds = negative_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - add_neg_time_ids = add_neg_time_ids.to(device=unet.device, dtype=unet.dtype) - pooled_prompt_embeds = pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype) - prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype) - add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype) - latents = latents.to(device=unet.device, dtype=unet.dtype) - - with tqdm(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - # latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - - latent_model_input = scheduler.scale_model_input(latents, t) - - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # predict the noise residual - - added_cond_kwargs = {"text_embeds": negative_pooled_prompt_embeds, "time_ids": add_time_ids} - noise_pred_uncond = unet( - latent_model_input, - t, - encoder_hidden_states=negative_prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - added_cond_kwargs = {"text_embeds": pooled_prompt_embeds, "time_ids": add_time_ids} - noise_pred_text = unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - cross_attention_kwargs=cross_attention_kwargs, - added_cond_kwargs=added_cond_kwargs, - return_dict=False, - )[0] - - # perform guidance - noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond) - - # del noise_pred_text - # del 
noise_pred_uncond - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # if do_classifier_free_guidance and guidance_rescale > 0.0: - # # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - # noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - # del noise_pred - # import gc - # gc.collect() - # torch.cuda.empty_cache() - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0): - progress_bar.update() - self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps) - # if callback is not None and i % callback_steps == 0: - # callback(i, t, latents) - - ################# - - latents = latents.to("cpu") - torch.cuda.empty_cache() - - name = f"{context.graph_execution_state_id}__{self.id}" - context.services.latents.save(name, latents) - return build_latents_output(latents_name=name, latents=latents) diff --git a/invokeai/app/services/default_graphs.py b/invokeai/app/services/default_graphs.py index cafb6f0339..caee5b631e 100644 --- a/invokeai/app/services/default_graphs.py +++ b/invokeai/app/services/default_graphs.py @@ -1,4 +1,4 @@ -from ..invocations.latent import LatentsToImageInvocation, TextToLatentsInvocation +from ..invocations.latent import LatentsToImageInvocation, DenoiseLatentsInvocation from ..invocations.image import ImageNSFWBlurInvocation from ..invocations.noise import NoiseInvocation from ..invocations.compel import CompelInvocation @@ -23,7 +23,7 @@ def create_text_to_image() -> LibraryGraph: "3": NoiseInvocation(id="3"), "4": CompelInvocation(id="4"), "5": CompelInvocation(id="5"), - "6": TextToLatentsInvocation(id="6"), + "6": DenoiseLatentsInvocation(id="6"), "7": LatentsToImageInvocation(id="7"), "8": ImageNSFWBlurInvocation(id="8"), }, diff --git a/invokeai/app/services/events.py b/invokeai/app/services/events.py index 30fa89bd29..a266fe4f18 100644 --- a/invokeai/app/services/events.py +++ b/invokeai/app/services/events.py @@ -35,6 +35,7 @@ class EventServiceBase: source_node_id: str, progress_image: Optional[ProgressImage], step: int, + order: int, total_steps: int, ) -> None: """Emitted when there is generation progress""" @@ -46,6 +47,7 @@ class EventServiceBase: source_node_id=source_node_id, progress_image=progress_image.dict() if progress_image is not None else None, step=step, + order=order, total_steps=total_steps, ), ) diff --git a/invokeai/app/util/step_callback.py b/invokeai/app/util/step_callback.py index 994d83e705..3cadfa1c12 100644 --- a/invokeai/app/util/step_callback.py +++ b/invokeai/app/util/step_callback.py @@ -4,9 +4,9 @@ from invokeai.app.models.exceptions import CanceledException from invokeai.app.models.image import ProgressImage from ..invocations.baseinvocation import InvocationContext from ...backend.util.util import image_to_dataURL -from ...backend.generator.base import Generator from ...backend.stable_diffusion import PipelineIntermediateState from invokeai.app.services.config import InvokeAIAppConfig +from ...backend.model_management.models import BaseModelType def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None): @@ -29,6 +29,7 @@ def stable_diffusion_step_callback( intermediate_state: PipelineIntermediateState, node: dict, source_node_id: str, + base_model: BaseModelType, ): if 
context.services.queue.is_canceled(context.graph_execution_state_id): raise CanceledException @@ -56,23 +57,50 @@ def stable_diffusion_step_callback( # TODO: only output a preview image when requested - # origingally adapted from code by @erucipe and @keturn here: - # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 + if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]: + # fast latents preview matrix for sdxl + # generated by @StAlKeR7779 + sdxl_latent_rgb_factors = torch.tensor( + [ + # R G B + [0.3816, 0.4930, 0.5320], + [-0.3753, 0.1631, 0.1739], + [0.1770, 0.3588, -0.2048], + [-0.4350, -0.2644, -0.4289], + ], + dtype=sample.dtype, + device=sample.device, + ) - # these updated numbers for v1.5 are from @torridgristle - v1_5_latent_rgb_factors = torch.tensor( - [ - # R G B - [0.3444, 0.1385, 0.0670], # L1 - [0.1247, 0.4027, 0.1494], # L2 - [-0.3192, 0.2513, 0.2103], # L3 - [-0.1307, -0.1874, -0.7445], # L4 - ], - dtype=sample.dtype, - device=sample.device, - ) + sdxl_smooth_matrix = torch.tensor( + [ + [0.0358, 0.0964, 0.0358], + [0.0964, 0.4711, 0.0964], + [0.0358, 0.0964, 0.0358], + ], + dtype=sample.dtype, + device=sample.device, + ) - image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors) + image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix) + else: + # origingally adapted from code by @erucipe and @keturn here: + # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 + + # these updated numbers for v1.5 are from @torridgristle + v1_5_latent_rgb_factors = torch.tensor( + [ + # R G B + [0.3444, 0.1385, 0.0670], # L1 + [0.1247, 0.4027, 0.1494], # L2 + [-0.3192, 0.2513, 0.2103], # L3 + [-0.1307, -0.1874, -0.7445], # L4 + ], + dtype=sample.dtype, + device=sample.device, + ) + + image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors) (width, height) = image.size width *= 8 @@ -86,59 +114,6 @@ def stable_diffusion_step_callback( source_node_id=source_node_id, progress_image=ProgressImage(width=width, height=height, dataURL=dataURL), step=intermediate_state.step, - total_steps=node["steps"], - ) - - -def stable_diffusion_xl_step_callback( - context: InvocationContext, - node: dict, - source_node_id: str, - sample, - step, - total_steps, -): - if context.services.queue.is_canceled(context.graph_execution_state_id): - raise CanceledException - - sdxl_latent_rgb_factors = torch.tensor( - [ - # R G B - [0.3816, 0.4930, 0.5320], - [-0.3753, 0.1631, 0.1739], - [0.1770, 0.3588, -0.2048], - [-0.4350, -0.2644, -0.4289], - ], - dtype=sample.dtype, - device=sample.device, - ) - - sdxl_smooth_matrix = torch.tensor( - [ - # [ 0.0478, 0.1285, 0.0478], - # [ 0.1285, 0.2948, 0.1285], - # [ 0.0478, 0.1285, 0.0478], - [0.0358, 0.0964, 0.0358], - [0.0964, 0.4711, 0.0964], - [0.0358, 0.0964, 0.0358], - ], - dtype=sample.dtype, - device=sample.device, - ) - - image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix) - - (width, height) = image.size - width *= 8 - height *= 8 - - dataURL = image_to_dataURL(image, image_format="JPEG") - - context.services.events.emit_generator_progress( - graph_execution_state_id=context.graph_execution_state_id, - node=node, - source_node_id=source_node_id, - progress_image=ProgressImage(width=width, height=height, dataURL=dataURL), - step=step, - total_steps=total_steps, + order=intermediate_state.order, + total_steps=intermediate_state.total_steps, ) diff --git 
a/invokeai/backend/__init__.py b/invokeai/backend/__init__.py index aa2a1f1ca6..a4e6f180ae 100644 --- a/invokeai/backend/__init__.py +++ b/invokeai/backend/__init__.py @@ -1,6 +1,5 @@ """ Initialization file for invokeai.backend """ -from .generator import InvokeAIGeneratorBasicParams, InvokeAIGenerator, InvokeAIGeneratorOutput, Img2Img, Inpaint from .model_management import ModelManager, ModelCache, BaseModelType, ModelType, SubModelType, ModelInfo from .model_management.models import SilenceWarnings diff --git a/invokeai/backend/generator/__init__.py b/invokeai/backend/generator/__init__.py deleted file mode 100644 index 8a7f1c9167..0000000000 --- a/invokeai/backend/generator/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -Initialization file for the invokeai.generator package -""" -from .base import ( - InvokeAIGenerator, - InvokeAIGeneratorBasicParams, - InvokeAIGeneratorOutput, - Img2Img, - Inpaint, - Generator, -) -from .inpaint import infill_methods diff --git a/invokeai/backend/generator/base.py b/invokeai/backend/generator/base.py deleted file mode 100644 index af3231a7d1..0000000000 --- a/invokeai/backend/generator/base.py +++ /dev/null @@ -1,559 +0,0 @@ -""" -Base class for invokeai.backend.generator.* -including img2img, txt2img, and inpaint -""" -from __future__ import annotations - -import itertools -import dataclasses -import diffusers -import os -import random -import traceback -from abc import ABCMeta -from argparse import Namespace -from contextlib import nullcontext - -import cv2 -import numpy as np -import torch -from PIL import Image, ImageChops, ImageFilter -from accelerate.utils import set_seed -from diffusers import DiffusionPipeline -from tqdm import trange -from typing import Callable, List, Iterator, Optional, Type, Union -from dataclasses import dataclass, field -from diffusers.schedulers import SchedulerMixin as Scheduler - -import invokeai.backend.util.logging as logger -from ..image_util import configure_model_padding -from ..util.util import rand_perlin_2d -from ..stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ..stable_diffusion.schedulers import SCHEDULER_MAP - -downsampling = 8 - - -@dataclass -class InvokeAIGeneratorBasicParams: - seed: Optional[int] = None - width: int = 512 - height: int = 512 - cfg_scale: float = 7.5 - steps: int = 20 - ddim_eta: float = 0.0 - scheduler: str = "ddim" - precision: str = "float16" - perlin: float = 0.0 - threshold: float = 0.0 - seamless: bool = False - seamless_axes: List[str] = field(default_factory=lambda: ["x", "y"]) - h_symmetry_time_pct: Optional[float] = None - v_symmetry_time_pct: Optional[float] = None - variation_amount: float = 0.0 - with_variations: list = field(default_factory=list) - - -@dataclass -class InvokeAIGeneratorOutput: - """ - InvokeAIGeneratorOutput is a dataclass that contains the outputs of a generation - operation, including the image, its seed, the model name used to generate the image - and the model hash, as well as all the generate() parameters that went into - generating the image (in .params, also available as attributes) - """ - - image: Image.Image - seed: int - model_hash: str - attention_maps_images: List[Image.Image] - params: Namespace - - -# we are interposing a wrapper around the original Generator classes so that -# old code that calls Generate will continue to work. 
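
Editor's note: the step-callback hunk above builds cheap progress previews by projecting the 4 latent channels onto RGB with a fixed 4x3 matrix (plus, for SDXL, a 3x3 smoothing kernel), avoiding a VAE decode per step. A small sketch of that projection using the v1.5 factors quoted in the hunk; the `lowres_preview` helper name and the random latents are illustrative only:

```python
import torch
from PIL import Image

v1_5_latent_rgb_factors = torch.tensor(
    [
        [0.3444, 0.1385, 0.0670],
        [0.1247, 0.4027, 0.1494],
        [-0.3192, 0.2513, 0.2103],
        [-0.1307, -0.1874, -0.7445],
    ]
)

def lowres_preview(sample: torch.Tensor) -> Image.Image:
    # sample: [1, 4, h/8, w/8] latents; project each latent channel onto RGB
    latent_image = sample[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors
    latents_ubyte = ((latent_image + 1) / 2).clamp(0, 1).mul(0xFF).byte().cpu()
    return Image.fromarray(latents_ubyte.numpy())

preview = lowres_preview(torch.randn(1, 4, 64, 64))   # ~64x64 preview of a 512x512 image
```
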
-class InvokeAIGenerator(metaclass=ABCMeta): - def __init__( - self, - model_info: dict, - params: InvokeAIGeneratorBasicParams = InvokeAIGeneratorBasicParams(), - **kwargs, - ): - self.model_info = model_info - self.params = params - self.kwargs = kwargs - - def generate( - self, - conditioning: tuple, - scheduler, - callback: Optional[Callable] = None, - step_callback: Optional[Callable] = None, - iterations: int = 1, - **keyword_args, - ) -> Iterator[InvokeAIGeneratorOutput]: - """ - Return an iterator across the indicated number of generations. - Each time the iterator is called it will return an InvokeAIGeneratorOutput - object. Use like this: - - outputs = txt2img.generate(prompt='banana sushi', iterations=5) - for result in outputs: - print(result.image, result.seed) - - In the typical case of wanting to get just a single image, iterations - defaults to 1 and do: - - output = next(txt2img.generate(prompt='banana sushi') - - Pass None to get an infinite iterator. - - outputs = txt2img.generate(prompt='banana sushi', iterations=None) - for o in outputs: - print(o.image, o.seed) - - """ - generator_args = dataclasses.asdict(self.params) - generator_args.update(keyword_args) - - model_info = self.model_info - model_name = model_info.name - model_hash = model_info.hash - with model_info.context as model: - gen_class = self._generator_class() - generator = gen_class(model, self.params.precision, **self.kwargs) - if self.params.variation_amount > 0: - generator.set_variation( - generator_args.get("seed"), - generator_args.get("variation_amount"), - generator_args.get("with_variations"), - ) - - if isinstance(model, DiffusionPipeline): - for component in [model.unet, model.vae]: - configure_model_padding( - component, generator_args.get("seamless", False), generator_args.get("seamless_axes") - ) - else: - configure_model_padding( - model, generator_args.get("seamless", False), generator_args.get("seamless_axes") - ) - - iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1) - for i in iteration_count: - results = generator.generate( - conditioning=conditioning, - step_callback=step_callback, - sampler=scheduler, - **generator_args, - ) - output = InvokeAIGeneratorOutput( - image=results[0][0], - seed=results[0][1], - attention_maps_images=results[0][2], - model_hash=model_hash, - params=Namespace(model_name=model_name, **generator_args), - ) - if callback: - callback(output) - yield output - - @classmethod - def schedulers(self) -> List[str]: - """ - Return list of all the schedulers that we currently handle. - """ - return list(SCHEDULER_MAP.keys()) - - def load_generator(self, model: StableDiffusionGeneratorPipeline, generator_class: Type[Generator]): - return generator_class(model, self.params.precision) - - @classmethod - def _generator_class(cls) -> Type[Generator]: - """ - In derived classes return the name of the generator to apply. - If you don't override will return the name of the derived - class, which nicely parallels the generator class names. 
- """ - return Generator - - -# ------------------------------------ -class Img2Img(InvokeAIGenerator): - def generate( - self, init_image: Union[Image.Image, torch.FloatTensor], strength: float = 0.75, **keyword_args - ) -> Iterator[InvokeAIGeneratorOutput]: - return super().generate(init_image=init_image, strength=strength, **keyword_args) - - @classmethod - def _generator_class(cls): - from .img2img import Img2Img - - return Img2Img - - -# ------------------------------------ -# Takes all the arguments of Img2Img and adds the mask image and the seam/infill stuff -class Inpaint(Img2Img): - def generate( - self, - mask_image: Union[Image.Image, torch.FloatTensor], - # Seam settings - when 0, doesn't fill seam - seam_size: int = 96, - seam_blur: int = 16, - seam_strength: float = 0.7, - seam_steps: int = 30, - tile_size: int = 32, - inpaint_replace=False, - infill_method=None, - inpaint_width=None, - inpaint_height=None, - inpaint_fill: tuple(int) = (0x7F, 0x7F, 0x7F, 0xFF), - **keyword_args, - ) -> Iterator[InvokeAIGeneratorOutput]: - return super().generate( - mask_image=mask_image, - seam_size=seam_size, - seam_blur=seam_blur, - seam_strength=seam_strength, - seam_steps=seam_steps, - tile_size=tile_size, - inpaint_replace=inpaint_replace, - infill_method=infill_method, - inpaint_width=inpaint_width, - inpaint_height=inpaint_height, - inpaint_fill=inpaint_fill, - **keyword_args, - ) - - @classmethod - def _generator_class(cls): - from .inpaint import Inpaint - - return Inpaint - - -class Generator: - downsampling_factor: int - latent_channels: int - precision: str - model: DiffusionPipeline - - def __init__(self, model: DiffusionPipeline, precision: str, **kwargs): - self.model = model - self.precision = precision - self.seed = None - self.latent_channels = model.unet.config.in_channels - self.downsampling_factor = downsampling # BUG: should come from model or config - self.perlin = 0.0 - self.threshold = 0 - self.variation_amount = 0 - self.with_variations = [] - self.use_mps_noise = False - self.free_gpu_mem = None - - # this is going to be overridden in img2img.py, txt2img.py and inpaint.py - def get_make_image(self, **kwargs): - """ - Returns a function returning an image derived from the prompt and the initial image - Return value depends on the seed at the time you call it - """ - raise NotImplementedError("image_iterator() must be implemented in a descendent class") - - def set_variation(self, seed, variation_amount, with_variations): - self.seed = seed - self.variation_amount = variation_amount - self.with_variations = with_variations - - def generate( - self, - width, - height, - sampler, - init_image=None, - iterations=1, - seed=None, - image_callback=None, - step_callback=None, - threshold=0.0, - perlin=0.0, - h_symmetry_time_pct=None, - v_symmetry_time_pct=None, - free_gpu_mem: bool = False, - **kwargs, - ): - scope = nullcontext - self.free_gpu_mem = free_gpu_mem - attention_maps_images = [] - attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image()) - make_image = self.get_make_image( - sampler=sampler, - init_image=init_image, - width=width, - height=height, - step_callback=step_callback, - threshold=threshold, - perlin=perlin, - h_symmetry_time_pct=h_symmetry_time_pct, - v_symmetry_time_pct=v_symmetry_time_pct, - attention_maps_callback=attention_maps_callback, - **kwargs, - ) - results = [] - seed = seed if seed is not None and seed >= 0 else self.new_seed() - first_seed = seed - seed, initial_noise = 
self.generate_initial_noise(seed, width, height) - - # There used to be an additional self.model.ema_scope() here, but it breaks - # the inpaint-1.5 model. Not sure what it did.... ? - with scope(self.model.device.type): - for n in trange(iterations, desc="Generating"): - x_T = None - if self.variation_amount > 0: - set_seed(seed) - target_noise = self.get_noise(width, height) - x_T = self.slerp(self.variation_amount, initial_noise, target_noise) - elif initial_noise is not None: - # i.e. we specified particular variations - x_T = initial_noise - else: - set_seed(seed) - try: - x_T = self.get_noise(width, height) - except: - logger.error("An error occurred while getting initial noise") - print(traceback.format_exc()) - - # Pass on the seed in case a layer beneath us needs to generate noise on its own. - image = make_image(x_T, seed) - - results.append([image, seed, attention_maps_images]) - - if image_callback is not None: - attention_maps_image = None if len(attention_maps_images) == 0 else attention_maps_images[-1] - image_callback( - image, - seed, - first_seed=first_seed, - attention_maps_image=attention_maps_image, - ) - - seed = self.new_seed() - - # Free up memory from the last generation. - clear_cuda_cache = kwargs["clear_cuda_cache"] if "clear_cuda_cache" in kwargs else None - if clear_cuda_cache is not None: - clear_cuda_cache() - - return results - - def sample_to_image(self, samples) -> Image.Image: - """ - Given samples returned from a sampler, converts - it into a PIL Image - """ - with torch.inference_mode(): - image = self.model.decode_latents(samples) - return self.model.numpy_to_pil(image)[0] - - def repaste_and_color_correct( - self, - result: Image.Image, - init_image: Image.Image, - init_mask: Image.Image, - mask_blur_radius: int = 8, - ) -> Image.Image: - if init_image is None or init_mask is None: - return result - - # Get the original alpha channel of the mask if there is one. - # Otherwise it is some other black/white image format ('1', 'L' or 'RGB') - pil_init_mask = init_mask.getchannel("A") if init_mask.mode == "RGBA" else init_mask.convert("L") - pil_init_image = init_image.convert("RGBA") # Add an alpha channel if one doesn't exist - - # Build an image with only visible pixels from source to use as reference for color-matching. 
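
Editor's note: the color-correction routine that follows rescales the generated pixels so their per-channel mean and standard deviation match the original image before it is pasted back. A simplified numpy sketch of that normalization; the real method restricts the statistics to masked pixels and blurs the mask before pasting, and `match_channel_stats` is an illustrative name:

```python
import numpy as np

def match_channel_stats(generated: np.ndarray, reference: np.ndarray) -> np.ndarray:
    """Shift and scale generated RGB pixels so their channel statistics match the reference."""
    gen = generated.astype(np.float32)
    ref = reference.astype(np.float32)
    gen_mean, gen_std = gen.mean(axis=(0, 1)), gen.std(axis=(0, 1)) + 1e-6
    ref_mean, ref_std = ref.mean(axis=(0, 1)), ref.std(axis=(0, 1))
    matched = (gen - gen_mean) / gen_std * ref_std + ref_mean
    return matched.clip(0, 255).astype(np.uint8)

rng = np.random.default_rng(0)
result = match_channel_stats(
    rng.integers(0, 255, (64, 64, 3), dtype=np.uint8),
    rng.integers(0, 255, (64, 64, 3), dtype=np.uint8),
)
```
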
- init_rgb_pixels = np.asarray(init_image.convert("RGB"), dtype=np.uint8) - init_a_pixels = np.asarray(pil_init_image.getchannel("A"), dtype=np.uint8) - init_mask_pixels = np.asarray(pil_init_mask, dtype=np.uint8) - - # Get numpy version of result - np_image = np.asarray(result, dtype=np.uint8) - - # Mask and calculate mean and standard deviation - mask_pixels = init_a_pixels * init_mask_pixels > 0 - np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :] - np_image_masked = np_image[mask_pixels, :] - - if np_init_rgb_pixels_masked.size > 0: - init_means = np_init_rgb_pixels_masked.mean(axis=0) - init_std = np_init_rgb_pixels_masked.std(axis=0) - gen_means = np_image_masked.mean(axis=0) - gen_std = np_image_masked.std(axis=0) - - # Color correct - np_matched_result = np_image.copy() - np_matched_result[:, :, :] = ( - ( - ( - (np_matched_result[:, :, :].astype(np.float32) - gen_means[None, None, :]) - / gen_std[None, None, :] - ) - * init_std[None, None, :] - + init_means[None, None, :] - ) - .clip(0, 255) - .astype(np.uint8) - ) - matched_result = Image.fromarray(np_matched_result, mode="RGB") - else: - matched_result = Image.fromarray(np_image, mode="RGB") - - # Blur the mask out (into init image) by specified amount - if mask_blur_radius > 0: - nm = np.asarray(pil_init_mask, dtype=np.uint8) - nmd = cv2.erode( - nm, - kernel=np.ones((3, 3), dtype=np.uint8), - iterations=int(mask_blur_radius / 2), - ) - pmd = Image.fromarray(nmd, mode="L") - blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(mask_blur_radius)) - else: - blurred_init_mask = pil_init_mask - - multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, self.pil_image.split()[-1]) - - # Paste original on color-corrected generation (using blurred mask) - matched_result.paste(init_image, (0, 0), mask=multiplied_blurred_init_mask) - return matched_result - - @staticmethod - def sample_to_lowres_estimated_image(samples): - # origingally adapted from code by @erucipe and @keturn here: - # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 - - # these updated numbers for v1.5 are from @torridgristle - v1_5_latent_rgb_factors = torch.tensor( - [ - # R G B - [0.3444, 0.1385, 0.0670], # L1 - [0.1247, 0.4027, 0.1494], # L2 - [-0.3192, 0.2513, 0.2103], # L3 - [-0.1307, -0.1874, -0.7445], # L4 - ], - dtype=samples.dtype, - device=samples.device, - ) - - latent_image = samples[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors - latents_ubyte = ( - ((latent_image + 1) / 2).clamp(0, 1).mul(0xFF).byte() # change scale from -1..1 to 0..1 # to 0..255 - ).cpu() - - return Image.fromarray(latents_ubyte.numpy()) - - def generate_initial_noise(self, seed, width, height): - initial_noise = None - if self.variation_amount > 0 or len(self.with_variations) > 0: - # use fixed initial noise plus random noise per iteration - set_seed(seed) - initial_noise = self.get_noise(width, height) - for v_seed, v_weight in self.with_variations: - seed = v_seed - set_seed(seed) - next_noise = self.get_noise(width, height) - initial_noise = self.slerp(v_weight, initial_noise, next_noise) - if self.variation_amount > 0: - random.seed() # reset RNG to an actually random state, so we can get a random seed for variations - seed = random.randrange(0, np.iinfo(np.uint32).max) - return (seed, initial_noise) - - def get_perlin_noise(self, width, height): - fixdevice = "cpu" if (self.model.device.type == "mps") else self.model.device - # limit noise to only the diffusion image channels, not the mask channels - input_channels = 
min(self.latent_channels, 4) - # round up to the nearest block of 8 - temp_width = int((width + 7) / 8) * 8 - temp_height = int((height + 7) / 8) * 8 - noise = torch.stack( - [ - rand_perlin_2d((temp_height, temp_width), (8, 8), device=self.model.device).to(fixdevice) - for _ in range(input_channels) - ], - dim=0, - ).to(self.model.device) - return noise[0:4, 0:height, 0:width] - - def new_seed(self): - self.seed = random.randrange(0, np.iinfo(np.uint32).max) - return self.seed - - def slerp(self, t, v0, v1, DOT_THRESHOLD=0.9995): - """ - Spherical linear interpolation - Args: - t (float/np.ndarray): Float value between 0.0 and 1.0 - v0 (np.ndarray): Starting vector - v1 (np.ndarray): Final vector - DOT_THRESHOLD (float): Threshold for considering the two vectors as - colineal. Not recommended to alter this. - Returns: - v2 (np.ndarray): Interpolation vector between v0 and v1 - """ - inputs_are_torch = False - if not isinstance(v0, np.ndarray): - inputs_are_torch = True - v0 = v0.detach().cpu().numpy() - if not isinstance(v1, np.ndarray): - inputs_are_torch = True - v1 = v1.detach().cpu().numpy() - - dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1))) - if np.abs(dot) > DOT_THRESHOLD: - v2 = (1 - t) * v0 + t * v1 - else: - theta_0 = np.arccos(dot) - sin_theta_0 = np.sin(theta_0) - theta_t = theta_0 * t - sin_theta_t = np.sin(theta_t) - s0 = np.sin(theta_0 - theta_t) / sin_theta_0 - s1 = sin_theta_t / sin_theta_0 - v2 = s0 * v0 + s1 * v1 - - if inputs_are_torch: - v2 = torch.from_numpy(v2).to(self.model.device) - - return v2 - - # this is a handy routine for debugging use. Given a generated sample, - # convert it into a PNG image and store it at the indicated path - def save_sample(self, sample, filepath): - image = self.sample_to_image(sample) - dirname = os.path.dirname(filepath) or "." - if not os.path.exists(dirname): - logger.info(f"creating directory {dirname}") - os.makedirs(dirname, exist_ok=True) - image.save(filepath, "PNG") - - def torch_dtype(self) -> torch.dtype: - return torch.float16 if self.precision == "float16" else torch.float32 - - # returns a tensor filled with random numbers from a normal distribution - def get_noise(self, width, height): - device = self.model.device - # limit noise to only the diffusion image channels, not the mask channels - input_channels = min(self.latent_channels, 4) - x = torch.randn( - [ - 1, - input_channels, - height // self.downsampling_factor, - width // self.downsampling_factor, - ], - dtype=self.torch_dtype(), - device=device, - ) - if self.perlin > 0.0: - perlin_noise = self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor) - x = (1 - self.perlin) * x + self.perlin * perlin_noise - return x diff --git a/invokeai/backend/generator/img2img.py b/invokeai/backend/generator/img2img.py deleted file mode 100644 index 8aaaff5deb..0000000000 --- a/invokeai/backend/generator/img2img.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -invokeai.backend.generator.img2img descends from .generator -""" - -from .base import Generator - - -class Img2Img(Generator): - def get_make_image( - self, - sampler, - steps, - cfg_scale, - ddim_eta, - conditioning, - init_image, - strength, - step_callback=None, - threshold=0.0, - warmup=0.2, - perlin=0.0, - h_symmetry_time_pct=None, - v_symmetry_time_pct=None, - attention_maps_callback=None, - **kwargs, - ): - """ - Returns a function returning an image derived from the prompt and the initial image - Return value depends on the seed at the time you call it. 
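
Editor's note: the `slerp` helper above is how seed "variations" were produced: the initial noise is spherically interpolated toward a second noise tensor by `variation_amount`, falling back to a plain lerp when the vectors are nearly collinear. A compact numpy sketch of the same interpolation and a hypothetical usage:

```python
import numpy as np

def slerp(t: float, v0: np.ndarray, v1: np.ndarray, dot_threshold: float = 0.9995) -> np.ndarray:
    """Spherical interpolation between two noise tensors, with a lerp fallback when nearly collinear."""
    dot = np.sum(v0 * v1) / (np.linalg.norm(v0) * np.linalg.norm(v1))
    if np.abs(dot) > dot_threshold:
        return (1 - t) * v0 + t * v1
    theta_0 = np.arccos(dot)
    theta_t = theta_0 * t
    s0 = np.sin(theta_0 - theta_t) / np.sin(theta_0)
    s1 = np.sin(theta_t) / np.sin(theta_0)
    return s0 * v0 + s1 * v1

rng = np.random.default_rng(0)
base, target = rng.standard_normal((4, 64, 64)), rng.standard_normal((4, 64, 64))
blended = slerp(0.2, base, target)   # 20% "variation_amount" toward the target noise
```
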
- """ - raise NotImplementedError("replaced by invokeai.app.invocations.latent.LatentsToLatentsInvocation") diff --git a/invokeai/backend/generator/inpaint.py b/invokeai/backend/generator/inpaint.py deleted file mode 100644 index 494f213d11..0000000000 --- a/invokeai/backend/generator/inpaint.py +++ /dev/null @@ -1,387 +0,0 @@ -""" -invokeai.backend.generator.inpaint descends from .generator -""" -from __future__ import annotations - -import math -from typing import Tuple, Union, Optional - -import cv2 -import numpy as np -import torch -from PIL import Image, ImageChops, ImageFilter, ImageOps - -from ..image_util import PatchMatch, debug_image -from ..stable_diffusion.diffusers_pipeline import ( - ConditioningData, - StableDiffusionGeneratorPipeline, - image_resized_to_grid_as_tensor, -) -from .img2img import Img2Img - - -def infill_methods() -> list[str]: - methods = [ - "tile", - "solid", - ] - if PatchMatch.patchmatch_available(): - methods.insert(0, "patchmatch") - return methods - - -class Inpaint(Img2Img): - def __init__(self, model, precision): - self.inpaint_height = 0 - self.inpaint_width = 0 - self.enable_image_debugging = False - self.init_latent = None - self.pil_image = None - self.pil_mask = None - self.mask_blur_radius = 0 - self.infill_method = None - super().__init__(model, precision) - - # Outpaint support code - def get_tile_images(self, image: np.ndarray, width=8, height=8): - _nrows, _ncols, depth = image.shape - _strides = image.strides - - nrows, _m = divmod(_nrows, height) - ncols, _n = divmod(_ncols, width) - if _m != 0 or _n != 0: - return None - - return np.lib.stride_tricks.as_strided( - np.ravel(image), - shape=(nrows, ncols, height, width, depth), - strides=(height * _strides[0], width * _strides[1], *_strides), - writeable=False, - ) - - def infill_patchmatch(self, im: Image.Image) -> Image.Image: - if im.mode != "RGBA": - return im - - # Skip patchmatch if patchmatch isn't available - if not PatchMatch.patchmatch_available(): - return im - - # Patchmatch (note, we may want to expose patch_size? 
Increasing it significantly impacts performance though) - im_patched_np = PatchMatch.inpaint(im.convert("RGB"), ImageOps.invert(im.split()[-1]), patch_size=3) - im_patched = Image.fromarray(im_patched_np, mode="RGB") - return im_patched - - def tile_fill_missing(self, im: Image.Image, tile_size: int = 16, seed: Optional[int] = None) -> Image.Image: - # Only fill if there's an alpha layer - if im.mode != "RGBA": - return im - - a = np.asarray(im, dtype=np.uint8) - - tile_size_tuple = (tile_size, tile_size) - - # Get the image as tiles of a specified size - tiles = self.get_tile_images(a, *tile_size_tuple).copy() - - # Get the mask as tiles - tiles_mask = tiles[:, :, :, :, 3] - - # Find any mask tiles with any fully transparent pixels (we will be replacing these later) - tmask_shape = tiles_mask.shape - tiles_mask = tiles_mask.reshape(math.prod(tiles_mask.shape)) - n, ny = (math.prod(tmask_shape[0:2])), math.prod(tmask_shape[2:]) - tiles_mask = tiles_mask > 0 - tiles_mask = tiles_mask.reshape((n, ny)).all(axis=1) - - # Get RGB tiles in single array and filter by the mask - tshape = tiles.shape - tiles_all = tiles.reshape((math.prod(tiles.shape[0:2]), *tiles.shape[2:])) - filtered_tiles = tiles_all[tiles_mask] - - if len(filtered_tiles) == 0: - return im - - # Find all invalid tiles and replace with a random valid tile - replace_count = (tiles_mask == False).sum() - rng = np.random.default_rng(seed=seed) - tiles_all[np.logical_not(tiles_mask)] = filtered_tiles[ - rng.choice(filtered_tiles.shape[0], replace_count), :, :, : - ] - - # Convert back to an image - tiles_all = tiles_all.reshape(tshape) - tiles_all = tiles_all.swapaxes(1, 2) - st = tiles_all.reshape( - ( - math.prod(tiles_all.shape[0:2]), - math.prod(tiles_all.shape[2:4]), - tiles_all.shape[4], - ) - ) - si = Image.fromarray(st, mode="RGBA") - - return si - - def mask_edge(self, mask: Image.Image, edge_size: int, edge_blur: int) -> Image.Image: - npimg = np.asarray(mask, dtype=np.uint8) - - # Detect any partially transparent regions - npgradient = np.uint8(255 * (1.0 - np.floor(np.abs(0.5 - np.float32(npimg) / 255.0) * 2.0))) - - # Detect hard edges - npedge = cv2.Canny(npimg, threshold1=100, threshold2=200) - - # Combine - npmask = npgradient + npedge - - # Expand - npmask = cv2.dilate(npmask, np.ones((3, 3), np.uint8), iterations=int(edge_size / 2)) - - new_mask = Image.fromarray(npmask) - - if edge_blur > 0: - new_mask = new_mask.filter(ImageFilter.BoxBlur(edge_blur)) - - return ImageOps.invert(new_mask) - - def seam_paint( - self, - im: Image.Image, - seam_size: int, - seam_blur: int, - seed, - steps, - cfg_scale, - ddim_eta, - conditioning, - strength, - noise, - infill_method, - step_callback, - ) -> Image.Image: - hard_mask = self.pil_image.split()[-1].copy() - mask = self.mask_edge(hard_mask, seam_size, seam_blur) - - make_image = self.get_make_image( - steps, - cfg_scale, - ddim_eta, - conditioning, - init_image=im.copy().convert("RGBA"), - mask_image=mask, - strength=strength, - mask_blur_radius=0, - seam_size=0, - step_callback=step_callback, - inpaint_width=im.width, - inpaint_height=im.height, - infill_method=infill_method, - ) - - seam_noise = self.get_noise(im.width, im.height) - - result = make_image(seam_noise, seed=None) - - return result - - @torch.no_grad() - def get_make_image( - self, - steps, - cfg_scale, - ddim_eta, - conditioning, - init_image: Union[Image.Image, torch.FloatTensor], - mask_image: Union[Image.Image, torch.FloatTensor], - strength: float, - mask_blur_radius: int = 8, - # Seam settings - when 0, 
doesn't fill seam - seam_size: int = 96, - seam_blur: int = 16, - seam_strength: float = 0.7, - seam_steps: int = 30, - tile_size: int = 32, - step_callback=None, - inpaint_replace=False, - enable_image_debugging=False, - infill_method=None, - inpaint_width=None, - inpaint_height=None, - inpaint_fill: Tuple[int, int, int, int] = (0x7F, 0x7F, 0x7F, 0xFF), - attention_maps_callback=None, - **kwargs, - ): - """ - Returns a function returning an image derived from the prompt and - the initial image + mask. Return value depends on the seed at - the time you call it. kwargs are 'init_latent' and 'strength' - """ - - self.enable_image_debugging = enable_image_debugging - infill_method = infill_method or infill_methods()[0] - self.infill_method = infill_method - - self.inpaint_width = inpaint_width - self.inpaint_height = inpaint_height - - if isinstance(init_image, Image.Image): - self.pil_image = init_image.copy() - - # Do infill - if infill_method == "patchmatch" and PatchMatch.patchmatch_available(): - init_filled = self.infill_patchmatch(self.pil_image.copy()) - elif infill_method == "tile": - init_filled = self.tile_fill_missing(self.pil_image.copy(), seed=self.seed, tile_size=tile_size) - elif infill_method == "solid": - solid_bg = Image.new("RGBA", init_image.size, inpaint_fill) - init_filled = Image.alpha_composite(solid_bg, init_image) - else: - raise ValueError(f"Non-supported infill type {infill_method}", infill_method) - init_filled.paste(init_image, (0, 0), init_image.split()[-1]) - - # Resize if requested for inpainting - if inpaint_width and inpaint_height: - init_filled = init_filled.resize((inpaint_width, inpaint_height)) - - debug_image(init_filled, "init_filled", debug_status=self.enable_image_debugging) - - # Create init tensor - init_image = image_resized_to_grid_as_tensor(init_filled.convert("RGB")) - - if isinstance(mask_image, Image.Image): - self.pil_mask = mask_image.copy() - debug_image( - mask_image, - "mask_image BEFORE multiply with pil_image", - debug_status=self.enable_image_debugging, - ) - - init_alpha = self.pil_image.getchannel("A") - if mask_image.mode != "L": - # FIXME: why do we get passed an RGB image here? We can only use single-channel. - mask_image = mask_image.convert("L") - mask_image = ImageChops.multiply(mask_image, init_alpha) - self.pil_mask = mask_image - - # Resize if requested for inpainting - if inpaint_width and inpaint_height: - mask_image = mask_image.resize((inpaint_width, inpaint_height)) - - debug_image( - mask_image, - "mask_image AFTER multiply with pil_image", - debug_status=self.enable_image_debugging, - ) - mask: torch.FloatTensor = image_resized_to_grid_as_tensor(mask_image, normalize=False) - else: - mask: torch.FloatTensor = mask_image - - self.mask_blur_radius = mask_blur_radius - - # noinspection PyTypeChecker - pipeline: StableDiffusionGeneratorPipeline = self.model - - # todo: support cross-attention control - uc, c, _ = conditioning - conditioning_data = ConditioningData(uc, c, cfg_scale).add_scheduler_args_if_applicable( - pipeline.scheduler, eta=ddim_eta - ) - - def make_image(x_T: torch.Tensor, seed: int): - pipeline_output = pipeline.inpaint_from_embeddings( - init_image=init_image, - mask=1 - mask, # expects white means "paint here." 
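
Editor's note: as a concrete illustration of the "solid" infill branch above, transparent areas of the init image are flattened onto a solid background before encoding, and the original pixels are pasted back through their own alpha channel. A small PIL sketch under those assumptions (the placeholder image is synthetic; `inpaint_fill` is the mid-grey default from the signature):

```python
from PIL import Image, ImageDraw

inpaint_fill = (0x7F, 0x7F, 0x7F, 0xFF)                    # mid-grey default fill
init_image = Image.new("RGBA", (64, 64), (255, 0, 0, 0))   # fully transparent canvas
ImageDraw.Draw(init_image).rectangle([16, 16, 47, 47], fill=(255, 0, 0, 255))  # opaque region to keep

solid_bg = Image.new("RGBA", init_image.size, inpaint_fill)
init_filled = Image.alpha_composite(solid_bg, init_image)
# re-paste the original through its alpha so opaque pixels are untouched by the fill
init_filled.paste(init_image, (0, 0), init_image.split()[-1])
```
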
- strength=strength, - num_inference_steps=steps, - conditioning_data=conditioning_data, - noise_func=self.get_noise_like, - callback=step_callback, - seed=seed, - ) - - if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: - attention_maps_callback(pipeline_output.attention_map_saver) - - result = self.postprocess_size_and_mask(pipeline.numpy_to_pil(pipeline_output.images)[0]) - - # Seam paint if this is our first pass (seam_size set to 0 during seam painting) - if seam_size > 0: - old_image = self.pil_image or init_image - old_mask = self.pil_mask or mask_image - - result = self.seam_paint( - result, - seam_size, - seam_blur, - seed, - seam_steps, - cfg_scale, - ddim_eta, - conditioning, - seam_strength, - x_T, - infill_method, - step_callback, - ) - - # Restore original settings - self.get_make_image( - steps, - cfg_scale, - ddim_eta, - conditioning, - old_image, - old_mask, - strength, - mask_blur_radius, - seam_size, - seam_blur, - seam_strength, - seam_steps, - tile_size, - step_callback, - inpaint_replace, - enable_image_debugging, - inpaint_width=inpaint_width, - inpaint_height=inpaint_height, - infill_method=infill_method, - **kwargs, - ) - - return result - - return make_image - - def sample_to_image(self, samples) -> Image.Image: - gen_result = super().sample_to_image(samples).convert("RGB") - return self.postprocess_size_and_mask(gen_result) - - def postprocess_size_and_mask(self, gen_result: Image.Image) -> Image.Image: - debug_image(gen_result, "gen_result", debug_status=self.enable_image_debugging) - - # Resize if necessary - if self.inpaint_width and self.inpaint_height: - gen_result = gen_result.resize(self.pil_image.size) - - if self.pil_image is None or self.pil_mask is None: - return gen_result - - corrected_result = self.repaste_and_color_correct( - gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius - ) - debug_image( - corrected_result, - "corrected_result", - debug_status=self.enable_image_debugging, - ) - - return corrected_result - - def get_noise_like(self, like: torch.Tensor): - device = like.device - x = torch.randn_like(like, device=device) - if self.perlin > 0.0: - shape = like.shape - x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2]) - return x diff --git a/invokeai/backend/stable_diffusion/__init__.py b/invokeai/backend/stable_diffusion/__init__.py index 37024ccace..21273c6201 100644 --- a/invokeai/backend/stable_diffusion/__init__.py +++ b/invokeai/backend/stable_diffusion/__init__.py @@ -8,4 +8,4 @@ from .diffusers_pipeline import ( ) from .diffusion import InvokeAIDiffuserComponent from .diffusion.cross_attention_map_saving import AttentionMapSaver -from .diffusion.shared_invokeai_diffusion import PostprocessingSettings +from .diffusion.shared_invokeai_diffusion import PostprocessingSettings, BasicConditioningInfo, SDXLConditioningInfo diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index c2c8165d02..aa809bc3ec 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -5,23 +5,19 @@ import inspect import math import secrets from dataclasses import dataclass, field -from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, Union +from typing import Any, Callable, Generic, List, Optional, Type, Union import PIL.Image import einops import psutil import torch import torchvision.transforms as T -from 
accelerate.utils import set_seed from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.models.controlnet import ControlNetModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import ( StableDiffusionPipeline, ) -from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import ( - StableDiffusionImg2ImgPipeline, -) from diffusers.pipelines.stable_diffusion.safety_checker import ( StableDiffusionSafetyChecker, ) @@ -30,23 +26,23 @@ from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutpu from diffusers.utils.import_utils import is_xformers_available from diffusers.utils.outputs import BaseOutput from pydantic import Field -from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer -from typing_extensions import ParamSpec from invokeai.app.services.config import InvokeAIAppConfig from .diffusion import ( AttentionMapSaver, InvokeAIDiffuserComponent, PostprocessingSettings, + BasicConditioningInfo, ) from ..util import normalize_device @dataclass class PipelineIntermediateState: - run_id: str step: int + order: int + total_steps: int timestep: int latents: torch.Tensor predicted_original: Optional[torch.Tensor] = None @@ -97,7 +93,6 @@ class AddsMaskGuidance: mask_latents: torch.FloatTensor scheduler: SchedulerMixin noise: torch.Tensor - _debug: Optional[Callable] = None def __call__(self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning) -> BaseOutput: output_class = step_output.__class__ # We'll create a new one with masked data. @@ -134,8 +129,6 @@ class AddsMaskGuidance: # mask_latents = self.scheduler.scale_model_input(mask_latents, t) mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size) masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)) - if self._debug: - self._debug(masked_input, f"t={t} lerped") return masked_input @@ -167,33 +160,6 @@ def is_inpainting_model(unet: UNet2DConditionModel): return unet.conv_in.in_channels == 9 -CallbackType = TypeVar("CallbackType") -ReturnType = TypeVar("ReturnType") -ParamType = ParamSpec("ParamType") - - -@dataclass(frozen=True) -class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]): - """Convert a generator to a function with a callback and a return value.""" - - generator_method: Callable[ParamType, ReturnType] - callback_arg_type: Type[CallbackType] - - def __call__( - self, - *args: ParamType.args, - callback: Callable[[CallbackType], Any] = None, - **kwargs: ParamType.kwargs, - ) -> ReturnType: - result = None - for result in self.generator_method(*args, **kwargs): - if callback is not None and isinstance(result, self.callback_arg_type): - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result - - @dataclass class ControlNetData: model: ControlNetModel = Field(default=None) @@ -207,8 +173,8 @@ class ControlNetData: @dataclass class ConditioningData: - unconditioned_embeddings: torch.Tensor - text_embeddings: torch.Tensor + unconditioned_embeddings: BasicConditioningInfo + text_embeddings: BasicConditioningInfo guidance_scale: Union[float, List[float]] """ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). 
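
Editor's note: progress reporting switches from a generator yielding `PipelineIntermediateState` to a plain callback, and the state now carries `order` and `total_steps` so the step callback can forward them instead of reading the node's `steps` field. A hypothetical consumer-side sketch; the dataclass below mirrors only the fields visible in this hunk, and `on_progress` is an illustrative name:

```python
from dataclasses import dataclass
from typing import Optional
import torch

@dataclass
class PipelineIntermediateState:
    step: int
    order: int
    total_steps: int
    timestep: int
    latents: torch.Tensor
    predicted_original: Optional[torch.Tensor] = None

def on_progress(state: PipelineIntermediateState) -> None:
    # order/total_steps let the consumer scale progress for higher-order schedulers
    print(f"step {state.step + 1}/{state.total_steps} (scheduler order {state.order})")

on_progress(PipelineIntermediateState(step=0, order=1, total_steps=30,
                                      timestep=999, latents=torch.zeros(1, 4, 8, 8)))
```
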
@@ -284,7 +250,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): feature_extractor ([`CLIPFeatureExtractor`]): Model that extracts features from generated images to be used as inputs for the `safety_checker`. """ - ID_LENGTH = 8 def __init__( self, @@ -328,33 +293,41 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): if xformers is available, use it, otherwise use sliced attention. """ config = InvokeAIAppConfig.get_config() - if torch.cuda.is_available() and is_xformers_available() and not config.disable_xformers: - self.enable_xformers_memory_efficient_attention() + if self.unet.device.type == "cuda": + if is_xformers_available() and not config.disable_xformers: + self.enable_xformers_memory_efficient_attention() + return + elif hasattr(torch.nn.functional, "scaled_dot_product_attention"): + # diffusers enable sdp automatically + return + + if self.unet.device.type == "cpu" or self.unet.device.type == "mps": + mem_free = psutil.virtual_memory().free + elif self.unet.device.type == "cuda": + mem_free, _ = torch.cuda.mem_get_info(normalize_device(self.unet.device)) else: - if self.device.type == "cpu" or self.device.type == "mps": - mem_free = psutil.virtual_memory().free - elif self.device.type == "cuda": - mem_free, _ = torch.cuda.mem_get_info(normalize_device(self.device)) - else: - raise ValueError(f"unrecognized device {self.device}") - # input tensor of [1, 4, h/8, w/8] - # output tensor of [16, (h/8 * w/8), (h/8 * w/8)] - bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4 - max_size_required_for_baddbmm = ( - 16 - * latents.size(dim=2) - * latents.size(dim=3) - * latents.size(dim=2) - * latents.size(dim=3) - * bytes_per_element_needed_for_baddbmm_duplication - ) - if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): # 3.3 / 4.0 is from old Invoke code - self.enable_attention_slicing(slice_size="max") - elif torch.backends.mps.is_available(): - # diffusers recommends always enabling for mps - self.enable_attention_slicing(slice_size="max") - else: - self.disable_attention_slicing() + raise ValueError(f"unrecognized device {self.unet.device}") + # input tensor of [1, 4, h/8, w/8] + # output tensor of [16, (h/8 * w/8), (h/8 * w/8)] + bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4 + max_size_required_for_baddbmm = ( + 16 + * latents.size(dim=2) + * latents.size(dim=3) + * latents.size(dim=2) + * latents.size(dim=3) + * bytes_per_element_needed_for_baddbmm_duplication + ) + if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): # 3.3 / 4.0 is from old Invoke code + self.enable_attention_slicing(slice_size="max") + elif torch.backends.mps.is_available(): + # diffusers recommends always enabling for mps + self.enable_attention_slicing(slice_size="max") + else: + self.disable_attention_slicing() + + def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False): + raise Exception("Should not be called") def latents_from_embeddings( self, @@ -362,35 +335,72 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): num_inference_steps: int, conditioning_data: ConditioningData, *, - noise: torch.Tensor, - timesteps=None, + noise: Optional[torch.Tensor], + timesteps: torch.Tensor, + init_timestep: torch.Tensor, additional_guidance: List[Callable] = None, - run_id=None, callback: Callable[[PipelineIntermediateState], None] = None, control_data: List[ControlNetData] = None, + mask: Optional[torch.Tensor] = None, + seed: Optional[int] = None, ) -> 
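
Editor's note: the `_adjust_memory_efficient_attention` rewrite keeps the old heuristic, estimating the peak size of the baddbmm attention buffer for the current latents and enabling sliced attention when it would exceed roughly three quarters of free memory. A standalone sketch of just that estimate (the helper name and the memory figure are made up for the example):

```python
import torch

def needs_attention_slicing(latents: torch.Tensor, mem_free: int) -> bool:
    # input latents of [1, 4, h/8, w/8] produce an attention buffer of
    # [16, (h/8 * w/8), (h/8 * w/8)], duplicated during baddbmm
    bytes_per_element = latents.element_size() + 4
    h, w = latents.size(2), latents.size(3)
    max_size_required = 16 * (h * w) ** 2 * bytes_per_element
    return max_size_required > mem_free * 3.0 / 4.0

latents = torch.zeros(1, 4, 64, 64)                         # 512x512 image
print(needs_attention_slicing(latents, mem_free=8 * 1024**3))
```
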
tuple[torch.Tensor, Optional[AttentionMapSaver]]: - if self.scheduler.config.get("cpu_only", False): - scheduler_device = torch.device("cpu") - else: - scheduler_device = self.unet.device + if init_timestep.shape[0] == 0: + return latents, None - if timesteps is None: - self.scheduler.set_timesteps(num_inference_steps, device=scheduler_device) - timesteps = self.scheduler.timesteps - infer_latents_from_embeddings = GeneratorToCallbackinator( - self.generate_latents_from_embeddings, PipelineIntermediateState - ) - result: PipelineIntermediateState = infer_latents_from_embeddings( - latents, - timesteps, - conditioning_data, - noise=noise, - run_id=run_id, - additional_guidance=additional_guidance, - control_data=control_data, - callback=callback, - ) - return result.latents, result.attention_map_saver + if additional_guidance is None: + additional_guidance = [] + + orig_latents = latents.clone() + + batch_size = latents.shape[0] + batched_t = init_timestep.expand(batch_size) + + if noise is not None: + # latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers + latents = self.scheduler.add_noise(latents, noise, batched_t) + + if mask is not None: + if is_inpainting_model(self.unet): + # You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint + # (that's why there's a mask!) but it seems to really want that blanked out. + # masked_latents = latents * torch.where(mask < 0.5, 1, 0) TODO: inpaint/outpaint/infill + + # TODO: we should probably pass this in so we don't have to try/finally around setting it. + self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(self._unet_forward, mask, orig_latents) + else: + # if no noise provided, noisify unmasked area based on seed(or 0 as fallback) + if noise is None: + noise = torch.randn( + orig_latents.shape, + dtype=torch.float32, + device="cpu", + generator=torch.Generator(device="cpu").manual_seed(seed or 0), + ).to(device=orig_latents.device, dtype=orig_latents.dtype) + + latents = self.scheduler.add_noise(latents, noise, batched_t) + latents = torch.lerp( + orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype) + ) + + additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise)) + + try: + latents, attention_map_saver = self.generate_latents_from_embeddings( + latents, + timesteps, + conditioning_data, + additional_guidance=additional_guidance, + control_data=control_data, + callback=callback, + ) + finally: + self.invokeai_diffuser.model_forward_callback = self._unet_forward + + # restore unmasked part + if mask is not None: + latents = torch.lerp(orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)) + + return latents, attention_map_saver def generate_latents_from_embeddings( self, @@ -398,42 +408,40 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): timesteps, conditioning_data: ConditioningData, *, - noise: torch.Tensor, - run_id: str = None, additional_guidance: List[Callable] = None, control_data: List[ControlNetData] = None, + callback: Callable[[PipelineIntermediateState], None] = None, ): self._adjust_memory_efficient_attention(latents) - if run_id is None: - run_id = secrets.token_urlsafe(self.ID_LENGTH) if additional_guidance is None: additional_guidance = [] + + batch_size = latents.shape[0] + attention_map_saver: Optional[AttentionMapSaver] = None + + if timesteps.shape[0] == 0: + return latents, attention_map_saver + extra_conditioning_info = 
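
Editor's note: for non-inpainting UNets, the new `latents_from_embeddings` keeps masked regions pinned to the original latents: noise is only blended into the area to regenerate, `AddsMaskGuidance` re-applies the constraint each step, and the preserved area is restored at the end with `torch.lerp`. A minimal sketch of that final blend, where a mask value of 1.0 keeps the freshly denoised latents and 0.0 restores the originals (tensors here are synthetic):

```python
import torch

orig_latents = torch.zeros(1, 4, 8, 8)   # original image latents (preserved where mask == 0)
denoised = torch.ones(1, 4, 8, 8)        # output of the denoising loop
mask = torch.zeros(1, 1, 8, 8)
mask[..., :, 4:] = 1.0                   # regenerate only the right half

blended = torch.lerp(orig_latents, denoised, mask)
assert torch.all(blended[..., :4] == 0) and torch.all(blended[..., 4:] == 1)
```
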
conditioning_data.extra with self.invokeai_diffuser.custom_attention_context( self.invokeai_diffuser.model, extra_conditioning_info=extra_conditioning_info, step_count=len(self.scheduler.timesteps), ): - yield PipelineIntermediateState( - run_id=run_id, - step=-1, - timestep=self.scheduler.config.num_train_timesteps, - latents=latents, - ) + if callback is not None: + callback( + PipelineIntermediateState( + step=-1, + order=self.scheduler.order, + total_steps=len(timesteps), + timestep=self.scheduler.config.num_train_timesteps, + latents=latents, + ) + ) - batch_size = latents.shape[0] - batched_t = torch.full( - (batch_size,), - timesteps[0], - dtype=timesteps.dtype, - device=self.unet.device, - ) - latents = self.scheduler.add_noise(latents, noise, batched_t) - - attention_map_saver: Optional[AttentionMapSaver] = None # print("timesteps:", timesteps) for i, t in enumerate(self.progress_bar(timesteps)): - batched_t.fill_(t) + batched_t = t.expand(batch_size) step_output = self.step( batched_t, latents, @@ -462,14 +470,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): # attention_map_saver = AttentionMapSaver(token_ids=attention_map_token_ids, latents_shape=latents.shape[-2:]) # self.invokeai_diffuser.setup_attention_map_saving(attention_map_saver) - yield PipelineIntermediateState( - run_id=run_id, - step=i, - timestep=int(t), - latents=latents, - predicted_original=predicted_original, - attention_map_saver=attention_map_saver, - ) + if callback is not None: + callback( + PipelineIntermediateState( + step=i, + order=self.scheduler.order, + total_steps=len(timesteps), + timestep=int(t), + latents=latents, + predicted_original=predicted_original, + attention_map_saver=attention_map_saver, + ) + ) return latents, attention_map_saver @@ -491,95 +503,39 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): # TODO: should this scaling happen here or inside self._unet_forward? # i.e. 
before or after passing it to InvokeAIDiffuserComponent - unet_latent_input = self.scheduler.scale_model_input(latents, timestep) + latent_model_input = self.scheduler.scale_model_input(latents, timestep) # default is no controlnet, so set controlnet processing output to None - down_block_res_samples, mid_block_res_sample = None, None - + controlnet_down_block_samples, controlnet_mid_block_sample = None, None if control_data is not None: - # control_data should be type List[ControlNetData] - # this loop covers both ControlNet (one ControlNetData in list) - # and MultiControlNet (multiple ControlNetData in list) - for i, control_datum in enumerate(control_data): - control_mode = control_datum.control_mode - # soft_injection and cfg_injection are the two ControlNet control_mode booleans - # that are combined at higher level to make control_mode enum - # soft_injection determines whether to do per-layer re-weighting adjustment (if True) - # or default weighting (if False) - soft_injection = control_mode == "more_prompt" or control_mode == "more_control" - # cfg_injection = determines whether to apply ControlNet to only the conditional (if True) - # or the default both conditional and unconditional (if False) - cfg_injection = control_mode == "more_control" or control_mode == "unbalanced" + controlnet_down_block_samples, controlnet_mid_block_sample = self.invokeai_diffuser.do_controlnet_step( + control_data=control_data, + sample=latent_model_input, + timestep=timestep, + step_index=step_index, + total_step_count=total_step_count, + conditioning_data=conditioning_data, + ) - first_control_step = math.floor(control_datum.begin_step_percent * total_step_count) - last_control_step = math.ceil(control_datum.end_step_percent * total_step_count) - # only apply controlnet if current step is within the controlnet's begin/end step range - if step_index >= first_control_step and step_index <= last_control_step: - if cfg_injection: - control_latent_input = unet_latent_input - else: - # expand the latents input to control model if doing classifier free guidance - # (which I think for now is always true, there is conditional elsewhere that stops execution if - # classifier_free_guidance is <= 1.0 ?) - control_latent_input = torch.cat([unet_latent_input] * 2) - - if cfg_injection: # only applying ControlNet to conditional instead of in unconditioned - encoder_hidden_states = conditioning_data.text_embeddings - encoder_attention_mask = None - else: - ( - encoder_hidden_states, - encoder_attention_mask, - ) = self.invokeai_diffuser._concat_conditionings_for_batch( - conditioning_data.unconditioned_embeddings, - conditioning_data.text_embeddings, - ) - if isinstance(control_datum.weight, list): - # if controlnet has multiple weights, use the weight for the current step - controlnet_weight = control_datum.weight[step_index] - else: - # if controlnet has a single weight, use it for all steps - controlnet_weight = control_datum.weight - - # controlnet(s) inference - down_samples, mid_sample = control_datum.model( - sample=control_latent_input, - timestep=timestep, - encoder_hidden_states=encoder_hidden_states, - controlnet_cond=control_datum.image_tensor, - conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale - encoder_attention_mask=encoder_attention_mask, - guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel - return_dict=False, - ) - if cfg_injection: - # Inferred ControlNet only for the conditional batch. 
- # To apply the output of ControlNet to both the unconditional and conditional batches, - # prepend zeros for unconditional batch - down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples] - mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample]) - - if down_block_res_samples is None and mid_block_res_sample is None: - down_block_res_samples, mid_block_res_sample = down_samples, mid_sample - else: - # add controlnet outputs together if have multiple controlnets - down_block_res_samples = [ - samples_prev + samples_curr - for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) - ] - mid_block_res_sample += mid_sample - - # predict the noise residual - noise_pred = self.invokeai_diffuser.do_diffusion_step( - x=unet_latent_input, - sigma=t, - unconditioning=conditioning_data.unconditioned_embeddings, - conditioning=conditioning_data.text_embeddings, - unconditional_guidance_scale=conditioning_data.guidance_scale, + uc_noise_pred, c_noise_pred = self.invokeai_diffuser.do_unet_step( + sample=latent_model_input, + timestep=t, # TODO: debug how handled batched and non batched timesteps step_index=step_index, total_step_count=total_step_count, - down_block_additional_residuals=down_block_res_samples, # from controlnet(s) - mid_block_additional_residual=mid_block_res_sample, # from controlnet(s) + conditioning_data=conditioning_data, + # extra: + down_block_additional_residuals=controlnet_down_block_samples, # from controlnet(s) + mid_block_additional_residual=controlnet_mid_block_sample, # from controlnet(s) + ) + + guidance_scale = conditioning_data.guidance_scale + if isinstance(guidance_scale, list): + guidance_scale = guidance_scale[step_index] + + noise_pred = self.invokeai_diffuser._combine( + uc_noise_pred, + c_noise_pred, + guidance_scale, ) # compute the previous noisy sample x_t -> x_t-1 @@ -621,126 +577,3 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): cross_attention_kwargs=cross_attention_kwargs, **kwargs, ).sample - - def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int): - img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) - assert img2img_pipeline.scheduler is self.scheduler - - if self.scheduler.config.get("cpu_only", False): - scheduler_device = torch.device("cpu") - else: - scheduler_device = self.unet.device - - img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=scheduler_device) - timesteps, adjusted_steps = img2img_pipeline.get_timesteps( - num_inference_steps, strength, device=scheduler_device - ) - # Workaround for low strength resulting in zero timesteps. 
- # TODO: submit upstream fix for zero-step img2img - if timesteps.numel() == 0: - timesteps = self.scheduler.timesteps[-1:] - adjusted_steps = timesteps.numel() - return timesteps, adjusted_steps - - def inpaint_from_embeddings( - self, - init_image: torch.FloatTensor, - mask: torch.FloatTensor, - strength: float, - num_inference_steps: int, - conditioning_data: ConditioningData, - *, - callback: Callable[[PipelineIntermediateState], None] = None, - run_id=None, - noise_func=None, - seed=None, - ) -> InvokeAIStableDiffusionPipelineOutput: - device = self.unet.device - latents_dtype = self.unet.dtype - - if isinstance(init_image, PIL.Image.Image): - init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB")) - - init_image = init_image.to(device=device, dtype=latents_dtype) - mask = mask.to(device=device, dtype=latents_dtype) - - if init_image.dim() == 3: - init_image = init_image.unsqueeze(0) - - timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength) - - # 6. Prepare latent variables - # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents - # because we have our own noise function - init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) - if seed is not None: - set_seed(seed) - noise = noise_func(init_image_latents) - - if mask.dim() == 3: - mask = mask.unsqueeze(0) - latent_mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR).to( - device=device, dtype=latents_dtype - ) - - guidance: List[Callable] = [] - - if is_inpainting_model(self.unet): - # You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint - # (that's why there's a mask!) but it seems to really want that blanked out. - masked_init_image = init_image * torch.where(mask < 0.5, 1, 0) - masked_latents = self.non_noised_latents_from_image(masked_init_image, device=device, dtype=latents_dtype) - - # TODO: we should probably pass this in so we don't have to try/finally around setting it. - self.invokeai_diffuser.model_forward_callback = AddsMaskLatents( - self._unet_forward, latent_mask, masked_latents - ) - else: - guidance.append(AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise)) - - try: - result_latents, result_attention_maps = self.latents_from_embeddings( - latents=init_image_latents - if strength < 1.0 - else torch.zeros_like( - init_image_latents, device=init_image_latents.device, dtype=init_image_latents.dtype - ), - num_inference_steps=num_inference_steps, - conditioning_data=conditioning_data, - noise=noise, - timesteps=timesteps, - additional_guidance=guidance, - run_id=run_id, - callback=callback, - ) - finally: - self.invokeai_diffuser.model_forward_callback = self._unet_forward - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - torch.cuda.empty_cache() - - with torch.inference_mode(): - image = self.decode_latents(result_latents) - output = InvokeAIStableDiffusionPipelineOutput( - images=image, - nsfw_content_detected=[], - attention_map_saver=result_attention_maps, - ) - return output - - def non_noised_latents_from_image(self, init_image, *, device: torch.device, dtype): - init_image = init_image.to(device=device, dtype=dtype) - with torch.inference_mode(): - init_latent_dist = self.vae.encode(init_image).latent_dist - init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible! 
- - init_latents = 0.18215 * init_latents - return init_latents - - def debug_latents(self, latents, msg): - from invokeai.backend.image_util import debug_image - - with torch.inference_mode(): - decoded = self.numpy_to_pil(self.decode_latents(latents)) - for i, img in enumerate(decoded): - debug_image(img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True) diff --git a/invokeai/backend/stable_diffusion/diffusion/__init__.py b/invokeai/backend/stable_diffusion/diffusion/__init__.py index 6dd2817f29..76f338aeaf 100644 --- a/invokeai/backend/stable_diffusion/diffusion/__init__.py +++ b/invokeai/backend/stable_diffusion/diffusion/__init__.py @@ -3,4 +3,9 @@ Initialization file for invokeai.models.diffusion """ from .cross_attention_control import InvokeAICrossAttentionMixin from .cross_attention_map_saving import AttentionMapSaver -from .shared_invokeai_diffusion import InvokeAIDiffuserComponent, PostprocessingSettings +from .shared_invokeai_diffusion import ( + InvokeAIDiffuserComponent, + PostprocessingSettings, + BasicConditioningInfo, + SDXLConditioningInfo, +) diff --git a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py index c01cf82c57..e739855b9e 100644 --- a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py +++ b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py @@ -1,6 +1,8 @@ +from __future__ import annotations + from contextlib import contextmanager from dataclasses import dataclass -from math import ceil +import math from typing import Any, Callable, Dict, Optional, Union, List import numpy as np @@ -32,6 +34,29 @@ ModelForwardCallback: TypeAlias = Union[ ] +@dataclass +class BasicConditioningInfo: + embeds: torch.Tensor + extra_conditioning: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] + # weight: float + # mode: ConditioningAlgo + + def to(self, device, dtype=None): + self.embeds = self.embeds.to(device=device, dtype=dtype) + return self + + +@dataclass +class SDXLConditioningInfo(BasicConditioningInfo): + pooled_embeds: torch.Tensor + add_time_ids: torch.Tensor + + def to(self, device, dtype=None): + self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype) + self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype) + return super().to(device=device, dtype=dtype) + + @dataclass(frozen=True) class PostprocessingSettings: threshold: float @@ -127,33 +152,125 @@ class InvokeAIDiffuserComponent: for _, module in tokens_cross_attention_modules: module.set_attention_slice_calculated_callback(None) - def do_diffusion_step( + def do_controlnet_step( self, - x: torch.Tensor, - sigma: torch.Tensor, - unconditioning: Union[torch.Tensor, dict], - conditioning: Union[torch.Tensor, dict], - # unconditional_guidance_scale: float, - unconditional_guidance_scale: Union[float, List[float]], - step_index: Optional[int] = None, - total_step_count: Optional[int] = None, + control_data, + sample: torch.Tensor, + timestep: torch.Tensor, + step_index: int, + total_step_count: int, + conditioning_data, + ): + down_block_res_samples, mid_block_res_sample = None, None + + # control_data should be type List[ControlNetData] + # this loop covers both ControlNet (one ControlNetData in list) + # and MultiControlNet (multiple ControlNetData in list) + for i, control_datum in enumerate(control_data): + control_mode = control_datum.control_mode + # soft_injection and cfg_injection are the two ControlNet control_mode booleans 
+ # that are combined at higher level to make control_mode enum + # soft_injection determines whether to do per-layer re-weighting adjustment (if True) + # or default weighting (if False) + soft_injection = control_mode == "more_prompt" or control_mode == "more_control" + # cfg_injection = determines whether to apply ControlNet to only the conditional (if True) + # or the default both conditional and unconditional (if False) + cfg_injection = control_mode == "more_control" or control_mode == "unbalanced" + + first_control_step = math.floor(control_datum.begin_step_percent * total_step_count) + last_control_step = math.ceil(control_datum.end_step_percent * total_step_count) + # only apply controlnet if current step is within the controlnet's begin/end step range + if step_index >= first_control_step and step_index <= last_control_step: + if cfg_injection: + sample_model_input = sample + else: + # expand the latents input to control model if doing classifier free guidance + # (which I think for now is always true, there is conditional elsewhere that stops execution if + # classifier_free_guidance is <= 1.0 ?) + sample_model_input = torch.cat([sample] * 2) + + added_cond_kwargs = None + + if cfg_injection: # only applying ControlNet to conditional instead of in unconditioned + if type(conditioning_data.text_embeddings) is SDXLConditioningInfo: + added_cond_kwargs = { + "text_embeds": conditioning_data.text_embeddings.pooled_embeds, + "time_ids": conditioning_data.text_embeddings.add_time_ids, + } + encoder_hidden_states = conditioning_data.text_embeddings.embeds + encoder_attention_mask = None + else: + if type(conditioning_data.text_embeddings) is SDXLConditioningInfo: + added_cond_kwargs = { + "text_embeds": torch.cat( + [ + # TODO: how to pad? just by zeros? or even truncate? + conditioning_data.unconditioned_embeddings.pooled_embeds, + conditioning_data.text_embeddings.pooled_embeds, + ], + dim=0, + ), + "time_ids": torch.cat( + [ + conditioning_data.unconditioned_embeddings.add_time_ids, + conditioning_data.text_embeddings.add_time_ids, + ], + dim=0, + ), + } + ( + encoder_hidden_states, + encoder_attention_mask, + ) = self._concat_conditionings_for_batch( + conditioning_data.unconditioned_embeddings.embeds, + conditioning_data.text_embeddings.embeds, + ) + if isinstance(control_datum.weight, list): + # if controlnet has multiple weights, use the weight for the current step + controlnet_weight = control_datum.weight[step_index] + else: + # if controlnet has a single weight, use it for all steps + controlnet_weight = control_datum.weight + + # controlnet(s) inference + down_samples, mid_sample = control_datum.model( + sample=sample_model_input, + timestep=timestep, + encoder_hidden_states=encoder_hidden_states, + controlnet_cond=control_datum.image_tensor, + conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale + encoder_attention_mask=encoder_attention_mask, + guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel + return_dict=False, + ) + if cfg_injection: + # Inferred ControlNet only for the conditional batch. 
+ # To apply the output of ControlNet to both the unconditional and conditional batches, + # prepend zeros for unconditional batch + down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples] + mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample]) + + if down_block_res_samples is None and mid_block_res_sample is None: + down_block_res_samples, mid_block_res_sample = down_samples, mid_sample + else: + # add controlnet outputs together if have multiple controlnets + down_block_res_samples = [ + samples_prev + samples_curr + for samples_prev, samples_curr in zip(down_block_res_samples, down_samples) + ] + mid_block_res_sample += mid_sample + + return down_block_res_samples, mid_block_res_sample + + def do_unet_step( + self, + sample: torch.Tensor, + timestep: torch.Tensor, + conditioning_data, # TODO: type + step_index: int, + total_step_count: int, **kwargs, ): - """ - :param x: current latents - :param sigma: aka t, passed to the internal model to control how much denoising will occur - :param unconditioning: embeddings for unconditioned output. for hybrid conditioning this is a dict of tensors [B x 77 x 768], otherwise a single tensor [B x 77 x 768] - :param conditioning: embeddings for conditioned output. for hybrid conditioning this is a dict of tensors [B x 77 x 768], otherwise a single tensor [B x 77 x 768] - :param unconditional_guidance_scale: aka CFG scale, controls how much effect the conditioning tensor has - :param step_index: counts upwards from 0 to (step_count-1) (as passed to setup_cross_attention_control, if using). May be called multiple times for a single step, therefore do not assume that its value will monotically increase. If None, will be estimated by comparing sigma against self.model.sigmas . - :return: the new latents after applying the model to x using unscaled unconditioning and CFG-scaled conditioning. 
- """ - - if isinstance(unconditional_guidance_scale, list): - guidance_scale = unconditional_guidance_scale[step_index] - else: - guidance_scale = unconditional_guidance_scale - cross_attention_control_types_to_do = [] context: Context = self.cross_attention_control_context if self.cross_attention_control_context is not None: @@ -163,25 +280,15 @@ class InvokeAIDiffuserComponent: ) wants_cross_attention_control = len(cross_attention_control_types_to_do) > 0 - wants_hybrid_conditioning = isinstance(conditioning, dict) - if wants_hybrid_conditioning: - unconditioned_next_x, conditioned_next_x = self._apply_hybrid_conditioning( - x, - sigma, - unconditioning, - conditioning, - **kwargs, - ) - elif wants_cross_attention_control: + if wants_cross_attention_control: ( unconditioned_next_x, conditioned_next_x, ) = self._apply_cross_attention_controlled_conditioning( - x, - sigma, - unconditioning, - conditioning, + sample, + timestep, + conditioning_data, cross_attention_control_types_to_do, **kwargs, ) @@ -190,10 +297,9 @@ class InvokeAIDiffuserComponent: unconditioned_next_x, conditioned_next_x, ) = self._apply_standard_conditioning_sequentially( - x, - sigma, - unconditioning, - conditioning, + sample, + timestep, + conditioning_data, **kwargs, ) @@ -202,21 +308,13 @@ class InvokeAIDiffuserComponent: unconditioned_next_x, conditioned_next_x, ) = self._apply_standard_conditioning( - x, - sigma, - unconditioning, - conditioning, + sample, + timestep, + conditioning_data, **kwargs, ) - combined_next_x = self._combine( - # unconditioned_next_x, conditioned_next_x, unconditional_guidance_scale - unconditioned_next_x, - conditioned_next_x, - guidance_scale, - ) - - return combined_next_x + return unconditioned_next_x, conditioned_next_x def do_latent_postprocessing( self, @@ -228,7 +326,6 @@ class InvokeAIDiffuserComponent: ) -> torch.Tensor: if postprocessing_settings is not None: percent_through = step_index / total_step_count - latents = self.apply_threshold(postprocessing_settings, latents, percent_through) latents = self.apply_symmetry(postprocessing_settings, latents, percent_through) return latents @@ -281,17 +378,40 @@ class InvokeAIDiffuserComponent: # methods below are called from do_diffusion_step and should be considered private to this class. - def _apply_standard_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs): + def _apply_standard_conditioning(self, x, sigma, conditioning_data, **kwargs): # fast batched path x_twice = torch.cat([x] * 2) sigma_twice = torch.cat([sigma] * 2) - both_conditionings, encoder_attention_mask = self._concat_conditionings_for_batch(unconditioning, conditioning) + added_cond_kwargs = None + if type(conditioning_data.text_embeddings) is SDXLConditioningInfo: + added_cond_kwargs = { + "text_embeds": torch.cat( + [ + # TODO: how to pad? just by zeros? or even truncate? 
+ conditioning_data.unconditioned_embeddings.pooled_embeds, + conditioning_data.text_embeddings.pooled_embeds, + ], + dim=0, + ), + "time_ids": torch.cat( + [ + conditioning_data.unconditioned_embeddings.add_time_ids, + conditioning_data.text_embeddings.add_time_ids, + ], + dim=0, + ), + } + + both_conditionings, encoder_attention_mask = self._concat_conditionings_for_batch( + conditioning_data.unconditioned_embeddings.embeds, conditioning_data.text_embeddings.embeds + ) both_results = self.model_forward_callback( x_twice, sigma_twice, both_conditionings, encoder_attention_mask=encoder_attention_mask, + added_cond_kwargs=added_cond_kwargs, **kwargs, ) unconditioned_next_x, conditioned_next_x = both_results.chunk(2) @@ -301,8 +421,7 @@ class InvokeAIDiffuserComponent: self, x: torch.Tensor, sigma, - unconditioning: torch.Tensor, - conditioning: torch.Tensor, + conditioning_data, **kwargs, ): # low-memory sequential path @@ -320,52 +439,46 @@ class InvokeAIDiffuserComponent: if mid_block_additional_residual is not None: uncond_mid_block, cond_mid_block = mid_block_additional_residual.chunk(2) + added_cond_kwargs = None + is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo + if is_sdxl: + added_cond_kwargs = { + "text_embeds": conditioning_data.unconditioned_embeddings.pooled_embeds, + "time_ids": conditioning_data.unconditioned_embeddings.add_time_ids, + } + unconditioned_next_x = self.model_forward_callback( x, sigma, - unconditioning, + conditioning_data.unconditioned_embeddings.embeds, down_block_additional_residuals=uncond_down_block, mid_block_additional_residual=uncond_mid_block, + added_cond_kwargs=added_cond_kwargs, **kwargs, ) + + if is_sdxl: + added_cond_kwargs = { + "text_embeds": conditioning_data.text_embeddings.pooled_embeds, + "time_ids": conditioning_data.text_embeddings.add_time_ids, + } + conditioned_next_x = self.model_forward_callback( x, sigma, - conditioning, + conditioning_data.text_embeddings.embeds, down_block_additional_residuals=cond_down_block, mid_block_additional_residual=cond_mid_block, + added_cond_kwargs=added_cond_kwargs, **kwargs, ) return unconditioned_next_x, conditioned_next_x - # TODO: looks unused - def _apply_hybrid_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs): - assert isinstance(conditioning, dict) - assert isinstance(unconditioning, dict) - x_twice = torch.cat([x] * 2) - sigma_twice = torch.cat([sigma] * 2) - both_conditionings = dict() - for k in conditioning: - if isinstance(conditioning[k], list): - both_conditionings[k] = [ - torch.cat([unconditioning[k][i], conditioning[k][i]]) for i in range(len(conditioning[k])) - ] - else: - both_conditionings[k] = torch.cat([unconditioning[k], conditioning[k]]) - unconditioned_next_x, conditioned_next_x = self.model_forward_callback( - x_twice, - sigma_twice, - both_conditionings, - **kwargs, - ).chunk(2) - return unconditioned_next_x, conditioned_next_x - def _apply_cross_attention_controlled_conditioning( self, x: torch.Tensor, sigma, - unconditioning, - conditioning, + conditioning_data, cross_attention_control_types_to_do, **kwargs, ): @@ -391,26 +504,43 @@ class InvokeAIDiffuserComponent: mask=context.cross_attention_mask, cross_attention_types_to_do=[], ) + + added_cond_kwargs = None + is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo + if is_sdxl: + added_cond_kwargs = { + "text_embeds": conditioning_data.unconditioned_embeddings.pooled_embeds, + "time_ids": conditioning_data.unconditioned_embeddings.add_time_ids, + } + # no cross 
attention for unconditioning (negative prompt) unconditioned_next_x = self.model_forward_callback( x, sigma, - unconditioning, + conditioning_data.unconditioned_embeddings.embeds, {"swap_cross_attn_context": cross_attn_processor_context}, down_block_additional_residuals=uncond_down_block, mid_block_additional_residual=uncond_mid_block, + added_cond_kwargs=added_cond_kwargs, **kwargs, ) + if is_sdxl: + added_cond_kwargs = { + "text_embeds": conditioning_data.text_embeddings.pooled_embeds, + "time_ids": conditioning_data.text_embeddings.add_time_ids, + } + # do requested cross attention types for conditioning (positive prompt) cross_attn_processor_context.cross_attention_types_to_do = cross_attention_control_types_to_do conditioned_next_x = self.model_forward_callback( x, sigma, - conditioning, + conditioning_data.text_embeddings.embeds, {"swap_cross_attn_context": cross_attn_processor_context}, down_block_additional_residuals=cond_down_block, mid_block_additional_residual=cond_mid_block, + added_cond_kwargs=added_cond_kwargs, **kwargs, ) return unconditioned_next_x, conditioned_next_x @@ -421,63 +551,6 @@ class InvokeAIDiffuserComponent: combined_next_x = unconditioned_next_x + scaled_delta return combined_next_x - def apply_threshold( - self, - postprocessing_settings: PostprocessingSettings, - latents: torch.Tensor, - percent_through: float, - ) -> torch.Tensor: - if postprocessing_settings.threshold is None or postprocessing_settings.threshold == 0.0: - return latents - - threshold = postprocessing_settings.threshold - warmup = postprocessing_settings.warmup - - if percent_through < warmup: - current_threshold = threshold + threshold * 5 * (1 - (percent_through / warmup)) - else: - current_threshold = threshold - - if current_threshold <= 0: - return latents - - maxval = latents.max().item() - minval = latents.min().item() - - scale = 0.7 # default value from #395 - - if self.debug_thresholding: - std, mean = [i.item() for i in torch.std_mean(latents)] - outside = torch.count_nonzero((latents < -current_threshold) | (latents > current_threshold)) - logger.info(f"Threshold: %={percent_through} threshold={current_threshold:.3f} (of {threshold:.3f})") - logger.debug(f"min, mean, max = {minval:.3f}, {mean:.3f}, {maxval:.3f}\tstd={std}") - logger.debug(f"{outside / latents.numel() * 100:.2f}% values outside threshold") - - if maxval < current_threshold and minval > -current_threshold: - return latents - - num_altered = 0 - - # MPS torch.rand_like is fine because torch.rand_like is wrapped in generate.py! 
- - if maxval > current_threshold: - latents = torch.clone(latents) - maxval = np.clip(maxval * scale, 1, current_threshold) - num_altered += torch.count_nonzero(latents > maxval) - latents[latents > maxval] = torch.rand_like(latents[latents > maxval]) * maxval - - if minval < -current_threshold: - latents = torch.clone(latents) - minval = np.clip(minval * scale, -current_threshold, -1) - num_altered += torch.count_nonzero(latents < minval) - latents[latents < minval] = torch.rand_like(latents[latents < minval]) * minval - - if self.debug_thresholding: - logger.debug(f"min, , max = {minval:.3f}, , {maxval:.3f}\t(scaled by {scale})") - logger.debug(f"{num_altered / latents.numel() * 100:.2f}% values altered") - - return latents - def apply_symmetry( self, postprocessing_settings: PostprocessingSettings, @@ -539,18 +612,6 @@ class InvokeAIDiffuserComponent: self.last_percent_through = percent_through return latents.to(device=dev) - def estimate_percent_through(self, step_index, sigma): - if step_index is not None and self.cross_attention_control_context is not None: - # percent_through will never reach 1.0 (but this is intended) - return float(step_index) / float(self.cross_attention_control_context.step_count) - # find the best possible index of the current sigma in the sigma sequence - smaller_sigmas = torch.nonzero(self.model.sigmas <= sigma) - sigma_index = smaller_sigmas[-1].item() if smaller_sigmas.shape[0] > 0 else 0 - # flip because sigmas[0] is for the fully denoised image - # percent_through must be <1 - return 1.0 - float(sigma_index + 1) / float(self.model.sigmas.shape[0]) - # print('estimated percent_through', percent_through, 'from sigma', sigma.item()) - # todo: make this work @classmethod def apply_conjunction(cls, x, t, forward_func, uc, c_or_weighted_c_list, global_guidance_scale): @@ -564,7 +625,7 @@ class InvokeAIDiffuserComponent: # below is fugly omg conditionings = [uc] + [c for c, weight in weighted_cond_list] weights = [1] + [weight for c, weight in weighted_cond_list] - chunk_count = ceil(len(conditionings) / 2) + chunk_count = math.ceil(len(conditionings) / 2) deltas = None for chunk_index in range(chunk_count): offset = chunk_index * 2 diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 63380a19fa..fbae5b4a30 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -503,6 +503,9 @@ "hiresStrength": "High Res Strength", "imageFit": "Fit Initial Image To Output Size", "codeformerFidelity": "Fidelity", + "maskAdjustmentsHeader": "Mask Adjustments", + "maskBlur": "Mask Blur", + "maskBlurMethod": "Mask Blur Method", "seamSize": "Seam Size", "seamBlur": "Seam Blur", "seamStrength": "Seam Strength", diff --git a/invokeai/frontend/web/scripts/typegen.js b/invokeai/frontend/web/scripts/typegen.js index 015ae918ab..ec67c48f2d 100644 --- a/invokeai/frontend/web/scripts/typegen.js +++ b/invokeai/frontend/web/scripts/typegen.js @@ -1,7 +1,7 @@ import fs from 'node:fs'; import openapiTS from 'openapi-typescript'; -const OPENAPI_URL = 'http://localhost:9090/openapi.json'; +const OPENAPI_URL = 'http://127.0.0.1:9090/openapi.json'; const OUTPUT_FILE = 'src/services/api/schema.d.ts'; async function main() { diff --git a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketInvocationComplete.ts b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketInvocationComplete.ts index 
30e0bedb54..5b3b9424b6 100644
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketInvocationComplete.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/socketio/socketInvocationComplete.ts
@@ -7,6 +7,7 @@ import {
   imageSelected,
 } from 'features/gallery/store/gallerySlice';
 import { IMAGE_CATEGORIES } from 'features/gallery/store/types';
+import { CANVAS_OUTPUT } from 'features/nodes/util/graphBuilders/constants';
 import { progressImageSet } from 'features/system/store/systemSlice';
 import { imagesApi } from 'services/api/endpoints/images';
 import { isImageOutput } from 'services/api/guards';
@@ -52,7 +53,9 @@ export const addInvocationCompleteEventListener = () => {
       // Add canvas images to the staging area
       if (
-        graph_execution_state_id === canvas.layerState.stagingArea.sessionId
+        graph_execution_state_id ===
+          canvas.layerState.stagingArea.sessionId &&
+        [CANVAS_OUTPUT].includes(data.source_node_id)
       ) {
         dispatch(addImageToStagingArea(imageDTO));
       }
diff --git a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/tabChanged.ts b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/tabChanged.ts
index 6d3e599ae2..6791324fdd 100644
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/tabChanged.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/tabChanged.ts
@@ -12,7 +12,10 @@ export const addTabChangedListener = () => {
       if (activeTabName === 'unifiedCanvas') {
         const currentBaseModel = getState().generation.model?.base_model;

-        if (currentBaseModel && ['sd-1', 'sd-2'].includes(currentBaseModel)) {
+        if (
+          currentBaseModel &&
+          ['sd-1', 'sd-2', 'sdxl'].includes(currentBaseModel)
+        ) {
           // if we're already on a valid model, no change needed
           return;
         }
@@ -36,7 +39,9 @@ export const addTabChangedListener = () => {
         const validCanvasModels = mainModelsAdapter
           .getSelectors()
           .selectAll(models)
-          .filter((model) => ['sd-1', 'sd-2'].includes(model.base_model));
+          .filter((model) =>
+            ['sd-1', 'sd-2', 'sdxl'].includes(model.base_model)
+          );

         const firstValidCanvasModel = validCanvasModels[0];
diff --git a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedCanvas.ts b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedCanvas.ts
index 39bd742d7d..cd6791cc0b 100644
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedCanvas.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedCanvas.ts
@@ -1,6 +1,7 @@
 import { logger } from 'app/logging/logger';
 import { userInvoked } from 'app/store/actions';
 import openBase64ImageInTab from 'common/util/openBase64ImageInTab';
+import { parseify } from 'common/util/serialize';
 import {
   canvasSessionIdChanged,
   stagingAreaInitialized,
@@ -15,7 +16,6 @@ import {
 import { imagesApi } from 'services/api/endpoints/images';
 import { sessionCreated } from 'services/api/thunks/session';
 import { ImageDTO } from 'services/api/types';
 import { startAppListening } from '..';
-import { parseify } from 'common/util/serialize';

 /**
  * This listener is responsible for invoking the canvas.
This involves a number of steps: diff --git a/invokeai/frontend/web/src/features/canvas/store/canvasSlice.ts b/invokeai/frontend/web/src/features/canvas/store/canvasSlice.ts index f63ab2fd67..11f829221a 100644 --- a/invokeai/frontend/web/src/features/canvas/store/canvasSlice.ts +++ b/invokeai/frontend/web/src/features/canvas/store/canvasSlice.ts @@ -47,7 +47,7 @@ export const initialCanvasState: CanvasState = { boundingBoxCoordinates: { x: 0, y: 0 }, boundingBoxDimensions: { width: 512, height: 512 }, boundingBoxPreviewFill: { r: 0, g: 0, b: 0, a: 0.5 }, - boundingBoxScaleMethod: 'auto', + boundingBoxScaleMethod: 'none', brushColor: { r: 90, g: 90, b: 255, a: 1 }, brushSize: 50, canvasContainerDimensions: { width: 0, height: 0 }, diff --git a/invokeai/frontend/web/src/features/canvas/store/canvasTypes.ts b/invokeai/frontend/web/src/features/canvas/store/canvasTypes.ts index ba85a7e132..f2ba90b050 100644 --- a/invokeai/frontend/web/src/features/canvas/store/canvasTypes.ts +++ b/invokeai/frontend/web/src/features/canvas/store/canvasTypes.ts @@ -11,9 +11,9 @@ export const LAYER_NAMES = ['base', 'mask'] as const; export type CanvasLayer = (typeof LAYER_NAMES)[number]; export const BOUNDING_BOX_SCALES_DICT = [ + { label: 'None', value: 'none' }, { label: 'Auto', value: 'auto' }, { label: 'Manual', value: 'manual' }, - { label: 'None', value: 'none' }, ]; export const BOUNDING_BOX_SCALES = ['none', 'auto', 'manual'] as const; diff --git a/invokeai/frontend/web/src/features/lora/components/ParamLoraList.tsx b/invokeai/frontend/web/src/features/lora/components/ParamLoraList.tsx index f10084e585..5ba4e711ef 100644 --- a/invokeai/frontend/web/src/features/lora/components/ParamLoraList.tsx +++ b/invokeai/frontend/web/src/features/lora/components/ParamLoraList.tsx @@ -1,4 +1,4 @@ -import { Divider } from '@chakra-ui/react'; +import { Divider, Flex } from '@chakra-ui/react'; import { createSelector } from '@reduxjs/toolkit'; import { stateSelector } from 'app/store/store'; import { useAppSelector } from 'app/store/storeHooks'; @@ -20,10 +20,10 @@ const ParamLoraList = () => { return ( <> {lorasArray.map((lora, i) => ( - <> - {i > 0 && } - - + + {i > 0 && } + + ))} ); diff --git a/invokeai/frontend/web/src/features/lora/components/ParamLoraSelect.tsx b/invokeai/frontend/web/src/features/lora/components/ParamLoraSelect.tsx index 951037f9e3..2046d36ab2 100644 --- a/invokeai/frontend/web/src/features/lora/components/ParamLoraSelect.tsx +++ b/invokeai/frontend/web/src/features/lora/components/ParamLoraSelect.tsx @@ -54,6 +54,8 @@ const ParamLoRASelect = () => { }); }); + data.sort((a, b) => (a.label && !b.label ? 1 : -1)); + return data.sort((a, b) => (a.disabled && !b.disabled ? 
1 : -1)); }, [loras, loraModels, currentMainModel?.base_model]); diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts index c47c7be8b4..adce34adf5 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts @@ -2,28 +2,31 @@ import { RootState } from 'app/store/store'; import { MetadataAccumulatorInvocation } from 'services/api/types'; import { NonNullableGraph } from '../../types/types'; import { - IMAGE_TO_LATENTS, + CANVAS_OUTPUT, LATENTS_TO_IMAGE, + MASK_BLUR, METADATA_ACCUMULATOR, - SDXL_LATENTS_TO_LATENTS, + SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH, + SDXL_CANVAS_INPAINT_GRAPH, + SDXL_CANVAS_OUTPAINT_GRAPH, + SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH, SDXL_MODEL_LOADER, - SDXL_REFINER_LATENTS_TO_LATENTS, + SDXL_REFINER_DENOISE_LATENTS, SDXL_REFINER_MODEL_LOADER, SDXL_REFINER_NEGATIVE_CONDITIONING, SDXL_REFINER_POSITIVE_CONDITIONING, } from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; export const addSDXLRefinerToGraph = ( state: RootState, graph: NonNullableGraph, baseNodeId: string ): void => { - const { positivePrompt, negativePrompt } = state.generation; const { refinerModel, - refinerAestheticScore, - positiveStylePrompt, - negativeStylePrompt, + refinerPositiveAestheticScore, + refinerNegativeAestheticScore, refinerSteps, refinerScheduler, refinerCFGScale, @@ -38,13 +41,20 @@ export const addSDXLRefinerToGraph = ( if (metadataAccumulator) { metadataAccumulator.refiner_model = refinerModel; - metadataAccumulator.refiner_aesthetic_store = refinerAestheticScore; + metadataAccumulator.refiner_positive_aesthetic_score = + refinerPositiveAestheticScore; + metadataAccumulator.refiner_negative_aesthetic_score = + refinerNegativeAestheticScore; metadataAccumulator.refiner_cfg_scale = refinerCFGScale; metadataAccumulator.refiner_scheduler = refinerScheduler; metadataAccumulator.refiner_start = refinerStart; metadataAccumulator.refiner_steps = refinerSteps; } + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, true); + // Unplug SDXL Latents Generation To Latents To Image graph.edges = graph.edges.filter( (e) => @@ -59,21 +69,6 @@ export const addSDXLRefinerToGraph = ( ) ); - // connect the VAE back to the i2l, which we just removed in the filter - // but only if we are doing l2l - if (baseNodeId === SDXL_LATENTS_TO_LATENTS) { - graph.edges.push({ - source: { - node_id: SDXL_MODEL_LOADER, - field: 'vae', - }, - destination: { - node_id: IMAGE_TO_LATENTS, - field: 'vae', - }, - }); - } - graph.nodes[SDXL_REFINER_MODEL_LOADER] = { type: 'sdxl_refiner_model_loader', id: SDXL_REFINER_MODEL_LOADER, @@ -82,20 +77,20 @@ export const addSDXLRefinerToGraph = ( graph.nodes[SDXL_REFINER_POSITIVE_CONDITIONING] = { type: 'sdxl_refiner_compel_prompt', id: SDXL_REFINER_POSITIVE_CONDITIONING, - style: `${positivePrompt} ${positiveStylePrompt}`, - aesthetic_score: refinerAestheticScore, + style: craftedPositiveStylePrompt, + aesthetic_score: refinerPositiveAestheticScore, }; graph.nodes[SDXL_REFINER_NEGATIVE_CONDITIONING] = { type: 'sdxl_refiner_compel_prompt', id: SDXL_REFINER_NEGATIVE_CONDITIONING, - style: `${negativePrompt} ${negativeStylePrompt}`, - aesthetic_score: refinerAestheticScore, + style: craftedNegativeStylePrompt, + aesthetic_score: 
refinerNegativeAestheticScore, }; - graph.nodes[SDXL_REFINER_LATENTS_TO_LATENTS] = { - type: 'l2l_sdxl', - id: SDXL_REFINER_LATENTS_TO_LATENTS, + graph.nodes[SDXL_REFINER_DENOISE_LATENTS] = { + type: 'denoise_latents', + id: SDXL_REFINER_DENOISE_LATENTS, cfg_scale: refinerCFGScale, - steps: refinerSteps / (1 - Math.min(refinerStart, 0.99)), + steps: refinerSteps, scheduler: refinerScheduler, denoising_start: refinerStart, denoising_end: 1, @@ -108,20 +103,10 @@ export const addSDXLRefinerToGraph = ( field: 'unet', }, destination: { - node_id: SDXL_REFINER_LATENTS_TO_LATENTS, + node_id: SDXL_REFINER_DENOISE_LATENTS, field: 'unet', }, }, - { - source: { - node_id: SDXL_REFINER_MODEL_LOADER, - field: 'vae', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'vae', - }, - }, { source: { node_id: SDXL_REFINER_MODEL_LOADER, @@ -148,7 +133,7 @@ export const addSDXLRefinerToGraph = ( field: 'conditioning', }, destination: { - node_id: SDXL_REFINER_LATENTS_TO_LATENTS, + node_id: SDXL_REFINER_DENOISE_LATENTS, field: 'positive_conditioning', }, }, @@ -158,7 +143,7 @@ export const addSDXLRefinerToGraph = ( field: 'conditioning', }, destination: { - node_id: SDXL_REFINER_LATENTS_TO_LATENTS, + node_id: SDXL_REFINER_DENOISE_LATENTS, field: 'negative_conditioning', }, }, @@ -168,19 +153,52 @@ export const addSDXLRefinerToGraph = ( field: 'latents', }, destination: { - node_id: SDXL_REFINER_LATENTS_TO_LATENTS, + node_id: SDXL_REFINER_DENOISE_LATENTS, field: 'latents', }, - }, - { + } + ); + + if ( + graph.id === SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH || + graph.id === SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH + ) { + graph.edges.push({ source: { - node_id: SDXL_REFINER_LATENTS_TO_LATENTS, + node_id: SDXL_REFINER_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'latents', + }, + }); + } else { + graph.edges.push({ + source: { + node_id: SDXL_REFINER_DENOISE_LATENTS, field: 'latents', }, destination: { node_id: LATENTS_TO_IMAGE, field: 'latents', }, - } - ); + }); + } + + if ( + graph.id === SDXL_CANVAS_INPAINT_GRAPH || + graph.id === SDXL_CANVAS_OUTPAINT_GRAPH + ) { + graph.edges.push({ + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: SDXL_REFINER_DENOISE_LATENTS, + field: 'mask', + }, + }); + } }; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addVAEToGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addVAEToGraph.ts index 1472b3ea3f..360e07062a 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addVAEToGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addVAEToGraph.ts @@ -2,14 +2,24 @@ import { RootState } from 'app/store/store'; import { NonNullableGraph } from 'features/nodes/types/types'; import { MetadataAccumulatorInvocation } from 'services/api/types'; import { + CANVAS_IMAGE_TO_IMAGE_GRAPH, + CANVAS_INPAINT_GRAPH, + CANVAS_OUTPAINT_GRAPH, + CANVAS_OUTPUT, + CANVAS_TEXT_TO_IMAGE_GRAPH, IMAGE_TO_IMAGE_GRAPH, IMAGE_TO_LATENTS, - INPAINT, - INPAINT_GRAPH, + INPAINT_IMAGE, LATENTS_TO_IMAGE, MAIN_MODEL_LOADER, METADATA_ACCUMULATOR, ONNX_MODEL_LOADER, + SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH, + SDXL_CANVAS_INPAINT_GRAPH, + SDXL_CANVAS_OUTPAINT_GRAPH, + SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH, + SDXL_IMAGE_TO_IMAGE_GRAPH, + SDXL_TEXT_TO_IMAGE_GRAPH, TEXT_TO_IMAGE_GRAPH, VAE_LOADER, } from './constants'; @@ -35,7 +45,13 @@ export const addVAEToGraph = ( }; } const isOnnxModel = modelLoaderNodeId == ONNX_MODEL_LOADER; - if (graph.id === TEXT_TO_IMAGE_GRAPH 
|| graph.id === IMAGE_TO_IMAGE_GRAPH) { + + if ( + graph.id === TEXT_TO_IMAGE_GRAPH || + graph.id === IMAGE_TO_IMAGE_GRAPH || + graph.id === SDXL_TEXT_TO_IMAGE_GRAPH || + graph.id === SDXL_IMAGE_TO_IMAGE_GRAPH + ) { graph.edges.push({ source: { node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, @@ -48,7 +64,30 @@ export const addVAEToGraph = ( }); } - if (graph.id === IMAGE_TO_IMAGE_GRAPH) { + if ( + graph.id === CANVAS_TEXT_TO_IMAGE_GRAPH || + graph.id === CANVAS_IMAGE_TO_IMAGE_GRAPH || + graph.id === SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH || + graph.id == SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH + ) { + graph.edges.push({ + source: { + node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, + field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'vae', + }, + }); + } + + if ( + graph.id === IMAGE_TO_IMAGE_GRAPH || + graph.id === SDXL_IMAGE_TO_IMAGE_GRAPH || + graph.id === CANVAS_IMAGE_TO_IMAGE_GRAPH || + graph.id === SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH + ) { graph.edges.push({ source: { node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, @@ -61,17 +100,34 @@ export const addVAEToGraph = ( }); } - if (graph.id === INPAINT_GRAPH) { - graph.edges.push({ - source: { - node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, - field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae', + if ( + graph.id === CANVAS_INPAINT_GRAPH || + graph.id === CANVAS_OUTPAINT_GRAPH || + graph.id === SDXL_CANVAS_INPAINT_GRAPH || + graph.id === SDXL_CANVAS_OUTPAINT_GRAPH + ) { + graph.edges.push( + { + source: { + node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, + field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae', + }, + destination: { + node_id: INPAINT_IMAGE, + field: 'vae', + }, }, - destination: { - node_id: INPAINT, - field: 'vae', - }, - }); + { + source: { + node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER, + field: isAutoVae && isOnnxModel ? 
'vae_decoder' : 'vae', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'vae', + }, + } + ); } if (vae && metadataAccumulator) { diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasGraph.ts index 8a7716071f..d268a3990d 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasGraph.ts @@ -3,6 +3,11 @@ import { NonNullableGraph } from 'features/nodes/types/types'; import { ImageDTO } from 'services/api/types'; import { buildCanvasImageToImageGraph } from './buildCanvasImageToImageGraph'; import { buildCanvasInpaintGraph } from './buildCanvasInpaintGraph'; +import { buildCanvasOutpaintGraph } from './buildCanvasOutpaintGraph'; +import { buildCanvasSDXLImageToImageGraph } from './buildCanvasSDXLImageToImageGraph'; +import { buildCanvasSDXLInpaintGraph } from './buildCanvasSDXLInpaintGraph'; +import { buildCanvasSDXLOutpaintGraph } from './buildCanvasSDXLOutpaintGraph'; +import { buildCanvasSDXLTextToImageGraph } from './buildCanvasSDXLTextToImageGraph'; import { buildCanvasTextToImageGraph } from './buildCanvasTextToImageGraph'; export const buildCanvasGraph = ( @@ -14,17 +19,58 @@ export const buildCanvasGraph = ( let graph: NonNullableGraph; if (generationMode === 'txt2img') { - graph = buildCanvasTextToImageGraph(state); + if ( + state.generation.model && + state.generation.model.base_model === 'sdxl' + ) { + graph = buildCanvasSDXLTextToImageGraph(state); + } else { + graph = buildCanvasTextToImageGraph(state); + } } else if (generationMode === 'img2img') { if (!canvasInitImage) { throw new Error('Missing canvas init image'); } - graph = buildCanvasImageToImageGraph(state, canvasInitImage); - } else { + if ( + state.generation.model && + state.generation.model.base_model === 'sdxl' + ) { + graph = buildCanvasSDXLImageToImageGraph(state, canvasInitImage); + } else { + graph = buildCanvasImageToImageGraph(state, canvasInitImage); + } + } else if (generationMode === 'inpaint') { if (!canvasInitImage || !canvasMaskImage) { throw new Error('Missing canvas init and mask images'); } - graph = buildCanvasInpaintGraph(state, canvasInitImage, canvasMaskImage); + if ( + state.generation.model && + state.generation.model.base_model === 'sdxl' + ) { + graph = buildCanvasSDXLInpaintGraph( + state, + canvasInitImage, + canvasMaskImage + ); + } else { + graph = buildCanvasInpaintGraph(state, canvasInitImage, canvasMaskImage); + } + } else { + if (!canvasInitImage) { + throw new Error('Missing canvas init image'); + } + if ( + state.generation.model && + state.generation.model.base_model === 'sdxl' + ) { + graph = buildCanvasSDXLOutpaintGraph( + state, + canvasInitImage, + canvasMaskImage + ); + } else { + graph = buildCanvasOutpaintGraph(state, canvasInitImage, canvasMaskImage); + } } return graph; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasImageToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasImageToImageGraph.ts index 42f768c107..a68aeef392 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasImageToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasImageToImageGraph.ts @@ -14,11 +14,11 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; import { addVAEToGraph } from './addVAEToGraph'; import { 
addWatermarkerToGraph } from './addWatermarkerToGraph'; import { + CANVAS_IMAGE_TO_IMAGE_GRAPH, + CANVAS_OUTPUT, CLIP_SKIP, - IMAGE_TO_IMAGE_GRAPH, + DENOISE_LATENTS, IMAGE_TO_LATENTS, - LATENTS_TO_IMAGE, - LATENTS_TO_LATENTS, MAIN_MODEL_LOADER, METADATA_ACCUMULATOR, NEGATIVE_CONDITIONING, @@ -73,8 +73,20 @@ export const buildCanvasImageToImageGraph = ( // copy-pasted graph from node editor, filled in with state values & friendly node ids const graph: NonNullableGraph = { - id: IMAGE_TO_IMAGE_GRAPH, + id: CANVAS_IMAGE_TO_IMAGE_GRAPH, nodes: { + [MAIN_MODEL_LOADER]: { + type: 'main_model_loader', + id: MAIN_MODEL_LOADER, + is_intermediate: true, + model, + }, + [CLIP_SKIP]: { + type: 'clip_skip', + id: CLIP_SKIP, + is_intermediate: true, + skipped_layers: clipSkip, + }, [POSITIVE_CONDITIONING]: { type: 'compel', id: POSITIVE_CONDITIONING, @@ -93,27 +105,6 @@ export const buildCanvasImageToImageGraph = ( is_intermediate: true, use_cpu, }, - [MAIN_MODEL_LOADER]: { - type: 'main_model_loader', - id: MAIN_MODEL_LOADER, - is_intermediate: true, - model, - }, - [CLIP_SKIP]: { - type: 'clip_skip', - id: CLIP_SKIP, - is_intermediate: true, - skipped_layers: clipSkip, - }, - [LATENTS_TO_LATENTS]: { - type: 'l2l', - id: LATENTS_TO_LATENTS, - is_intermediate: true, - cfg_scale, - scheduler, - steps, - strength, - }, [IMAGE_TO_LATENTS]: { type: 'i2l', id: IMAGE_TO_LATENTS, @@ -123,13 +114,34 @@ export const buildCanvasImageToImageGraph = ( // image_name: initialImage.image_name, // }, }, - [LATENTS_TO_IMAGE]: { + [DENOISE_LATENTS]: { + type: 'denoise_latents', + id: DENOISE_LATENTS, + is_intermediate: true, + cfg_scale, + scheduler, + steps, + denoising_start: 1 - strength, + denoising_end: 1, + }, + [CANVAS_OUTPUT]: { type: 'l2i', - id: LATENTS_TO_IMAGE, + id: CANVAS_OUTPUT, is_intermediate: !shouldAutoSave, }, }, edges: [ + // Connect Model Loader to CLIP Skip and UNet + { + source: { + node_id: MAIN_MODEL_LOADER, + field: 'unet', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'unet', + }, + }, { source: { node_id: MAIN_MODEL_LOADER, @@ -140,6 +152,7 @@ export const buildCanvasImageToImageGraph = ( field: 'clip', }, }, + // Connect CLIP Skip To Conditioning { source: { node_id: CLIP_SKIP, @@ -160,24 +173,25 @@ export const buildCanvasImageToImageGraph = ( field: 'clip', }, }, + // Connect Everything To Denoise Latents { source: { - node_id: LATENTS_TO_LATENTS, - field: 'latents', + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', }, destination: { - node_id: LATENTS_TO_IMAGE, - field: 'latents', + node_id: DENOISE_LATENTS, + field: 'positive_conditioning', }, }, { source: { - node_id: IMAGE_TO_LATENTS, - field: 'latents', + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'latents', + node_id: DENOISE_LATENTS, + field: 'negative_conditioning', }, }, { @@ -186,38 +200,29 @@ export const buildCanvasImageToImageGraph = ( field: 'noise', }, destination: { - node_id: LATENTS_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'noise', }, }, { source: { - node_id: MAIN_MODEL_LOADER, - field: 'unet', + node_id: IMAGE_TO_LATENTS, + field: 'latents', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'unet', + node_id: DENOISE_LATENTS, + field: 'latents', }, }, + // Decode the denoised latents to an image { source: { - node_id: NEGATIVE_CONDITIONING, - field: 'conditioning', + node_id: DENOISE_LATENTS, + field: 'latents', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'negative_conditioning', - }, - }, - { - 
source: { - node_id: POSITIVE_CONDITIONING, - field: 'conditioning', - }, - destination: { - node_id: LATENTS_TO_LATENTS, - field: 'positive_conditioning', + node_id: CANVAS_OUTPUT, + field: 'latents', }, }, ], @@ -318,32 +323,32 @@ export const buildCanvasImageToImageGraph = ( field: 'metadata', }, destination: { - node_id: LATENTS_TO_IMAGE, + node_id: CANVAS_OUTPUT, field: 'metadata', }, }); // add LoRA support - addLoRAsToGraph(state, graph, LATENTS_TO_LATENTS); + addLoRAsToGraph(state, graph, DENOISE_LATENTS); // optionally add custom VAE - addVAEToGraph(state, graph); + addVAEToGraph(state, graph, MAIN_MODEL_LOADER); // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); // add controlnet, mutating `graph` - addControlNetToLinearGraph(state, graph, LATENTS_TO_LATENTS); + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); // NSFW & watermark - must be last thing added to graph if (state.system.shouldUseNSFWChecker) { // must add before watermarker! - addNSFWCheckerToGraph(state, graph); + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); } if (state.system.shouldUseWatermarker) { // must add after nsfw checker! - addWatermarkerToGraph(state, graph); + addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); } return graph; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasInpaintGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasInpaintGraph.ts index 3cec76757f..6b0da8e197 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasInpaintGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasInpaintGraph.ts @@ -2,22 +2,35 @@ import { logger } from 'app/logging/logger'; import { RootState } from 'app/store/store'; import { NonNullableGraph } from 'features/nodes/types/types'; import { + ImageBlurInvocation, ImageDTO, - InpaintInvocation, + ImageToLatentsInvocation, + NoiseInvocation, RandomIntInvocation, RangeOfSizeInvocation, } from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; import { addLoRAsToGraph } from './addLoRAsToGraph'; import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { + CANVAS_INPAINT_GRAPH, + CANVAS_OUTPUT, CLIP_SKIP, - INPAINT, - INPAINT_GRAPH, + COLOR_CORRECT, + DENOISE_LATENTS, + INPAINT_IMAGE, + INPAINT_IMAGE_RESIZE_DOWN, + INPAINT_IMAGE_RESIZE_UP, ITERATE, + LATENTS_TO_IMAGE, MAIN_MODEL_LOADER, + MASK_BLUR, + MASK_RESIZE_DOWN, + MASK_RESIZE_UP, NEGATIVE_CONDITIONING, + NOISE, POSITIVE_CONDITIONING, RANDOM_INT, RANGE_OF_SIZE, @@ -40,16 +53,14 @@ export const buildCanvasInpaintGraph = ( scheduler, steps, img2imgStrength: strength, - shouldFitToWidthHeight, iterations, seed, shouldRandomizeSeed, - seamSize, - seamBlur, - seamSteps, - seamStrength, - tileSize, - infillMethod, + vaePrecision, + shouldUseNoiseSettings, + shouldUseCpuNoise, + maskBlur, + maskBlurMethod, clipSkip, } = state.generation; @@ -68,40 +79,24 @@ export const buildCanvasInpaintGraph = ( shouldAutoSave, } = state.canvas; + const use_cpu = shouldUseNoiseSettings + ? 
shouldUseCpuNoise + : shouldUseCpuNoise; + const graph: NonNullableGraph = { - id: INPAINT_GRAPH, + id: CANVAS_INPAINT_GRAPH, nodes: { - [INPAINT]: { - is_intermediate: !shouldAutoSave, - type: 'inpaint', - id: INPAINT, - steps, - width, - height, - cfg_scale, - scheduler, - image: { - image_name: canvasInitImage.image_name, - }, - strength, - fit: shouldFitToWidthHeight, - mask: { - image_name: canvasMaskImage.image_name, - }, - seam_size: seamSize, - seam_blur: seamBlur, - seam_strength: seamStrength, - seam_steps: seamSteps, - tile_size: infillMethod === 'tile' ? tileSize : undefined, - infill_method: infillMethod as InpaintInvocation['infill_method'], - inpaint_width: - boundingBoxScaleMethod !== 'none' - ? scaledBoundingBoxDimensions.width - : undefined, - inpaint_height: - boundingBoxScaleMethod !== 'none' - ? scaledBoundingBoxDimensions.height - : undefined, + [MAIN_MODEL_LOADER]: { + type: 'main_model_loader', + id: MAIN_MODEL_LOADER, + is_intermediate: true, + model, + }, + [CLIP_SKIP]: { + type: 'clip_skip', + id: CLIP_SKIP, + is_intermediate: true, + skipped_layers: clipSkip, }, [POSITIVE_CONDITIONING]: { type: 'compel', @@ -115,17 +110,52 @@ export const buildCanvasInpaintGraph = ( is_intermediate: true, prompt: negativePrompt, }, - [MAIN_MODEL_LOADER]: { - type: 'main_model_loader', - id: MAIN_MODEL_LOADER, + [MASK_BLUR]: { + type: 'img_blur', + id: MASK_BLUR, is_intermediate: true, - model, + radius: maskBlur, + blur_type: maskBlurMethod, }, - [CLIP_SKIP]: { - type: 'clip_skip', - id: CLIP_SKIP, + [INPAINT_IMAGE]: { + type: 'i2l', + id: INPAINT_IMAGE, is_intermediate: true, - skipped_layers: clipSkip, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + use_cpu, + is_intermediate: true, + }, + [DENOISE_LATENTS]: { + type: 'denoise_latents', + id: DENOISE_LATENTS, + is_intermediate: true, + steps: steps, + cfg_scale: cfg_scale, + scheduler: scheduler, + denoising_start: 1 - strength, + denoising_end: 1, + }, + [LATENTS_TO_IMAGE]: { + type: 'l2i', + id: LATENTS_TO_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? 
true : false, + }, + [COLOR_CORRECT]: { + type: 'color_correct', + id: COLOR_CORRECT, + is_intermediate: true, + reference: canvasInitImage, + }, + [CANVAS_OUTPUT]: { + type: 'img_paste', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + base_image: canvasInitImage, }, [RANGE_OF_SIZE]: { type: 'range_of_size', @@ -143,13 +173,14 @@ export const buildCanvasInpaintGraph = ( }, }, edges: [ + // Connect Model Loader to CLIP Skip and UNet { source: { node_id: MAIN_MODEL_LOADER, field: 'unet', }, destination: { - node_id: INPAINT, + node_id: DENOISE_LATENTS, field: 'unet', }, }, @@ -163,6 +194,7 @@ export const buildCanvasInpaintGraph = ( field: 'clip', }, }, + // Connect CLIP Skip to Conditioning { source: { node_id: CLIP_SKIP, @@ -183,26 +215,58 @@ export const buildCanvasInpaintGraph = ( field: 'clip', }, }, + // Connect Everything To Inpaint Node + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, { source: { node_id: NEGATIVE_CONDITIONING, field: 'conditioning', }, destination: { - node_id: INPAINT, + node_id: DENOISE_LATENTS, field: 'negative_conditioning', }, }, { source: { - node_id: POSITIVE_CONDITIONING, - field: 'conditioning', + node_id: NOISE, + field: 'noise', }, destination: { - node_id: INPAINT, - field: 'positive_conditioning', + node_id: DENOISE_LATENTS, + field: 'noise', }, }, + { + source: { + node_id: INPAINT_IMAGE, + field: 'latents', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'mask', + }, + }, + // Iterate { source: { node_id: RANGE_OF_SIZE, @@ -219,19 +283,216 @@ export const buildCanvasInpaintGraph = ( field: 'item', }, destination: { - node_id: INPAINT, + node_id: NOISE, field: 'seed', }, }, + // Decode Inpainted Latents To Image + { + source: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, ], }; - addLoRAsToGraph(state, graph, INPAINT); + // Handle Scale Before Processing + if (['auto', 'manual'].includes(boundingBoxScaleMethod)) { + const scaledWidth: number = scaledBoundingBoxDimensions.width; + const scaledHeight: number = scaledBoundingBoxDimensions.height; - // Add VAE - addVAEToGraph(state, graph); + // Add Scaling Nodes + graph.nodes[INPAINT_IMAGE_RESIZE_UP] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasInitImage, + }; + graph.nodes[MASK_RESIZE_UP] = { + type: 'img_resize', + id: MASK_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasMaskImage, + }; + graph.nodes[INPAINT_IMAGE_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[MASK_RESIZE_DOWN] = { + type: 'img_resize', + id: MASK_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; - // handle seed + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: scaledWidth, + height: scaledHeight, + }; + + // Connect Nodes + graph.edges.push( + // Scale Inpaint Image and Mask + { + source: { + node_id: INPAINT_IMAGE_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_UP, + field: 'image', 
+ }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Back Onto Original Image + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } else { + // Add Images To Nodes + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: width, + height: height, + }; + graph.nodes[INPAINT_IMAGE] = { + ...(graph.nodes[INPAINT_IMAGE] as ImageToLatentsInvocation), + image: canvasInitImage, + }; + graph.nodes[MASK_BLUR] = { + ...(graph.nodes[MASK_BLUR] as ImageBlurInvocation), + image: canvasMaskImage, + }; + + graph.edges.push( + // Color Correct The Inpainted Result + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Back Onto Original Image + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } + + // Handle Seed if (shouldRandomizeSeed) { // Random int node to generate the starting seed const randomIntNode: RandomIntInvocation = { @@ -251,15 +512,24 @@ export const buildCanvasInpaintGraph = ( (graph.nodes[RANGE_OF_SIZE] as RangeOfSizeInvocation).start = seed; } + // Add VAE + addVAEToGraph(state, graph, MAIN_MODEL_LOADER); + + // add LoRA support + addLoRAsToGraph(state, graph, DENOISE_LATENTS, MAIN_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); + // NSFW & watermark - must be last thing added to graph if (state.system.shouldUseNSFWChecker) { // must add before watermarker! - addNSFWCheckerToGraph(state, graph, INPAINT); + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); } if (state.system.shouldUseWatermarker) { // must add after nsfw checker! 
- addWatermarkerToGraph(state, graph, INPAINT); + addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); } return graph; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasOutpaintGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasOutpaintGraph.ts new file mode 100644 index 0000000000..23f6acb539 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasOutpaintGraph.ts @@ -0,0 +1,677 @@ +import { logger } from 'app/logging/logger'; +import { RootState } from 'app/store/store'; +import { NonNullableGraph } from 'features/nodes/types/types'; +import { + ImageBlurInvocation, + ImageDTO, + ImageToLatentsInvocation, + InfillPatchmatchInvocation, + InfillTileInvocation, + NoiseInvocation, + RandomIntInvocation, + RangeOfSizeInvocation, +} from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; +import { addLoRAsToGraph } from './addLoRAsToGraph'; +import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; +import { addWatermarkerToGraph } from './addWatermarkerToGraph'; +import { + CANVAS_OUTPAINT_GRAPH, + CANVAS_OUTPUT, + CLIP_SKIP, + COLOR_CORRECT, + DENOISE_LATENTS, + INPAINT_IMAGE, + INPAINT_IMAGE_RESIZE_DOWN, + INPAINT_IMAGE_RESIZE_UP, + INPAINT_INFILL, + INPAINT_INFILL_RESIZE_DOWN, + ITERATE, + LATENTS_TO_IMAGE, + MAIN_MODEL_LOADER, + MASK_BLUR, + MASK_COMBINE, + MASK_FROM_ALPHA, + MASK_RESIZE_DOWN, + MASK_RESIZE_UP, + NEGATIVE_CONDITIONING, + NOISE, + POSITIVE_CONDITIONING, + RANDOM_INT, + RANGE_OF_SIZE, +} from './constants'; + +/** + * Builds the Canvas tab's Outpaint graph. + */ +export const buildCanvasOutpaintGraph = ( + state: RootState, + canvasInitImage: ImageDTO, + canvasMaskImage?: ImageDTO +): NonNullableGraph => { + const log = logger('nodes'); + const { + positivePrompt, + negativePrompt, + model, + cfgScale: cfg_scale, + scheduler, + steps, + img2imgStrength: strength, + iterations, + seed, + shouldRandomizeSeed, + vaePrecision, + shouldUseNoiseSettings, + shouldUseCpuNoise, + maskBlur, + maskBlurMethod, + tileSize, + infillMethod, + clipSkip, + } = state.generation; + + if (!model) { + log.error('No model found in state'); + throw new Error('No model found in state'); + } + + // The bounding box determines width and height, not the width and height params + const { width, height } = state.canvas.boundingBoxDimensions; + + // We may need to set the inpaint width and height to scale the image + const { + scaledBoundingBoxDimensions, + boundingBoxScaleMethod, + shouldAutoSave, + } = state.canvas; + + const use_cpu = shouldUseNoiseSettings + ? 
shouldUseCpuNoise + : shouldUseCpuNoise; + + const graph: NonNullableGraph = { + id: CANVAS_OUTPAINT_GRAPH, + nodes: { + [MAIN_MODEL_LOADER]: { + type: 'main_model_loader', + id: MAIN_MODEL_LOADER, + is_intermediate: true, + model, + }, + [CLIP_SKIP]: { + type: 'clip_skip', + id: CLIP_SKIP, + is_intermediate: true, + skipped_layers: clipSkip, + }, + [POSITIVE_CONDITIONING]: { + type: 'compel', + id: POSITIVE_CONDITIONING, + is_intermediate: true, + prompt: positivePrompt, + }, + [NEGATIVE_CONDITIONING]: { + type: 'compel', + id: NEGATIVE_CONDITIONING, + is_intermediate: true, + prompt: negativePrompt, + }, + [MASK_FROM_ALPHA]: { + type: 'tomask', + id: MASK_FROM_ALPHA, + is_intermediate: true, + image: canvasInitImage, + }, + [MASK_COMBINE]: { + type: 'mask_combine', + id: MASK_COMBINE, + is_intermediate: true, + mask2: canvasMaskImage, + }, + [MASK_BLUR]: { + type: 'img_blur', + id: MASK_BLUR, + is_intermediate: true, + radius: maskBlur, + blur_type: maskBlurMethod, + }, + [INPAINT_INFILL]: { + type: 'infill_tile', + id: INPAINT_INFILL, + is_intermediate: true, + tile_size: tileSize, + }, + [INPAINT_IMAGE]: { + type: 'i2l', + id: INPAINT_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + use_cpu, + is_intermediate: true, + }, + [DENOISE_LATENTS]: { + type: 'denoise_latents', + id: DENOISE_LATENTS, + is_intermediate: true, + steps: steps, + cfg_scale: cfg_scale, + scheduler: scheduler, + denoising_start: 1 - strength, + denoising_end: 1, + }, + [LATENTS_TO_IMAGE]: { + type: 'l2i', + id: LATENTS_TO_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [COLOR_CORRECT]: { + type: 'color_correct', + id: COLOR_CORRECT, + is_intermediate: true, + }, + [CANVAS_OUTPUT]: { + type: 'img_paste', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + }, + [RANGE_OF_SIZE]: { + type: 'range_of_size', + id: RANGE_OF_SIZE, + is_intermediate: true, + // seed - must be connected manually + // start: 0, + size: iterations, + step: 1, + }, + [ITERATE]: { + type: 'iterate', + id: ITERATE, + is_intermediate: true, + }, + }, + edges: [ + // Connect Model Loader To UNet & Clip Skip + { + source: { + node_id: MAIN_MODEL_LOADER, + field: 'unet', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'unet', + }, + }, + { + source: { + node_id: MAIN_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: CLIP_SKIP, + field: 'clip', + }, + }, + // Connect CLIP Skip to Conditioning + { + source: { + node_id: CLIP_SKIP, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: CLIP_SKIP, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + // Connect Infill Result To Inpaint Image + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE, + field: 'image', + }, + }, + // Combine Mask from Init Image with User Painted Mask + { + source: { + node_id: MASK_FROM_ALPHA, + field: 'mask', + }, + destination: { + node_id: MASK_COMBINE, + field: 'mask1', + }, + }, + // Plug Everything Into Inpaint Node + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, + { + source: { + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'negative_conditioning', + }, + }, + { 
+ source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'noise', + }, + }, + { + source: { + node_id: INPAINT_IMAGE, + field: 'latents', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'mask', + }, + }, + // Iterate + { + source: { + node_id: RANGE_OF_SIZE, + field: 'collection', + }, + destination: { + node_id: ITERATE, + field: 'collection', + }, + }, + { + source: { + node_id: ITERATE, + field: 'item', + }, + destination: { + node_id: NOISE, + field: 'seed', + }, + }, + // Decode the result from Inpaint + { + source: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, + ], + }; + + // Add Infill Nodes + + if (infillMethod === 'patchmatch') { + graph.nodes[INPAINT_INFILL] = { + type: 'infill_patchmatch', + id: INPAINT_INFILL, + is_intermediate: true, + }; + } + + // Handle Scale Before Processing + if (['auto', 'manual'].includes(boundingBoxScaleMethod)) { + const scaledWidth: number = scaledBoundingBoxDimensions.width; + const scaledHeight: number = scaledBoundingBoxDimensions.height; + + // Add Scaling Nodes + graph.nodes[INPAINT_IMAGE_RESIZE_UP] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasInitImage, + }; + graph.nodes[MASK_RESIZE_UP] = { + type: 'img_resize', + id: MASK_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + }; + graph.nodes[INPAINT_IMAGE_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[INPAINT_INFILL_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_INFILL_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[MASK_RESIZE_DOWN] = { + type: 'img_resize', + id: MASK_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: scaledWidth, + height: scaledHeight, + }; + + // Connect Nodes + graph.edges.push( + // Scale Inpaint Image + { + source: { + node_id: INPAINT_IMAGE_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: INPAINT_INFILL, + field: 'image', + }, + }, + // Take combined mask and resize and then blur + { + source: { + node_id: MASK_COMBINE, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_UP, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Resize Results Down + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'reference', + }, + }, + { + source: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: 
COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Everything Back + { + source: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'base_image', + }, + }, + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } else { + // Add Images To Nodes + graph.nodes[INPAINT_INFILL] = { + ...(graph.nodes[INPAINT_INFILL] as + | InfillTileInvocation + | InfillPatchmatchInvocation), + image: canvasInitImage, + }; + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: width, + height: height, + }; + graph.nodes[INPAINT_IMAGE] = { + ...(graph.nodes[INPAINT_IMAGE] as ImageToLatentsInvocation), + image: canvasInitImage, + }; + graph.nodes[MASK_BLUR] = { + ...(graph.nodes[MASK_BLUR] as ImageBlurInvocation), + image: canvasMaskImage, + }; + + graph.edges.push( + // Take combined mask and plug it to blur + { + source: { + node_id: MASK_COMBINE, + field: 'image', + }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'reference', + }, + }, + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Everything Back + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'base_image', + }, + }, + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } + + // Handle Seed + if (shouldRandomizeSeed) { + // Random int node to generate the starting seed + const randomIntNode: RandomIntInvocation = { + id: RANDOM_INT, + type: 'rand_int', + }; + + graph.nodes[RANDOM_INT] = randomIntNode; + + // Connect random int to the start of the range of size so the range starts on the random first seed + graph.edges.push({ + source: { node_id: RANDOM_INT, field: 'a' }, + destination: { node_id: RANGE_OF_SIZE, field: 'start' }, + }); + } else { + // User specified seed, so set the start of the range of size to the seed + (graph.nodes[RANGE_OF_SIZE] as RangeOfSizeInvocation).start = seed; + } + + // Add VAE + addVAEToGraph(state, graph, MAIN_MODEL_LOADER); + + // add LoRA support + addLoRAsToGraph(state, graph, DENOISE_LATENTS, MAIN_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); + + // NSFW & watermark - must be last thing added to graph + if (state.system.shouldUseNSFWChecker) { + // must add before watermarker! + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); + } + + if (state.system.shouldUseWatermarker) { + // must add after nsfw checker! 
+ addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); + } + + return graph; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLImageToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLImageToImageGraph.ts new file mode 100644 index 0000000000..ef32943bc8 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLImageToImageGraph.ts @@ -0,0 +1,378 @@ +import { logger } from 'app/logging/logger'; +import { RootState } from 'app/store/store'; +import { NonNullableGraph } from 'features/nodes/types/types'; +import { initialGenerationState } from 'features/parameters/store/generationSlice'; +import { + ImageDTO, + ImageResizeInvocation, + ImageToLatentsInvocation, +} from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; +import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; +import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; +import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; +import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; +import { addWatermarkerToGraph } from './addWatermarkerToGraph'; +import { + CANVAS_OUTPUT, + IMAGE_TO_LATENTS, + METADATA_ACCUMULATOR, + NEGATIVE_CONDITIONING, + NOISE, + POSITIVE_CONDITIONING, + RESIZE, + SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH, + SDXL_DENOISE_LATENTS, + SDXL_MODEL_LOADER, +} from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; + +/** + * Builds the Canvas tab's Image to Image graph. + */ +export const buildCanvasSDXLImageToImageGraph = ( + state: RootState, + initialImage: ImageDTO +): NonNullableGraph => { + const log = logger('nodes'); + const { + positivePrompt, + negativePrompt, + model, + cfgScale: cfg_scale, + scheduler, + steps, + vaePrecision, + clipSkip, + shouldUseCpuNoise, + shouldUseNoiseSettings, + } = state.generation; + + const { + shouldUseSDXLRefiner, + refinerStart, + sdxlImg2ImgDenoisingStrength: strength, + shouldConcatSDXLStylePrompt, + } = state.sdxl; + + // The bounding box determines width and height, not the width and height params + const { width, height } = state.canvas.boundingBoxDimensions; + + const { shouldAutoSave } = state.canvas; + + if (!model) { + log.error('No model found in state'); + throw new Error('No model found in state'); + } + + const use_cpu = shouldUseNoiseSettings + ? shouldUseCpuNoise + : initialGenerationState.shouldUseCpuNoise; + + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + + /** + * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the + * full graph here as a template. Then use the parameters from app state and set friendlier node + * ids. + * + * The only thing we need extra logic for is handling randomized seed, control net, and for img2img, + * the `fit` param. These are added to the graph at the end. 
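+ *
+ * The img2img denoising window comes from `sdxlImg2ImgDenoisingStrength` and, when the refiner is
+ * enabled, `refinerStart`. As an illustrative example (values assumed, not defaults): with
+ * strength 0.7 and refinerStart 0.8, denoising_start = min(0.8, 1 - 0.7) = 0.3 and
+ * denoising_end = 0.8; with the refiner disabled, denoising_start = 0.3 and denoising_end = 1.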
+ */ + + // copy-pasted graph from node editor, filled in with state values & friendly node ids + const graph: NonNullableGraph = { + id: SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH, + nodes: { + [SDXL_MODEL_LOADER]: { + type: 'sdxl_model_loader', + id: SDXL_MODEL_LOADER, + model, + }, + [POSITIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: POSITIVE_CONDITIONING, + prompt: positivePrompt, + style: craftedPositiveStylePrompt, + }, + [NEGATIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: NEGATIVE_CONDITIONING, + prompt: negativePrompt, + style: craftedNegativeStylePrompt, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + is_intermediate: true, + use_cpu, + }, + [IMAGE_TO_LATENTS]: { + type: 'i2l', + id: IMAGE_TO_LATENTS, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + // must be set manually later, bc `fit` parameter may require a resize node inserted + // image: { + // image_name: initialImage.image_name, + // }, + }, + [SDXL_DENOISE_LATENTS]: { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, + is_intermediate: true, + cfg_scale, + scheduler, + steps, + denoising_start: shouldUseSDXLRefiner + ? Math.min(refinerStart, 1 - strength) + : 1 - strength, + denoising_end: shouldUseSDXLRefiner ? refinerStart : 1, + }, + [CANVAS_OUTPUT]: { + type: 'l2i', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + fp32: vaePrecision === 'fp32' ? true : false, + }, + }, + edges: [ + // Connect Model Loader To UNet & CLIP + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'unet', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'unet', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip2', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip2', + }, + }, + // Connect Everything to Denoise Latents + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, + { + source: { + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'negative_conditioning', + }, + }, + { + source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'noise', + }, + }, + { + source: { + node_id: IMAGE_TO_LATENTS, + field: 'latents', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + }, + // Decode denoised latents to an image + { + source: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'latents', + }, + }, + ], + }; + + // handle `fit` + if (initialImage.width !== width || initialImage.height !== height) { + // The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS` + + // Create a resize node, explicitly setting its image + const resizeNode: ImageResizeInvocation = { + id: RESIZE, + type: 'img_resize', + image: { + image_name: initialImage.image_name, + }, + is_intermediate: true, + width, + height, + }; + + graph.nodes[RESIZE] 
= resizeNode; + + // The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS` + graph.edges.push({ + source: { node_id: RESIZE, field: 'image' }, + destination: { + node_id: IMAGE_TO_LATENTS, + field: 'image', + }, + }); + + // The `RESIZE` node also passes its width and height to `NOISE` + graph.edges.push({ + source: { node_id: RESIZE, field: 'width' }, + destination: { + node_id: NOISE, + field: 'width', + }, + }); + + graph.edges.push({ + source: { node_id: RESIZE, field: 'height' }, + destination: { + node_id: NOISE, + field: 'height', + }, + }); + } else { + // We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly + (graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = { + image_name: initialImage.image_name, + }; + + // Pass the image's dimensions to the `NOISE` node + graph.edges.push({ + source: { node_id: IMAGE_TO_LATENTS, field: 'width' }, + destination: { + node_id: NOISE, + field: 'width', + }, + }); + graph.edges.push({ + source: { node_id: IMAGE_TO_LATENTS, field: 'height' }, + destination: { + node_id: NOISE, + field: 'height', + }, + }); + } + + // add metadata accumulator, which is only mostly populated - some fields are added later + graph.nodes[METADATA_ACCUMULATOR] = { + id: METADATA_ACCUMULATOR, + type: 'metadata_accumulator', + generation_mode: 'img2img', + cfg_scale, + height, + width, + positive_prompt: '', // set in addDynamicPromptsToGraph + negative_prompt: negativePrompt, + model, + seed: 0, // set in addDynamicPromptsToGraph + steps, + rand_device: use_cpu ? 'cpu' : 'cuda', + scheduler, + vae: undefined, // option; set in addVAEToGraph + controlnets: [], // populated in addControlNetToLinearGraph + loras: [], // populated in addLoRAsToGraph + clip_skip: clipSkip, + strength, + init_image: initialImage.image_name, + }; + + graph.edges.push({ + source: { + node_id: METADATA_ACCUMULATOR, + field: 'metadata', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'metadata', + }, + }); + + // add LoRA support + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER); + + // Add Refiner if enabled + if (shouldUseSDXLRefiner) { + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); + } + + // optionally add custom VAE + addVAEToGraph(state, graph, SDXL_MODEL_LOADER); + + // add dynamic prompts - also sets up core iteration and seed + addDynamicPromptsToGraph(state, graph); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + + // NSFW & watermark - must be last thing added to graph + if (state.system.shouldUseNSFWChecker) { + // must add before watermarker! + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); + } + + if (state.system.shouldUseWatermarker) { + // must add after nsfw checker! 
+ addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); + } + + return graph; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLInpaintGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLInpaintGraph.ts new file mode 100644 index 0000000000..ba40a70c83 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLInpaintGraph.ts @@ -0,0 +1,555 @@ +import { logger } from 'app/logging/logger'; +import { RootState } from 'app/store/store'; +import { NonNullableGraph } from 'features/nodes/types/types'; +import { + ImageBlurInvocation, + ImageDTO, + ImageToLatentsInvocation, + NoiseInvocation, + RandomIntInvocation, + RangeOfSizeInvocation, +} from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; +import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; +import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; +import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; +import { addWatermarkerToGraph } from './addWatermarkerToGraph'; +import { + CANVAS_OUTPUT, + COLOR_CORRECT, + INPAINT_IMAGE, + INPAINT_IMAGE_RESIZE_DOWN, + INPAINT_IMAGE_RESIZE_UP, + ITERATE, + LATENTS_TO_IMAGE, + MASK_BLUR, + MASK_RESIZE_DOWN, + MASK_RESIZE_UP, + NEGATIVE_CONDITIONING, + NOISE, + POSITIVE_CONDITIONING, + RANDOM_INT, + RANGE_OF_SIZE, + SDXL_CANVAS_INPAINT_GRAPH, + SDXL_DENOISE_LATENTS, + SDXL_MODEL_LOADER, +} from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; + +/** + * Builds the Canvas tab's Inpaint graph. + */ +export const buildCanvasSDXLInpaintGraph = ( + state: RootState, + canvasInitImage: ImageDTO, + canvasMaskImage: ImageDTO +): NonNullableGraph => { + const log = logger('nodes'); + const { + positivePrompt, + negativePrompt, + model, + cfgScale: cfg_scale, + scheduler, + steps, + iterations, + seed, + shouldRandomizeSeed, + vaePrecision, + shouldUseNoiseSettings, + shouldUseCpuNoise, + maskBlur, + maskBlurMethod, + } = state.generation; + + const { + sdxlImg2ImgDenoisingStrength: strength, + shouldUseSDXLRefiner, + refinerStart, + shouldConcatSDXLStylePrompt, + } = state.sdxl; + + if (!model) { + log.error('No model found in state'); + throw new Error('No model found in state'); + } + + // The bounding box determines width and height, not the width and height params + const { width, height } = state.canvas.boundingBoxDimensions; + + // We may need to set the inpaint width and height to scale the image + const { + scaledBoundingBoxDimensions, + boundingBoxScaleMethod, + shouldAutoSave, + } = state.canvas; + + const use_cpu = shouldUseNoiseSettings + ? 
shouldUseCpuNoise + : shouldUseCpuNoise; + + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + + const graph: NonNullableGraph = { + id: SDXL_CANVAS_INPAINT_GRAPH, + nodes: { + [SDXL_MODEL_LOADER]: { + type: 'sdxl_model_loader', + id: SDXL_MODEL_LOADER, + model, + }, + [POSITIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: POSITIVE_CONDITIONING, + prompt: positivePrompt, + style: craftedPositiveStylePrompt, + }, + [NEGATIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: NEGATIVE_CONDITIONING, + prompt: negativePrompt, + style: craftedNegativeStylePrompt, + }, + [MASK_BLUR]: { + type: 'img_blur', + id: MASK_BLUR, + is_intermediate: true, + radius: maskBlur, + blur_type: maskBlurMethod, + }, + [INPAINT_IMAGE]: { + type: 'i2l', + id: INPAINT_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + use_cpu, + is_intermediate: true, + }, + [SDXL_DENOISE_LATENTS]: { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, + is_intermediate: true, + steps: steps, + cfg_scale: cfg_scale, + scheduler: scheduler, + denoising_start: shouldUseSDXLRefiner + ? Math.min(refinerStart, 1 - strength) + : 1 - strength, + denoising_end: shouldUseSDXLRefiner ? refinerStart : 1, + }, + [LATENTS_TO_IMAGE]: { + type: 'l2i', + id: LATENTS_TO_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [COLOR_CORRECT]: { + type: 'color_correct', + id: COLOR_CORRECT, + is_intermediate: true, + reference: canvasInitImage, + }, + [CANVAS_OUTPUT]: { + type: 'img_paste', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + base_image: canvasInitImage, + }, + [RANGE_OF_SIZE]: { + type: 'range_of_size', + id: RANGE_OF_SIZE, + is_intermediate: true, + // seed - must be connected manually + // start: 0, + size: iterations, + step: 1, + }, + [ITERATE]: { + type: 'iterate', + id: ITERATE, + is_intermediate: true, + }, + }, + edges: [ + // Connect Model Loader to UNet and CLIP + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'unet', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'unet', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip2', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip2', + }, + }, + // Connect everything to Inpaint + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, + { + source: { + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'negative_conditioning', + }, + }, + { + source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'noise', + }, + }, + { + source: { + node_id: INPAINT_IMAGE, + field: 'latents', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, 
+ destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'mask', + }, + }, + // Iterate + { + source: { + node_id: RANGE_OF_SIZE, + field: 'collection', + }, + destination: { + node_id: ITERATE, + field: 'collection', + }, + }, + { + source: { + node_id: ITERATE, + field: 'item', + }, + destination: { + node_id: NOISE, + field: 'seed', + }, + }, + // Decode inpainted latents to image + { + source: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, + ], + }; + + // Handle Scale Before Processing + if (['auto', 'manual'].includes(boundingBoxScaleMethod)) { + const scaledWidth: number = scaledBoundingBoxDimensions.width; + const scaledHeight: number = scaledBoundingBoxDimensions.height; + + // Add Scaling Nodes + graph.nodes[INPAINT_IMAGE_RESIZE_UP] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasInitImage, + }; + graph.nodes[MASK_RESIZE_UP] = { + type: 'img_resize', + id: MASK_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasMaskImage, + }; + graph.nodes[INPAINT_IMAGE_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[MASK_RESIZE_DOWN] = { + type: 'img_resize', + id: MASK_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: scaledWidth, + height: scaledHeight, + }; + + // Connect Nodes + graph.edges.push( + // Scale Inpaint Image and Mask + { + source: { + node_id: INPAINT_IMAGE_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Back Onto Original Image + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } else { + // Add Images To Nodes + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: width, + height: height, + }; + graph.nodes[INPAINT_IMAGE] = { + ...(graph.nodes[INPAINT_IMAGE] as ImageToLatentsInvocation), + image: canvasInitImage, + }; + graph.nodes[MASK_BLUR] = { + ...(graph.nodes[MASK_BLUR] as ImageBlurInvocation), + image: canvasMaskImage, + }; + + graph.edges.push( + // Color Correct The Inpainted Result + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + 
destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Back Onto Original Image + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } + + // Handle Seed + if (shouldRandomizeSeed) { + // Random int node to generate the starting seed + const randomIntNode: RandomIntInvocation = { + id: RANDOM_INT, + type: 'rand_int', + }; + + graph.nodes[RANDOM_INT] = randomIntNode; + + // Connect random int to the start of the range of size so the range starts on the random first seed + graph.edges.push({ + source: { node_id: RANDOM_INT, field: 'a' }, + destination: { node_id: RANGE_OF_SIZE, field: 'start' }, + }); + } else { + // User specified seed, so set the start of the range of size to the seed + (graph.nodes[RANGE_OF_SIZE] as RangeOfSizeInvocation).start = seed; + } + + // Add Refiner if enabled + if (shouldUseSDXLRefiner) { + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); + } + + // optionally add custom VAE + addVAEToGraph(state, graph, SDXL_MODEL_LOADER); + + // add LoRA support + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + + // NSFW & watermark - must be last thing added to graph + if (state.system.shouldUseNSFWChecker) { + // must add before watermarker! + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); + } + + if (state.system.shouldUseWatermarker) { + // must add after nsfw checker! + addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); + } + + return graph; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLOutpaintGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLOutpaintGraph.ts new file mode 100644 index 0000000000..50a773bf50 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLOutpaintGraph.ts @@ -0,0 +1,696 @@ +import { logger } from 'app/logging/logger'; +import { RootState } from 'app/store/store'; +import { NonNullableGraph } from 'features/nodes/types/types'; +import { + ImageBlurInvocation, + ImageDTO, + ImageToLatentsInvocation, + InfillPatchmatchInvocation, + InfillTileInvocation, + NoiseInvocation, + RandomIntInvocation, + RangeOfSizeInvocation, +} from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; +import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; +import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; +import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; +import { addWatermarkerToGraph } from './addWatermarkerToGraph'; +import { + CANVAS_OUTPUT, + COLOR_CORRECT, + INPAINT_IMAGE, + INPAINT_IMAGE_RESIZE_DOWN, + INPAINT_IMAGE_RESIZE_UP, + INPAINT_INFILL, + INPAINT_INFILL_RESIZE_DOWN, + ITERATE, + LATENTS_TO_IMAGE, + MASK_BLUR, + MASK_COMBINE, + MASK_FROM_ALPHA, + MASK_RESIZE_DOWN, + MASK_RESIZE_UP, + NEGATIVE_CONDITIONING, + NOISE, + POSITIVE_CONDITIONING, + RANDOM_INT, + RANGE_OF_SIZE, + SDXL_CANVAS_OUTPAINT_GRAPH, + SDXL_DENOISE_LATENTS, + SDXL_MODEL_LOADER, +} from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; + +/** + * Builds the Canvas tab's Outpaint graph. 
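+ *
+ * The outpaint mask is derived from the init image's alpha channel (MASK_FROM_ALPHA), combined
+ * with any user-painted mask (MASK_COMBINE) and blurred (MASK_BLUR) before being passed to the
+ * denoise node, while the init image itself is infilled (INPAINT_INFILL) and then encoded to
+ * latents (INPAINT_IMAGE).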
+ */ +export const buildCanvasSDXLOutpaintGraph = ( + state: RootState, + canvasInitImage: ImageDTO, + canvasMaskImage?: ImageDTO +): NonNullableGraph => { + const log = logger('nodes'); + const { + positivePrompt, + negativePrompt, + model, + cfgScale: cfg_scale, + scheduler, + steps, + iterations, + seed, + shouldRandomizeSeed, + vaePrecision, + shouldUseNoiseSettings, + shouldUseCpuNoise, + maskBlur, + maskBlurMethod, + tileSize, + infillMethod, + } = state.generation; + + const { + sdxlImg2ImgDenoisingStrength: strength, + shouldUseSDXLRefiner, + refinerStart, + shouldConcatSDXLStylePrompt, + } = state.sdxl; + + if (!model) { + log.error('No model found in state'); + throw new Error('No model found in state'); + } + + // The bounding box determines width and height, not the width and height params + const { width, height } = state.canvas.boundingBoxDimensions; + + // We may need to set the inpaint width and height to scale the image + const { + scaledBoundingBoxDimensions, + boundingBoxScaleMethod, + shouldAutoSave, + } = state.canvas; + + const use_cpu = shouldUseNoiseSettings + ? shouldUseCpuNoise + : shouldUseCpuNoise; + + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + + const graph: NonNullableGraph = { + id: SDXL_CANVAS_OUTPAINT_GRAPH, + nodes: { + [SDXL_MODEL_LOADER]: { + type: 'sdxl_model_loader', + id: SDXL_MODEL_LOADER, + model, + }, + [POSITIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: POSITIVE_CONDITIONING, + prompt: positivePrompt, + style: craftedPositiveStylePrompt, + }, + [NEGATIVE_CONDITIONING]: { + type: 'sdxl_compel_prompt', + id: NEGATIVE_CONDITIONING, + prompt: negativePrompt, + style: craftedNegativeStylePrompt, + }, + [MASK_FROM_ALPHA]: { + type: 'tomask', + id: MASK_FROM_ALPHA, + is_intermediate: true, + image: canvasInitImage, + }, + [MASK_COMBINE]: { + type: 'mask_combine', + id: MASK_COMBINE, + is_intermediate: true, + mask2: canvasMaskImage, + }, + [MASK_BLUR]: { + type: 'img_blur', + id: MASK_BLUR, + is_intermediate: true, + radius: maskBlur, + blur_type: maskBlurMethod, + }, + [INPAINT_INFILL]: { + type: 'infill_tile', + id: INPAINT_INFILL, + is_intermediate: true, + tile_size: tileSize, + }, + [INPAINT_IMAGE]: { + type: 'i2l', + id: INPAINT_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? true : false, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + use_cpu, + is_intermediate: true, + }, + [SDXL_DENOISE_LATENTS]: { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, + is_intermediate: true, + steps: steps, + cfg_scale: cfg_scale, + scheduler: scheduler, + denoising_start: shouldUseSDXLRefiner + ? Math.min(refinerStart, 1 - strength) + : 1 - strength, + denoising_end: shouldUseSDXLRefiner ? refinerStart : 1, + }, + [LATENTS_TO_IMAGE]: { + type: 'l2i', + id: LATENTS_TO_IMAGE, + is_intermediate: true, + fp32: vaePrecision === 'fp32' ? 
true : false, + }, + [COLOR_CORRECT]: { + type: 'color_correct', + id: COLOR_CORRECT, + is_intermediate: true, + }, + [CANVAS_OUTPUT]: { + type: 'img_paste', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + }, + [RANGE_OF_SIZE]: { + type: 'range_of_size', + id: RANGE_OF_SIZE, + is_intermediate: true, + // seed - must be connected manually + // start: 0, + size: iterations, + step: 1, + }, + [ITERATE]: { + type: 'iterate', + id: ITERATE, + is_intermediate: true, + }, + }, + edges: [ + // Connect Model Loader To UNet and CLIP + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'unet', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'unet', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip2', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: SDXL_MODEL_LOADER, + field: 'clip2', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip2', + }, + }, + // Connect Infill Result To Inpaint Image + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE, + field: 'image', + }, + }, + // Combine Mask from Init Image with User Painted Mask + { + source: { + node_id: MASK_FROM_ALPHA, + field: 'mask', + }, + destination: { + node_id: MASK_COMBINE, + field: 'mask1', + }, + }, + // Connect Everything To Inpaint + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, + { + source: { + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'negative_conditioning', + }, + }, + { + source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'noise', + }, + }, + { + source: { + node_id: INPAINT_IMAGE, + field: 'latents', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'mask', + }, + }, + // Iterate + { + source: { + node_id: RANGE_OF_SIZE, + field: 'collection', + }, + destination: { + node_id: ITERATE, + field: 'collection', + }, + }, + { + source: { + node_id: ITERATE, + field: 'item', + }, + destination: { + node_id: NOISE, + field: 'seed', + }, + }, + // Decode inpainted latents to image + { + source: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, + ], + }; + + // Add Infill Nodes + + if (infillMethod === 'patchmatch') { + graph.nodes[INPAINT_INFILL] = { + type: 'infill_patchmatch', + id: INPAINT_INFILL, + is_intermediate: true, + }; + } + + // Handle Scale Before Processing + if (['auto', 'manual'].includes(boundingBoxScaleMethod)) { + const scaledWidth: number = scaledBoundingBoxDimensions.width; + const scaledHeight: number = scaledBoundingBoxDimensions.height; + + // Add Scaling Nodes + graph.nodes[INPAINT_IMAGE_RESIZE_UP] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + image: canvasInitImage, 
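+ // the upscaled init image feeds INPAINT_INFILL below; the infilled result is then encoded by INPAINT_IMAGE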
+ }; + graph.nodes[MASK_RESIZE_UP] = { + type: 'img_resize', + id: MASK_RESIZE_UP, + is_intermediate: true, + width: scaledWidth, + height: scaledHeight, + }; + graph.nodes[INPAINT_IMAGE_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_IMAGE_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[INPAINT_INFILL_RESIZE_DOWN] = { + type: 'img_resize', + id: INPAINT_INFILL_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + graph.nodes[MASK_RESIZE_DOWN] = { + type: 'img_resize', + id: MASK_RESIZE_DOWN, + is_intermediate: true, + width: width, + height: height, + }; + + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: scaledWidth, + height: scaledHeight, + }; + + // Connect Nodes + graph.edges.push( + // Scale Inpaint Image + { + source: { + node_id: INPAINT_IMAGE_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: INPAINT_INFILL, + field: 'image', + }, + }, + // Take combined mask and resize and then blur + { + source: { + node_id: MASK_COMBINE, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_UP, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_UP, + field: 'image', + }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Resize Results Down + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + }, + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'reference', + }, + }, + { + source: { + node_id: INPAINT_IMAGE_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Everything Back + { + source: { + node_id: INPAINT_INFILL_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'base_image', + }, + }, + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_RESIZE_DOWN, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } else { + // Add Images To Nodes + graph.nodes[INPAINT_INFILL] = { + ...(graph.nodes[INPAINT_INFILL] as + | InfillTileInvocation + | InfillPatchmatchInvocation), + image: canvasInitImage, + }; + graph.nodes[NOISE] = { + ...(graph.nodes[NOISE] as NoiseInvocation), + width: width, + height: height, + }; + graph.nodes[INPAINT_IMAGE] = { + ...(graph.nodes[INPAINT_IMAGE] as ImageToLatentsInvocation), + image: canvasInitImage, + }; + graph.nodes[MASK_BLUR] = { + ...(graph.nodes[MASK_BLUR] as ImageBlurInvocation), + image: canvasMaskImage, + }; + + graph.edges.push( + // Take combined mask and plug it to blur + { + source: { + node_id: MASK_COMBINE, + field: 'image', + }, + destination: { + node_id: MASK_BLUR, + field: 'image', + }, + }, + // Color Correct The Inpainted Result + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: 
{ + node_id: COLOR_CORRECT, + field: 'reference', + }, + }, + { + source: { + node_id: LATENTS_TO_IMAGE, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: COLOR_CORRECT, + field: 'mask', + }, + }, + // Paste Everything Back + { + source: { + node_id: INPAINT_INFILL, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'base_image', + }, + }, + { + source: { + node_id: COLOR_CORRECT, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'image', + }, + }, + { + source: { + node_id: MASK_BLUR, + field: 'image', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'mask', + }, + } + ); + } + + // Handle seed + if (shouldRandomizeSeed) { + // Random int node to generate the starting seed + const randomIntNode: RandomIntInvocation = { + id: RANDOM_INT, + type: 'rand_int', + }; + + graph.nodes[RANDOM_INT] = randomIntNode; + + // Connect random int to the start of the range of size so the range starts on the random first seed + graph.edges.push({ + source: { node_id: RANDOM_INT, field: 'a' }, + destination: { node_id: RANGE_OF_SIZE, field: 'start' }, + }); + } else { + // User specified seed, so set the start of the range of size to the seed + (graph.nodes[RANGE_OF_SIZE] as RangeOfSizeInvocation).start = seed; + } + + // Add Refiner if enabled + if (shouldUseSDXLRefiner) { + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); + } + + // optionally add custom VAE + addVAEToGraph(state, graph, SDXL_MODEL_LOADER); + + // add LoRA support + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + + // NSFW & watermark - must be last thing added to graph + if (state.system.shouldUseNSFWChecker) { + // must add before watermarker! + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); + } + + if (state.system.shouldUseWatermarker) { + // must add after nsfw checker! 
+ addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); + } + + return graph; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLTextToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLTextToImageGraph.ts new file mode 100644 index 0000000000..e79e08ba41 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasSDXLTextToImageGraph.ts @@ -0,0 +1,309 @@ +import { logger } from 'app/logging/logger'; +import { RootState } from 'app/store/store'; +import { NonNullableGraph } from 'features/nodes/types/types'; +import { initialGenerationState } from 'features/parameters/store/generationSlice'; +import { + DenoiseLatentsInvocation, + ONNXTextToLatentsInvocation, +} from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; +import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; +import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; +import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; +import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; +import { addWatermarkerToGraph } from './addWatermarkerToGraph'; +import { + CANVAS_OUTPUT, + METADATA_ACCUMULATOR, + NEGATIVE_CONDITIONING, + NOISE, + ONNX_MODEL_LOADER, + POSITIVE_CONDITIONING, + SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH, + SDXL_DENOISE_LATENTS, + SDXL_MODEL_LOADER, +} from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; + +/** + * Builds the Canvas tab's Text to Image graph. + */ +export const buildCanvasSDXLTextToImageGraph = ( + state: RootState +): NonNullableGraph => { + const log = logger('nodes'); + const { + positivePrompt, + negativePrompt, + model, + cfgScale: cfg_scale, + scheduler, + steps, + vaePrecision, + clipSkip, + shouldUseCpuNoise, + shouldUseNoiseSettings, + } = state.generation; + + // The bounding box determines width and height, not the width and height params + const { width, height } = state.canvas.boundingBoxDimensions; + + const { shouldAutoSave } = state.canvas; + + const { shouldUseSDXLRefiner, refinerStart, shouldConcatSDXLStylePrompt } = + state.sdxl; + + if (!model) { + log.error('No model found in state'); + throw new Error('No model found in state'); + } + + const use_cpu = shouldUseNoiseSettings + ? shouldUseCpuNoise + : initialGenerationState.shouldUseCpuNoise; + + const isUsingOnnxModel = model.model_type === 'onnx'; + + const modelLoaderNodeId = isUsingOnnxModel + ? ONNX_MODEL_LOADER + : SDXL_MODEL_LOADER; + + const modelLoaderNodeType = isUsingOnnxModel + ? 'onnx_model_loader' + : 'sdxl_model_loader'; + + const t2lNode: DenoiseLatentsInvocation | ONNXTextToLatentsInvocation = + isUsingOnnxModel + ? { + type: 't2l_onnx', + id: SDXL_DENOISE_LATENTS, + is_intermediate: true, + cfg_scale, + scheduler, + steps, + } + : { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, + is_intermediate: true, + cfg_scale, + scheduler, + steps, + denoising_start: 0, + denoising_end: shouldUseSDXLRefiner ? refinerStart : 1, + }; + + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + + /** + * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the + * full graph here as a template. Then use the parameters from app state and set friendlier node + * ids. 
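+ *
+ * ONNX models are handled by swapping in the `onnx_model_loader`, `prompt_onnx`, `t2l_onnx` and
+ * `l2i_onnx` node types (see the ONNX TODO below); SDXL models use `sdxl_model_loader`,
+ * `sdxl_compel_prompt`, `denoise_latents` and `l2i`.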
+ * + * The only thing we need extra logic for is handling randomized seed, control net, and for img2img, + * the `fit` param. These are added to the graph at the end. + */ + + // copy-pasted graph from node editor, filled in with state values & friendly node ids + // TODO: Actually create the graph correctly for ONNX + const graph: NonNullableGraph = { + id: SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH, + nodes: { + [modelLoaderNodeId]: { + type: modelLoaderNodeType, + id: modelLoaderNodeId, + is_intermediate: true, + model, + }, + [POSITIVE_CONDITIONING]: { + type: isUsingOnnxModel ? 'prompt_onnx' : 'sdxl_compel_prompt', + id: POSITIVE_CONDITIONING, + is_intermediate: true, + prompt: positivePrompt, + style: craftedPositiveStylePrompt, + }, + [NEGATIVE_CONDITIONING]: { + type: isUsingOnnxModel ? 'prompt_onnx' : 'sdxl_compel_prompt', + id: NEGATIVE_CONDITIONING, + is_intermediate: true, + prompt: negativePrompt, + style: craftedNegativeStylePrompt, + }, + [NOISE]: { + type: 'noise', + id: NOISE, + is_intermediate: true, + width, + height, + use_cpu, + }, + [t2lNode.id]: t2lNode, + [CANVAS_OUTPUT]: { + type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i', + id: CANVAS_OUTPUT, + is_intermediate: !shouldAutoSave, + fp32: vaePrecision === 'fp32' ? true : false, + }, + }, + edges: [ + // Connect Model Loader to UNet and CLIP + { + source: { + node_id: modelLoaderNodeId, + field: 'unet', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'unet', + }, + }, + { + source: { + node_id: modelLoaderNodeId, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: modelLoaderNodeId, + field: 'clip2', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip2', + }, + }, + { + source: { + node_id: modelLoaderNodeId, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: modelLoaderNodeId, + field: 'clip2', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip2', + }, + }, + // Connect everything to Denoise Latents + { + source: { + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'positive_conditioning', + }, + }, + { + source: { + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'negative_conditioning', + }, + }, + { + source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'noise', + }, + }, + // Decode Denoised Latents To Image + { + source: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'latents', + }, + }, + ], + }; + + // add metadata accumulator, which is only mostly populated - some fields are added later + graph.nodes[METADATA_ACCUMULATOR] = { + id: METADATA_ACCUMULATOR, + type: 'metadata_accumulator', + generation_mode: 'txt2img', + cfg_scale, + height, + width, + positive_prompt: '', // set in addDynamicPromptsToGraph + negative_prompt: negativePrompt, + model, + seed: 0, // set in addDynamicPromptsToGraph + steps, + rand_device: use_cpu ? 
'cpu' : 'cuda', + scheduler, + vae: undefined, // option; set in addVAEToGraph + controlnets: [], // populated in addControlNetToLinearGraph + loras: [], // populated in addLoRAsToGraph + clip_skip: clipSkip, + }; + + graph.edges.push({ + source: { + node_id: METADATA_ACCUMULATOR, + field: 'metadata', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'metadata', + }, + }); + + // Add Refiner if enabled + if (shouldUseSDXLRefiner) { + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); + } + + // add LoRA support + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, modelLoaderNodeId); + + // optionally add custom VAE + addVAEToGraph(state, graph, modelLoaderNodeId); + + // add dynamic prompts - also sets up core iteration and seed + addDynamicPromptsToGraph(state, graph); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + + // NSFW & watermark - must be last thing added to graph + if (state.system.shouldUseNSFWChecker) { + // must add before watermarker! + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); + } + + if (state.system.shouldUseWatermarker) { + // must add after nsfw checker! + addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); + } + + return graph; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasTextToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasTextToImageGraph.ts index 5b636b482a..4548a7e099 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasTextToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildCanvasTextToImageGraph.ts @@ -2,6 +2,10 @@ import { logger } from 'app/logging/logger'; import { RootState } from 'app/store/store'; import { NonNullableGraph } from 'features/nodes/types/types'; import { initialGenerationState } from 'features/parameters/store/generationSlice'; +import { + DenoiseLatentsInvocation, + ONNXTextToLatentsInvocation, +} from 'services/api/types'; import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; import { addLoRAsToGraph } from './addLoRAsToGraph'; @@ -9,21 +13,17 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { + CANVAS_OUTPUT, + CANVAS_TEXT_TO_IMAGE_GRAPH, CLIP_SKIP, - LATENTS_TO_IMAGE, + DENOISE_LATENTS, MAIN_MODEL_LOADER, - ONNX_MODEL_LOADER, METADATA_ACCUMULATOR, NEGATIVE_CONDITIONING, NOISE, + ONNX_MODEL_LOADER, POSITIVE_CONDITIONING, - TEXT_TO_IMAGE_GRAPH, - TEXT_TO_LATENTS, } from './constants'; -import { - ONNXTextToLatentsInvocation, - TextToLatentsInvocation, -} from 'services/api/types'; /** * Builds the Canvas tab's Text to Image graph. @@ -57,31 +57,38 @@ export const buildCanvasTextToImageGraph = ( const use_cpu = shouldUseNoiseSettings ? shouldUseCpuNoise : initialGenerationState.shouldUseCpuNoise; + const isUsingOnnxModel = model.model_type === 'onnx'; + const modelLoaderNodeId = isUsingOnnxModel ? ONNX_MODEL_LOADER : MAIN_MODEL_LOADER; + const modelLoaderNodeType = isUsingOnnxModel ? 'onnx_model_loader' : 'main_model_loader'; - const t2lNode: TextToLatentsInvocation | ONNXTextToLatentsInvocation = + + const t2lNode: DenoiseLatentsInvocation | ONNXTextToLatentsInvocation = isUsingOnnxModel ? 
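Every builder in this diff follows the same composition pattern: assemble a NonNullableGraph literal whose nodes are keyed by the friendly ids from constants.ts, wire them with source/destination edges, then pass the same mutable graph through the add*ToGraph helpers (refiner, VAE, LoRA, dynamic prompts, ControlNet, NSFW checker, watermarker). A minimal sketch of that shape, with hypothetical node ids and most fields omitted, purely for orientation:

// Illustrative only: not the actual canvas graph, just the shape the builders above produce.
const EXAMPLE_LOADER = 'example_model_loader'; // hypothetical friendly id
const EXAMPLE_DENOISE = 'example_denoise_latents'; // hypothetical friendly id

const exampleGraph = {
  id: 'example_graph',
  nodes: {
    [EXAMPLE_LOADER]: { type: 'main_model_loader', id: EXAMPLE_LOADER, is_intermediate: true },
    [EXAMPLE_DENOISE]: { type: 'denoise_latents', id: EXAMPLE_DENOISE, steps: 30, cfg_scale: 7.5 },
  },
  edges: [
    {
      source: { node_id: EXAMPLE_LOADER, field: 'unet' },
      destination: { node_id: EXAMPLE_DENOISE, field: 'unet' },
    },
  ],
};
// A real builder then calls helpers such as addVAEToGraph(state, exampleGraph, EXAMPLE_LOADER),
// each of which mutates the graph in place before it is returned.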
{ type: 't2l_onnx', - id: TEXT_TO_LATENTS, + id: DENOISE_LATENTS, is_intermediate: true, cfg_scale, scheduler, steps, } : { - type: 't2l', - id: TEXT_TO_LATENTS, + type: 'denoise_latents', + id: DENOISE_LATENTS, is_intermediate: true, cfg_scale, scheduler, steps, + denoising_start: 0, + denoising_end: 1, }; + /** * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the * full graph here as a template. Then use the parameters from app state and set friendlier node @@ -94,8 +101,20 @@ export const buildCanvasTextToImageGraph = ( // copy-pasted graph from node editor, filled in with state values & friendly node ids // TODO: Actually create the graph correctly for ONNX const graph: NonNullableGraph = { - id: TEXT_TO_IMAGE_GRAPH, + id: CANVAS_TEXT_TO_IMAGE_GRAPH, nodes: { + [modelLoaderNodeId]: { + type: modelLoaderNodeType, + id: modelLoaderNodeId, + is_intermediate: true, + model, + }, + [CLIP_SKIP]: { + type: 'clip_skip', + id: CLIP_SKIP, + is_intermediate: true, + skipped_layers: clipSkip, + }, [POSITIVE_CONDITIONING]: { type: isUsingOnnxModel ? 'prompt_onnx' : 'compel', id: POSITIVE_CONDITIONING, @@ -117,93 +136,74 @@ export const buildCanvasTextToImageGraph = ( use_cpu, }, [t2lNode.id]: t2lNode, - [modelLoaderNodeId]: { - type: modelLoaderNodeType, - id: modelLoaderNodeId, - is_intermediate: true, - model, - }, - [CLIP_SKIP]: { - type: 'clip_skip', - id: CLIP_SKIP, - is_intermediate: true, - skipped_layers: clipSkip, - }, - [LATENTS_TO_IMAGE]: { + [CANVAS_OUTPUT]: { type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i', - id: LATENTS_TO_IMAGE, + id: CANVAS_OUTPUT, is_intermediate: !shouldAutoSave, }, }, edges: [ + // Connect Model Loader to UNet & CLIP Skip { source: { - node_id: NEGATIVE_CONDITIONING, - field: 'conditioning', + node_id: modelLoaderNodeId, + field: 'unet', }, destination: { - node_id: TEXT_TO_LATENTS, - field: 'negative_conditioning', + node_id: DENOISE_LATENTS, + field: 'unet', }, }, + { + source: { + node_id: modelLoaderNodeId, + field: 'clip', + }, + destination: { + node_id: CLIP_SKIP, + field: 'clip', + }, + }, + // Connect CLIP Skip to Conditioning + { + source: { + node_id: CLIP_SKIP, + field: 'clip', + }, + destination: { + node_id: POSITIVE_CONDITIONING, + field: 'clip', + }, + }, + { + source: { + node_id: CLIP_SKIP, + field: 'clip', + }, + destination: { + node_id: NEGATIVE_CONDITIONING, + field: 'clip', + }, + }, + // Connect everything to Denoise Latents { source: { node_id: POSITIVE_CONDITIONING, field: 'conditioning', }, destination: { - node_id: TEXT_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'positive_conditioning', }, }, { source: { - node_id: modelLoaderNodeId, - field: 'clip', - }, - destination: { - node_id: CLIP_SKIP, - field: 'clip', - }, - }, - { - source: { - node_id: CLIP_SKIP, - field: 'clip', - }, - destination: { - node_id: POSITIVE_CONDITIONING, - field: 'clip', - }, - }, - { - source: { - node_id: CLIP_SKIP, - field: 'clip', - }, - destination: { node_id: NEGATIVE_CONDITIONING, - field: 'clip', - }, - }, - { - source: { - node_id: modelLoaderNodeId, - field: 'unet', + field: 'conditioning', }, destination: { - node_id: TEXT_TO_LATENTS, - field: 'unet', - }, - }, - { - source: { - node_id: TEXT_TO_LATENTS, - field: 'latents', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'latents', + node_id: DENOISE_LATENTS, + field: 'negative_conditioning', }, }, { @@ -212,10 +212,21 @@ export const buildCanvasTextToImageGraph = ( field: 'noise', }, destination: { - node_id: TEXT_TO_LATENTS, + node_id: 
DENOISE_LATENTS, field: 'noise', }, }, + // Decode denoised latents to image + { + source: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: CANVAS_OUTPUT, + field: 'latents', + }, + }, ], }; @@ -246,32 +257,32 @@ export const buildCanvasTextToImageGraph = ( field: 'metadata', }, destination: { - node_id: LATENTS_TO_IMAGE, + node_id: CANVAS_OUTPUT, field: 'metadata', }, }); - // add LoRA support - addLoRAsToGraph(state, graph, TEXT_TO_LATENTS, modelLoaderNodeId); - // optionally add custom VAE addVAEToGraph(state, graph, modelLoaderNodeId); + // add LoRA support + addLoRAsToGraph(state, graph, DENOISE_LATENTS, modelLoaderNodeId); + // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); // add controlnet, mutating `graph` - addControlNetToLinearGraph(state, graph, TEXT_TO_LATENTS); + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); // NSFW & watermark - must be last thing added to graph if (state.system.shouldUseNSFWChecker) { // must add before watermarker! - addNSFWCheckerToGraph(state, graph); + addNSFWCheckerToGraph(state, graph, CANVAS_OUTPUT); } if (state.system.shouldUseWatermarker) { // must add after nsfw checker! - addWatermarkerToGraph(state, graph); + addWatermarkerToGraph(state, graph, CANVAS_OUTPUT); } return graph; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearImageToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearImageToImageGraph.ts index f264edc6be..982a09357f 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearImageToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearImageToImageGraph.ts @@ -14,10 +14,10 @@ import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { CLIP_SKIP, + DENOISE_LATENTS, IMAGE_TO_IMAGE_GRAPH, IMAGE_TO_LATENTS, LATENTS_TO_IMAGE, - LATENTS_TO_LATENTS, MAIN_MODEL_LOADER, METADATA_ACCUMULATOR, NEGATIVE_CONDITIONING, @@ -118,13 +118,14 @@ export const buildLinearImageToImageGraph = ( id: LATENTS_TO_IMAGE, fp32: vaePrecision === 'fp32' ? 
true : false, }, - [LATENTS_TO_LATENTS]: { - type: 'l2l', - id: LATENTS_TO_LATENTS, + [DENOISE_LATENTS]: { + type: 'denoise_latents', + id: DENOISE_LATENTS, cfg_scale, scheduler, steps, - strength, + denoising_start: 1 - strength, + denoising_end: 1, }, [IMAGE_TO_LATENTS]: { type: 'i2l', @@ -137,13 +138,14 @@ export const buildLinearImageToImageGraph = ( }, }, edges: [ + // Connect Model Loader to UNet and CLIP Skip { source: { node_id: MAIN_MODEL_LOADER, field: 'unet', }, destination: { - node_id: LATENTS_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'unet', }, }, @@ -157,6 +159,7 @@ export const buildLinearImageToImageGraph = ( field: 'clip', }, }, + // Connect CLIP Skip to Conditioning { source: { node_id: CLIP_SKIP, @@ -177,24 +180,25 @@ export const buildLinearImageToImageGraph = ( field: 'clip', }, }, + // Connect everything to Denoise Latents { source: { - node_id: LATENTS_TO_LATENTS, - field: 'latents', + node_id: POSITIVE_CONDITIONING, + field: 'conditioning', }, destination: { - node_id: LATENTS_TO_IMAGE, - field: 'latents', + node_id: DENOISE_LATENTS, + field: 'positive_conditioning', }, }, { source: { - node_id: IMAGE_TO_LATENTS, - field: 'latents', + node_id: NEGATIVE_CONDITIONING, + field: 'conditioning', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'latents', + node_id: DENOISE_LATENTS, + field: 'negative_conditioning', }, }, { @@ -203,28 +207,29 @@ export const buildLinearImageToImageGraph = ( field: 'noise', }, destination: { - node_id: LATENTS_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'noise', }, }, { source: { - node_id: NEGATIVE_CONDITIONING, - field: 'conditioning', + node_id: IMAGE_TO_LATENTS, + field: 'latents', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'negative_conditioning', + node_id: DENOISE_LATENTS, + field: 'latents', }, }, + // Decode denoised latents to image { source: { - node_id: POSITIVE_CONDITIONING, - field: 'conditioning', + node_id: DENOISE_LATENTS, + field: 'latents', }, destination: { - node_id: LATENTS_TO_LATENTS, - field: 'positive_conditioning', + node_id: LATENTS_TO_IMAGE, + field: 'latents', }, }, ], @@ -333,17 +338,17 @@ export const buildLinearImageToImageGraph = ( }, }); - // add LoRA support - addLoRAsToGraph(state, graph, LATENTS_TO_LATENTS); - // optionally add custom VAE - addVAEToGraph(state, graph); + addVAEToGraph(state, graph, MAIN_MODEL_LOADER); + + // add LoRA support + addLoRAsToGraph(state, graph, DENOISE_LATENTS); // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); // add controlnet, mutating `graph` - addControlNetToLinearGraph(state, graph, LATENTS_TO_LATENTS); + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); // NSFW & watermark - must be last thing added to graph if (state.system.shouldUseNSFWChecker) { diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts index 0ec4e096d9..42ea07c923 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts @@ -6,9 +6,12 @@ import { ImageResizeInvocation, ImageToLatentsInvocation, } from 'services/api/types'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; import { addNSFWCheckerToGraph } from 
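One behavioural note on the image-to-image change above: the removed 'l2l' node took strength directly, while 'denoise_latents' expresses the same idea as a window over the denoising schedule. The mapping used here is denoising_start = 1 - strength with denoising_end = 1, so for a hypothetical strength of 0.75:

// Hypothetical value, purely to illustrate the strength-to-window mapping used above.
const strength = 0.75;
const denoising_start = 1 - strength; // 0.25: skip the first quarter of the schedule
const denoising_end = 1; // plain img2img always denoises to the end of the schedule
// Higher strength => smaller denoising_start => more of the input image is re-denoised.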
'./addNSFWCheckerToGraph'; +import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { IMAGE_TO_LATENTS, @@ -18,11 +21,11 @@ import { NOISE, POSITIVE_CONDITIONING, RESIZE, + SDXL_DENOISE_LATENTS, SDXL_IMAGE_TO_IMAGE_GRAPH, - SDXL_LATENTS_TO_LATENTS, SDXL_MODEL_LOADER, } from './constants'; -import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; /** * Builds the Image to Image tab graph. @@ -80,6 +83,10 @@ export const buildLinearSDXLImageToImageGraph = ( ? shouldUseCpuNoise : initialGenerationState.shouldUseCpuNoise; + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + // copy-pasted graph from node editor, filled in with state values & friendly node ids const graph: NonNullableGraph = { id: SDXL_IMAGE_TO_IMAGE_GRAPH, @@ -93,17 +100,13 @@ export const buildLinearSDXLImageToImageGraph = ( type: 'sdxl_compel_prompt', id: POSITIVE_CONDITIONING, prompt: positivePrompt, - style: shouldConcatSDXLStylePrompt - ? `${positivePrompt} ${positiveStylePrompt}` - : positiveStylePrompt, + style: craftedPositiveStylePrompt, }, [NEGATIVE_CONDITIONING]: { type: 'sdxl_compel_prompt', id: NEGATIVE_CONDITIONING, prompt: negativePrompt, - style: shouldConcatSDXLStylePrompt - ? `${negativePrompt} ${negativeStylePrompt}` - : negativeStylePrompt, + style: craftedNegativeStylePrompt, }, [NOISE]: { type: 'noise', @@ -115,9 +118,9 @@ export const buildLinearSDXLImageToImageGraph = ( id: LATENTS_TO_IMAGE, fp32: vaePrecision === 'fp32' ? 
true : false, }, - [SDXL_LATENTS_TO_LATENTS]: { - type: 'l2l_sdxl', - id: SDXL_LATENTS_TO_LATENTS, + [SDXL_DENOISE_LATENTS]: { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, cfg_scale, scheduler, steps, @@ -137,36 +140,17 @@ export const buildLinearSDXLImageToImageGraph = ( }, }, edges: [ + // Connect Model Loader to UNet, CLIP & VAE { source: { node_id: SDXL_MODEL_LOADER, field: 'unet', }, destination: { - node_id: SDXL_LATENTS_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'unet', }, }, - { - source: { - node_id: SDXL_MODEL_LOADER, - field: 'vae', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'vae', - }, - }, - { - source: { - node_id: SDXL_MODEL_LOADER, - field: 'vae', - }, - destination: { - node_id: IMAGE_TO_LATENTS, - field: 'vae', - }, - }, { source: { node_id: SDXL_MODEL_LOADER, @@ -207,43 +191,14 @@ export const buildLinearSDXLImageToImageGraph = ( field: 'clip2', }, }, - { - source: { - node_id: SDXL_LATENTS_TO_LATENTS, - field: 'latents', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'latents', - }, - }, - { - source: { - node_id: IMAGE_TO_LATENTS, - field: 'latents', - }, - destination: { - node_id: SDXL_LATENTS_TO_LATENTS, - field: 'latents', - }, - }, - { - source: { - node_id: NOISE, - field: 'noise', - }, - destination: { - node_id: SDXL_LATENTS_TO_LATENTS, - field: 'noise', - }, - }, + // Connect everything to Denoise Latents { source: { node_id: POSITIVE_CONDITIONING, field: 'conditioning', }, destination: { - node_id: SDXL_LATENTS_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'positive_conditioning', }, }, @@ -253,10 +208,41 @@ export const buildLinearSDXLImageToImageGraph = ( field: 'conditioning', }, destination: { - node_id: SDXL_LATENTS_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'negative_conditioning', }, }, + { + source: { + node_id: NOISE, + field: 'noise', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'noise', + }, + }, + { + source: { + node_id: IMAGE_TO_LATENTS, + field: 'latents', + }, + destination: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + }, + // Decode Denoised Latents To Image + { + source: { + node_id: SDXL_DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, ], }; @@ -365,13 +351,19 @@ export const buildLinearSDXLImageToImageGraph = ( }, }); - addSDXLLoRAsToGraph(state, graph, SDXL_LATENTS_TO_LATENTS, SDXL_MODEL_LOADER); + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER); // Add Refiner if enabled if (shouldUseSDXLRefiner) { - addSDXLRefinerToGraph(state, graph, SDXL_LATENTS_TO_LATENTS); + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); } + // optionally add custom VAE + addVAEToGraph(state, graph, SDXL_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts index 21b7c1e0ac..a74884f23b 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts @@ -2,10 +2,12 @@ import { logger } from 'app/logging/logger'; import { RootState } from 
'app/store/store'; import { NonNullableGraph } from 'features/nodes/types/types'; import { initialGenerationState } from 'features/parameters/store/generationSlice'; +import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph'; import { addSDXLLoRAsToGraph } from './addSDXLLoRAstoGraph'; import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph'; +import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { LATENTS_TO_IMAGE, @@ -13,10 +15,11 @@ import { NEGATIVE_CONDITIONING, NOISE, POSITIVE_CONDITIONING, + SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER, SDXL_TEXT_TO_IMAGE_GRAPH, - SDXL_TEXT_TO_LATENTS, } from './constants'; +import { craftSDXLStylePrompt } from './helpers/craftSDXLStylePrompt'; export const buildLinearSDXLTextToImageGraph = ( state: RootState @@ -40,8 +43,8 @@ export const buildLinearSDXLTextToImageGraph = ( const { positiveStylePrompt, negativeStylePrompt, - shouldConcatSDXLStylePrompt, shouldUseSDXLRefiner, + shouldConcatSDXLStylePrompt, refinerStart, } = state.sdxl; @@ -54,6 +57,10 @@ export const buildLinearSDXLTextToImageGraph = ( throw new Error('No model found in state'); } + // Construct Style Prompt + const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = + craftSDXLStylePrompt(state, shouldConcatSDXLStylePrompt); + /** * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the * full graph here as a template. Then use the parameters from app state and set friendlier node @@ -76,17 +83,13 @@ export const buildLinearSDXLTextToImageGraph = ( type: 'sdxl_compel_prompt', id: POSITIVE_CONDITIONING, prompt: positivePrompt, - style: shouldConcatSDXLStylePrompt - ? `${positivePrompt} ${positiveStylePrompt}` - : positiveStylePrompt, + style: craftedPositiveStylePrompt, }, [NEGATIVE_CONDITIONING]: { type: 'sdxl_compel_prompt', id: NEGATIVE_CONDITIONING, prompt: negativePrompt, - style: shouldConcatSDXLStylePrompt - ? `${negativePrompt} ${negativeStylePrompt}` - : negativeStylePrompt, + style: craftedNegativeStylePrompt, }, [NOISE]: { type: 'noise', @@ -95,12 +98,13 @@ export const buildLinearSDXLTextToImageGraph = ( height, use_cpu, }, - [SDXL_TEXT_TO_LATENTS]: { - type: 't2l_sdxl', - id: SDXL_TEXT_TO_LATENTS, + [SDXL_DENOISE_LATENTS]: { + type: 'denoise_latents', + id: SDXL_DENOISE_LATENTS, cfg_scale, scheduler, steps, + denoising_start: 0, denoising_end: shouldUseSDXLRefiner ? 
refinerStart : 1, }, [LATENTS_TO_IMAGE]: { @@ -110,26 +114,17 @@ export const buildLinearSDXLTextToImageGraph = ( }, }, edges: [ + // Connect Model Loader to UNet, VAE & CLIP { source: { node_id: SDXL_MODEL_LOADER, field: 'unet', }, destination: { - node_id: SDXL_TEXT_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'unet', }, }, - { - source: { - node_id: SDXL_MODEL_LOADER, - field: 'vae', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'vae', - }, - }, { source: { node_id: SDXL_MODEL_LOADER, @@ -170,13 +165,14 @@ export const buildLinearSDXLTextToImageGraph = ( field: 'clip2', }, }, + // Connect everything to Denoise Latents { source: { node_id: POSITIVE_CONDITIONING, field: 'conditioning', }, destination: { - node_id: SDXL_TEXT_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'positive_conditioning', }, }, @@ -186,7 +182,7 @@ export const buildLinearSDXLTextToImageGraph = ( field: 'conditioning', }, destination: { - node_id: SDXL_TEXT_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'negative_conditioning', }, }, @@ -196,13 +192,14 @@ export const buildLinearSDXLTextToImageGraph = ( field: 'noise', }, destination: { - node_id: SDXL_TEXT_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'noise', }, }, + // Decode Denoised Latents To Image { source: { - node_id: SDXL_TEXT_TO_LATENTS, + node_id: SDXL_DENOISE_LATENTS, field: 'latents', }, destination: { @@ -247,13 +244,20 @@ export const buildLinearSDXLTextToImageGraph = ( }, }); - addSDXLLoRAsToGraph(state, graph, SDXL_TEXT_TO_LATENTS, SDXL_MODEL_LOADER); - // Add Refiner if enabled if (shouldUseSDXLRefiner) { - addSDXLRefinerToGraph(state, graph, SDXL_TEXT_TO_LATENTS); + addSDXLRefinerToGraph(state, graph, SDXL_DENOISE_LATENTS); } + // optionally add custom VAE + addVAEToGraph(state, graph, SDXL_MODEL_LOADER); + + // add LoRA support + addSDXLLoRAsToGraph(state, graph, SDXL_DENOISE_LATENTS, SDXL_MODEL_LOADER); + + // add controlnet, mutating `graph` + addControlNetToLinearGraph(state, graph, SDXL_DENOISE_LATENTS); + // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearTextToImageGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearTextToImageGraph.ts index 9dcc502d14..99a1ec7420 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearTextToImageGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearTextToImageGraph.ts @@ -2,6 +2,10 @@ import { logger } from 'app/logging/logger'; import { RootState } from 'app/store/store'; import { NonNullableGraph } from 'features/nodes/types/types'; import { initialGenerationState } from 'features/parameters/store/generationSlice'; +import { + DenoiseLatentsInvocation, + ONNXTextToLatentsInvocation, +} from 'services/api/types'; import { addControlNetToLinearGraph } from './addControlNetToLinearGraph'; import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph'; import { addLoRAsToGraph } from './addLoRAsToGraph'; @@ -10,20 +14,16 @@ import { addVAEToGraph } from './addVAEToGraph'; import { addWatermarkerToGraph } from './addWatermarkerToGraph'; import { CLIP_SKIP, + DENOISE_LATENTS, LATENTS_TO_IMAGE, MAIN_MODEL_LOADER, - ONNX_MODEL_LOADER, METADATA_ACCUMULATOR, NEGATIVE_CONDITIONING, NOISE, + ONNX_MODEL_LOADER, POSITIVE_CONDITIONING, TEXT_TO_IMAGE_GRAPH, - TEXT_TO_LATENTS, } from './constants'; -import { - ONNXTextToLatentsInvocation, - 
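In the SDXL builders just above, the base model's denoising window ends at refinerStart whenever the refiner is enabled, and addSDXLRefinerToGraph is expected to continue from that point (the helper itself is not shown in this diff, so the refiner side below is an assumption). With a hypothetical refinerStart of 0.8:

// Hypothetical refinerStart, purely to illustrate how the schedule is split above.
const shouldUseSDXLRefiner = true;
const refinerStart = 0.8;

const baseDenoiseWindow = {
  denoising_start: 0,
  denoising_end: shouldUseSDXLRefiner ? refinerStart : 1, // base model covers the first 80%
};
const refinerDenoiseWindow = {
  denoising_start: refinerStart, // assumed: the refiner picks up where the base model stopped
  denoising_end: 1,
};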
TextToLatentsInvocation, -} from 'services/api/types'; export const buildLinearTextToImageGraph = ( state: RootState @@ -54,30 +54,36 @@ export const buildLinearTextToImageGraph = ( } const isUsingOnnxModel = model.model_type === 'onnx'; + const modelLoaderNodeId = isUsingOnnxModel ? ONNX_MODEL_LOADER : MAIN_MODEL_LOADER; + const modelLoaderNodeType = isUsingOnnxModel ? 'onnx_model_loader' : 'main_model_loader'; - const t2lNode: TextToLatentsInvocation | ONNXTextToLatentsInvocation = + + const t2lNode: DenoiseLatentsInvocation | ONNXTextToLatentsInvocation = isUsingOnnxModel ? { type: 't2l_onnx', - id: TEXT_TO_LATENTS, + id: DENOISE_LATENTS, is_intermediate: true, cfg_scale, scheduler, steps, } : { - type: 't2l', - id: TEXT_TO_LATENTS, + type: 'denoise_latents', + id: DENOISE_LATENTS, is_intermediate: true, cfg_scale, scheduler, steps, + denoising_start: 0, + denoising_end: 1, }; + /** * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the * full graph here as a template. Then use the parameters from app state and set friendlier node @@ -93,6 +99,18 @@ export const buildLinearTextToImageGraph = ( const graph: NonNullableGraph = { id: TEXT_TO_IMAGE_GRAPH, nodes: { + [modelLoaderNodeId]: { + type: modelLoaderNodeType, + id: modelLoaderNodeId, + is_intermediate: true, + model, + }, + [CLIP_SKIP]: { + type: 'clip_skip', + id: CLIP_SKIP, + skipped_layers: clipSkip, + is_intermediate: true, + }, [POSITIVE_CONDITIONING]: { type: isUsingOnnxModel ? 'prompt_onnx' : 'compel', id: POSITIVE_CONDITIONING, @@ -114,18 +132,6 @@ export const buildLinearTextToImageGraph = ( is_intermediate: true, }, [t2lNode.id]: t2lNode, - [modelLoaderNodeId]: { - type: modelLoaderNodeType, - id: modelLoaderNodeId, - is_intermediate: true, - model, - }, - [CLIP_SKIP]: { - type: 'clip_skip', - id: CLIP_SKIP, - skipped_layers: clipSkip, - is_intermediate: true, - }, [LATENTS_TO_IMAGE]: { type: isUsingOnnxModel ? 
'l2i_onnx' : 'l2i', id: LATENTS_TO_IMAGE, @@ -133,6 +139,17 @@ export const buildLinearTextToImageGraph = ( }, }, edges: [ + // Connect Model Loader to UNet and CLIP Skip + { + source: { + node_id: modelLoaderNodeId, + field: 'unet', + }, + destination: { + node_id: DENOISE_LATENTS, + field: 'unet', + }, + }, { source: { node_id: modelLoaderNodeId, @@ -143,16 +160,7 @@ export const buildLinearTextToImageGraph = ( field: 'clip', }, }, - { - source: { - node_id: modelLoaderNodeId, - field: 'unet', - }, - destination: { - node_id: TEXT_TO_LATENTS, - field: 'unet', - }, - }, + // Connect CLIP Skip to Conditioning { source: { node_id: CLIP_SKIP, @@ -173,13 +181,14 @@ export const buildLinearTextToImageGraph = ( field: 'clip', }, }, + // Connect everything to Denoise Latents { source: { node_id: POSITIVE_CONDITIONING, field: 'conditioning', }, destination: { - node_id: TEXT_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'positive_conditioning', }, }, @@ -189,30 +198,31 @@ export const buildLinearTextToImageGraph = ( field: 'conditioning', }, destination: { - node_id: TEXT_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'negative_conditioning', }, }, - { - source: { - node_id: TEXT_TO_LATENTS, - field: 'latents', - }, - destination: { - node_id: LATENTS_TO_IMAGE, - field: 'latents', - }, - }, { source: { node_id: NOISE, field: 'noise', }, destination: { - node_id: TEXT_TO_LATENTS, + node_id: DENOISE_LATENTS, field: 'noise', }, }, + // Decode Denoised Latents To Image + { + source: { + node_id: DENOISE_LATENTS, + field: 'latents', + }, + destination: { + node_id: LATENTS_TO_IMAGE, + field: 'latents', + }, + }, ], }; @@ -248,17 +258,17 @@ export const buildLinearTextToImageGraph = ( }, }); - // add LoRA support - addLoRAsToGraph(state, graph, TEXT_TO_LATENTS, modelLoaderNodeId); - // optionally add custom VAE addVAEToGraph(state, graph, modelLoaderNodeId); + // add LoRA support + addLoRAsToGraph(state, graph, DENOISE_LATENTS, modelLoaderNodeId); + // add dynamic prompts - also sets up core iteration and seed addDynamicPromptsToGraph(state, graph); // add controlnet, mutating `graph` - addControlNetToLinearGraph(state, graph, TEXT_TO_LATENTS); + addControlNetToLinearGraph(state, graph, DENOISE_LATENTS); // NSFW & watermark - must be last thing added to graph if (state.system.shouldUseNSFWChecker) { diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts index 7fa87c7f20..3e213120b3 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts @@ -1,7 +1,7 @@ // friendly node ids export const POSITIVE_CONDITIONING = 'positive_conditioning'; export const NEGATIVE_CONDITIONING = 'negative_conditioning'; -export const TEXT_TO_LATENTS = 'text_to_latents'; +export const DENOISE_LATENTS = 'denoise_latents'; export const LATENTS_TO_IMAGE = 'latents_to_image'; export const NSFW_CHECKER = 'nsfw_checker'; export const WATERMARKER = 'invisible_watermark'; @@ -17,7 +17,24 @@ export const CLIP_SKIP = 'clip_skip'; export const IMAGE_TO_LATENTS = 'image_to_latents'; export const LATENTS_TO_LATENTS = 'latents_to_latents'; export const RESIZE = 'resize_image'; +export const CANVAS_OUTPUT = 'canvas_output'; export const INPAINT = 'inpaint'; +export const INPAINT_SEAM_FIX = 'inpaint_seam_fix'; +export const INPAINT_IMAGE = 'inpaint_image'; +export const SCALED_INPAINT_IMAGE = 'scaled_inpaint_image'; +export const 
INPAINT_IMAGE_RESIZE_UP = 'inpaint_image_resize_up'; +export const INPAINT_IMAGE_RESIZE_DOWN = 'inpaint_image_resize_down'; +export const INPAINT_INFILL = 'inpaint_infill'; +export const INPAINT_INFILL_RESIZE_DOWN = 'inpaint_infill_resize_down'; +export const INPAINT_FINAL_IMAGE = 'inpaint_final_image'; +export const MASK_FROM_ALPHA = 'tomask'; +export const MASK_EDGE = 'mask_edge'; +export const MASK_BLUR = 'mask_blur'; +export const MASK_COMBINE = 'mask_combine'; +export const MASK_RESIZE_UP = 'mask_resize_up'; +export const MASK_RESIZE_DOWN = 'mask_resize_down'; +export const COLOR_CORRECT = 'color_correct'; +export const PASTE_IMAGE = 'img_paste'; export const CONTROL_NET_COLLECT = 'control_net_collect'; export const DYNAMIC_PROMPT = 'dynamic_prompt'; export const IMAGE_COLLECTION = 'image_collection'; @@ -27,18 +44,26 @@ export const REALESRGAN = 'esrgan'; export const DIVIDE = 'divide'; export const SCALE = 'scale_image'; export const SDXL_MODEL_LOADER = 'sdxl_model_loader'; -export const SDXL_TEXT_TO_LATENTS = 't2l_sdxl'; -export const SDXL_LATENTS_TO_LATENTS = 'l2l_sdxl'; +export const SDXL_DENOISE_LATENTS = 'sdxl_denoise_latents'; export const SDXL_REFINER_MODEL_LOADER = 'sdxl_refiner_model_loader'; export const SDXL_REFINER_POSITIVE_CONDITIONING = 'sdxl_refiner_positive_conditioning'; export const SDXL_REFINER_NEGATIVE_CONDITIONING = 'sdxl_refiner_negative_conditioning'; -export const SDXL_REFINER_LATENTS_TO_LATENTS = 'l2l_sdxl_refiner'; +export const SDXL_REFINER_DENOISE_LATENTS = 'sdxl_refiner_denoise_latents'; // friendly graph ids export const TEXT_TO_IMAGE_GRAPH = 'text_to_image_graph'; +export const IMAGE_TO_IMAGE_GRAPH = 'image_to_image_graph'; +export const CANVAS_TEXT_TO_IMAGE_GRAPH = 'canvas_text_to_image_graph'; +export const CANVAS_IMAGE_TO_IMAGE_GRAPH = 'canvas_image_to_image_graph'; +export const CANVAS_INPAINT_GRAPH = 'canvas_inpaint_graph'; +export const CANVAS_OUTPAINT_GRAPH = 'canvas_outpaint_graph'; export const SDXL_TEXT_TO_IMAGE_GRAPH = 'sdxl_text_to_image_graph'; export const SDXL_IMAGE_TO_IMAGE_GRAPH = 'sxdl_image_to_image_graph'; -export const IMAGE_TO_IMAGE_GRAPH = 'image_to_image_graph'; -export const INPAINT_GRAPH = 'inpaint_graph'; +export const SDXL_CANVAS_TEXT_TO_IMAGE_GRAPH = + 'sdxl_canvas_text_to_image_graph'; +export const SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH = + 'sdxl_canvas_image_to_image_graph'; +export const SDXL_CANVAS_INPAINT_GRAPH = 'sdxl_canvas_inpaint_graph'; +export const SDXL_CANVAS_OUTPAINT_GRAPH = 'sdxl_canvas_outpaint_graph'; diff --git a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/helpers/craftSDXLStylePrompt.ts b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/helpers/craftSDXLStylePrompt.ts new file mode 100644 index 0000000000..f46d5cc5dc --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/helpers/craftSDXLStylePrompt.ts @@ -0,0 +1,28 @@ +import { RootState } from 'app/store/store'; + +export const craftSDXLStylePrompt = ( + state: RootState, + shouldConcatSDXLStylePrompt: boolean +) => { + const { positivePrompt, negativePrompt } = state.generation; + const { positiveStylePrompt, negativeStylePrompt } = state.sdxl; + + let craftedPositiveStylePrompt = positiveStylePrompt; + let craftedNegativeStylePrompt = negativeStylePrompt; + + if (shouldConcatSDXLStylePrompt) { + if (positiveStylePrompt.length > 0) { + craftedPositiveStylePrompt = `${positivePrompt} ${positiveStylePrompt}`; + } else { + craftedPositiveStylePrompt = positivePrompt; + } + + if (negativeStylePrompt.length > 
0) { + craftedNegativeStylePrompt = `${negativePrompt} ${negativeStylePrompt}`; + } else { + craftedNegativeStylePrompt = negativePrompt; + } + } + + return { craftedPositiveStylePrompt, craftedNegativeStylePrompt }; +}; diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskAdjustmentCollapse.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskAdjustmentCollapse.tsx new file mode 100644 index 0000000000..9ca6503d3d --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskAdjustmentCollapse.tsx @@ -0,0 +1,21 @@ +import { Flex } from '@chakra-ui/react'; +import IAICollapse from 'common/components/IAICollapse'; +import { memo } from 'react'; +import { useTranslation } from 'react-i18next'; +import ParamMaskBlur from './ParamMaskBlur'; +import ParamMaskBlurMethod from './ParamMaskBlurMethod'; + +const ParamMaskAdjustmentCollapse = () => { + const { t } = useTranslation(); + + return ( + + + + + + + ); +}; + +export default memo(ParamMaskAdjustmentCollapse); diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamBlur.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlur.tsx similarity index 62% rename from invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamBlur.tsx rename to invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlur.tsx index 5c20ba7a13..82b82228e2 100644 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamBlur.tsx +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlur.tsx @@ -1,31 +1,31 @@ import type { RootState } from 'app/store/store'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import IAISlider from 'common/components/IAISlider'; -import { setSeamBlur } from 'features/parameters/store/generationSlice'; +import { setMaskBlur } from 'features/parameters/store/generationSlice'; import { useTranslation } from 'react-i18next'; -export default function ParamSeamBlur() { +export default function ParamMaskBlur() { const dispatch = useAppDispatch(); - const seamBlur = useAppSelector( - (state: RootState) => state.generation.seamBlur + const maskBlur = useAppSelector( + (state: RootState) => state.generation.maskBlur ); const { t } = useTranslation(); return ( { - dispatch(setSeamBlur(v)); + dispatch(setMaskBlur(v)); }} withInput withSliderMarks withReset handleReset={() => { - dispatch(setSeamBlur(16)); + dispatch(setMaskBlur(16)); }} /> ); diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlurMethod.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlurMethod.tsx new file mode 100644 index 0000000000..fa20dcdbcc --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskBlurMethod.tsx @@ -0,0 +1,36 @@ +import { SelectItem } from '@mantine/core'; +import { RootState } from 'app/store/store'; + +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import IAIMantineSelect from 'common/components/IAIMantineSelect'; +import { setMaskBlurMethod } from 
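A quick usage sketch for the craftSDXLStylePrompt helper defined above, with hypothetical prompt values, to show the concatenation behaviour the SDXL builders now rely on (the import paths assume the repo's usual aliases):

import { RootState } from 'app/store/store';
import { craftSDXLStylePrompt } from 'features/nodes/util/graphBuilders/helpers/craftSDXLStylePrompt';

// Hypothetical state fragment; only the four prompt fields the helper reads are shown.
const state = {
  generation: { positivePrompt: 'a lighthouse at dusk', negativePrompt: 'blurry' },
  sdxl: { positiveStylePrompt: 'oil painting', negativeStylePrompt: '' },
} as unknown as RootState; // double cast for illustration only

const { craftedPositiveStylePrompt, craftedNegativeStylePrompt } = craftSDXLStylePrompt(state, true);
// craftedPositiveStylePrompt === 'a lighthouse at dusk oil painting'
// craftedNegativeStylePrompt === 'blurry' (an empty style prompt falls back to the main negative prompt)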
'features/parameters/store/generationSlice'; +import { useTranslation } from 'react-i18next'; + +type MaskBlurMethods = 'box' | 'gaussian'; + +const maskBlurMethods: SelectItem[] = [ + { label: 'Box Blur', value: 'box' }, + { label: 'Gaussian Blur', value: 'gaussian' }, +]; + +export default function ParamMaskBlurMethod() { + const maskBlurMethod = useAppSelector( + (state: RootState) => state.generation.maskBlurMethod + ); + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const handleMaskBlurMethodChange = (v: string | null) => { + if (!v) return; + dispatch(setMaskBlurMethod(v as MaskBlurMethods)); + }; + + return ( + + ); +} diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamCorrectionCollapse.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamCorrectionCollapse.tsx deleted file mode 100644 index 88d839fa15..0000000000 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamCorrectionCollapse.tsx +++ /dev/null @@ -1,22 +0,0 @@ -import IAICollapse from 'common/components/IAICollapse'; -import { memo } from 'react'; -import { useTranslation } from 'react-i18next'; -import ParamSeamBlur from './ParamSeamBlur'; -import ParamSeamSize from './ParamSeamSize'; -import ParamSeamSteps from './ParamSeamSteps'; -import ParamSeamStrength from './ParamSeamStrength'; - -const ParamSeamCorrectionCollapse = () => { - const { t } = useTranslation(); - - return ( - - - - - - - ); -}; - -export default memo(ParamSeamCorrectionCollapse); diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSize.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSize.tsx deleted file mode 100644 index 8e56cded7b..0000000000 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSize.tsx +++ /dev/null @@ -1,31 +0,0 @@ -import type { RootState } from 'app/store/store'; -import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; -import IAISlider from 'common/components/IAISlider'; -import { setSeamSize } from 'features/parameters/store/generationSlice'; -import { useTranslation } from 'react-i18next'; - -export default function ParamSeamSize() { - const dispatch = useAppDispatch(); - const { t } = useTranslation(); - - const seamSize = useAppSelector( - (state: RootState) => state.generation.seamSize - ); - - return ( - { - dispatch(setSeamSize(v)); - }} - withInput - withSliderMarks - withReset - handleReset={() => dispatch(setSeamSize(96))} - /> - ); -} diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSteps.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSteps.tsx deleted file mode 100644 index 8ca5226621..0000000000 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamSteps.tsx +++ /dev/null @@ -1,32 +0,0 @@ -import type { RootState } from 'app/store/store'; -import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; -import IAISlider from 'common/components/IAISlider'; -import { setSeamSteps } from 'features/parameters/store/generationSlice'; -import { useTranslation } from 'react-i18next'; - -export default function ParamSeamSteps() { - const { t } = useTranslation(); - const 
seamSteps = useAppSelector( - (state: RootState) => state.generation.seamSteps - ); - const dispatch = useAppDispatch(); - - return ( - { - dispatch(setSeamSteps(v)); - }} - withInput - withSliderMarks - withReset - handleReset={() => { - dispatch(setSeamSteps(30)); - }} - /> - ); -} diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamStrength.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamStrength.tsx deleted file mode 100644 index de74156cd3..0000000000 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamStrength.tsx +++ /dev/null @@ -1,32 +0,0 @@ -import { RootState } from 'app/store/store'; -import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; -import IAISlider from 'common/components/IAISlider'; -import { setSeamStrength } from 'features/parameters/store/generationSlice'; -import { useTranslation } from 'react-i18next'; - -export default function ParamSeamStrength() { - const dispatch = useAppDispatch(); - const { t } = useTranslation(); - const seamStrength = useAppSelector( - (state: RootState) => state.generation.seamStrength - ); - - return ( - { - dispatch(setSeamStrength(v)); - }} - withInput - withSliderMarks - withReset - handleReset={() => { - dispatch(setSeamStrength(0.7)); - }} - /> - ); -} diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx index 0a18d4f556..05b5b6468a 100644 --- a/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx @@ -15,11 +15,11 @@ import { modelIdToMainModelParam } from 'features/parameters/util/modelIdToMainM import SyncModelsButton from 'features/ui/components/tabs/ModelManager/subpanels/ModelManagerSettingsPanel/SyncModelsButton'; import { activeTabNameSelector } from 'features/ui/store/uiSelectors'; import { forEach } from 'lodash-es'; +import { NON_REFINER_BASE_MODELS } from 'services/api/constants'; import { useGetMainModelsQuery, useGetOnnxModelsQuery, } from 'services/api/endpoints/models'; -import { NON_REFINER_BASE_MODELS } from 'services/api/constants'; import { useFeatureStatus } from '../../../../system/hooks/useFeatureStatus'; const selector = createSelector( @@ -52,10 +52,7 @@ const ParamMainModelSelect = () => { const data: SelectItem[] = []; forEach(mainModels.entities, (model, id) => { - if ( - !model || - (activeTabName === 'unifiedCanvas' && model.base_model === 'sdxl') - ) { + if (!model) { return; } diff --git a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts index 907107e95e..95caf9a9e1 100644 --- a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts +++ b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts @@ -4,16 +4,16 @@ import { refinerModelChanged, setNegativeStylePromptSDXL, setPositiveStylePromptSDXL, - setRefinerAestheticScore, setRefinerCFGScale, + setRefinerNegativeAestheticScore, + setRefinerPositiveAestheticScore, setRefinerScheduler, setRefinerStart, setRefinerSteps, } from 'features/sdxl/store/sdxlSlice'; import { useCallback } from 'react'; import { 
useTranslation } from 'react-i18next'; -import { UnsafeImageMetadata } from 'services/api/types'; -import { ImageDTO } from 'services/api/types'; +import { ImageDTO, UnsafeImageMetadata } from 'services/api/types'; import { initialImageSelected, modelSelected } from '../store/actions'; import { setCfgScale, @@ -34,8 +34,9 @@ import { isValidPositivePrompt, isValidSDXLNegativeStylePrompt, isValidSDXLPositiveStylePrompt, - isValidSDXLRefinerAestheticScore, isValidSDXLRefinerModel, + isValidSDXLRefinerNegativeAestheticScore, + isValidSDXLRefinerPositiveAestheticScore, isValidSDXLRefinerStart, isValidScheduler, isValidSeed, @@ -339,7 +340,8 @@ export const useRecallParameters = () => { refiner_cfg_scale, refiner_steps, refiner_scheduler, - refiner_aesthetic_store, + refiner_positive_aesthetic_store, + refiner_negative_aesthetic_store, refiner_start, } = metadata; @@ -398,8 +400,24 @@ export const useRecallParameters = () => { dispatch(setRefinerScheduler(refiner_scheduler)); } - if (isValidSDXLRefinerAestheticScore(refiner_aesthetic_store)) { - dispatch(setRefinerAestheticScore(refiner_aesthetic_store)); + if ( + isValidSDXLRefinerPositiveAestheticScore( + refiner_positive_aesthetic_store + ) + ) { + dispatch( + setRefinerPositiveAestheticScore(refiner_positive_aesthetic_store) + ); + } + + if ( + isValidSDXLRefinerNegativeAestheticScore( + refiner_negative_aesthetic_store + ) + ) { + dispatch( + setRefinerNegativeAestheticScore(refiner_negative_aesthetic_store) + ); } if (isValidSDXLRefinerStart(refiner_start)) { diff --git a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts index 12f2c3eabf..0173391833 100644 --- a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts +++ b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts @@ -4,11 +4,13 @@ import { roundToMultiple } from 'common/util/roundDownToMultiple'; import { configChanged } from 'features/system/store/configSlice'; import { clamp } from 'lodash-es'; import { ImageDTO } from 'services/api/types'; + import { clipSkipMap } from '../types/constants'; import { CfgScaleParam, HeightParam, MainModelParam, + MaskBlurMethodParam, NegativePromptParam, OnnxModelParam, PositivePromptParam, @@ -33,10 +35,8 @@ export interface GenerationState { positivePrompt: PositivePromptParam; negativePrompt: NegativePromptParam; scheduler: SchedulerParam; - seamBlur: number; - seamSize: number; - seamSteps: number; - seamStrength: number; + maskBlur: number; + maskBlurMethod: MaskBlurMethodParam; seed: SeedParam; seedWeights: string; shouldFitToWidthHeight: boolean; @@ -72,10 +72,8 @@ export const initialGenerationState: GenerationState = { positivePrompt: '', negativePrompt: '', scheduler: 'euler', - seamBlur: 16, - seamSize: 96, - seamSteps: 30, - seamStrength: 0.7, + maskBlur: 16, + maskBlurMethod: 'box', seed: 0, seedWeights: '', shouldFitToWidthHeight: true, @@ -196,17 +194,11 @@ export const generationSlice = createSlice({ clearInitialImage: (state) => { state.initialImage = undefined; }, - setSeamSize: (state, action: PayloadAction) => { - state.seamSize = action.payload; + setMaskBlur: (state, action: PayloadAction) => { + state.maskBlur = action.payload; }, - setSeamBlur: (state, action: PayloadAction) => { - state.seamBlur = action.payload; - }, - setSeamStrength: (state, action: PayloadAction) => { - state.seamStrength = action.payload; - }, - setSeamSteps: (state, action: PayloadAction) => { - state.seamSteps = 
action.payload; + setMaskBlurMethod: (state, action: PayloadAction) => { + state.maskBlurMethod = action.payload; }, setTileSize: (state, action: PayloadAction) => { state.tileSize = action.payload; @@ -312,10 +304,8 @@ export const { setPositivePrompt, setNegativePrompt, setScheduler, - setSeamBlur, - setSeamSize, - setSeamSteps, - setSeamStrength, + setMaskBlur, + setMaskBlurMethod, setSeed, setSeedWeights, setShouldFitToWidthHeight, diff --git a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts index ac799ac600..25905e1e14 100644 --- a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts +++ b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts @@ -353,22 +353,40 @@ export const isValidPrecision = (val: unknown): val is PrecisionParam => zPrecision.safeParse(val).success; /** - * Zod schema for SDXL refiner aesthetic score parameter + * Zod schema for SDXL refiner positive aesthetic score parameter */ -export const zSDXLRefinerAestheticScore = z.number().min(1).max(10); +export const zSDXLRefinerPositiveAestheticScore = z.number().min(1).max(10); /** - * Type alias for SDXL refiner aesthetic score parameter, inferred from its zod schema + * Type alias for SDXL refiner aesthetic positive score parameter, inferred from its zod schema */ -export type SDXLRefinerAestheticScoreParam = z.infer< - typeof zSDXLRefinerAestheticScore +export type SDXLRefinerPositiveAestheticScoreParam = z.infer< + typeof zSDXLRefinerPositiveAestheticScore >; /** - * Validates/type-guards a value as a SDXL refiner aesthetic score parameter + * Validates/type-guards a value as a SDXL refiner positive aesthetic score parameter */ -export const isValidSDXLRefinerAestheticScore = ( +export const isValidSDXLRefinerPositiveAestheticScore = ( val: unknown -): val is SDXLRefinerAestheticScoreParam => - zSDXLRefinerAestheticScore.safeParse(val).success; +): val is SDXLRefinerPositiveAestheticScoreParam => + zSDXLRefinerPositiveAestheticScore.safeParse(val).success; + +/** + * Zod schema for SDXL refiner negative aesthetic score parameter + */ +export const zSDXLRefinerNegativeAestheticScore = z.number().min(1).max(10); +/** + * Type alias for SDXL refiner aesthetic negative score parameter, inferred from its zod schema + */ +export type SDXLRefinerNegativeAestheticScoreParam = z.infer< + typeof zSDXLRefinerNegativeAestheticScore +>; +/** + * Validates/type-guards a value as a SDXL refiner negative aesthetic score parameter + */ +export const isValidSDXLRefinerNegativeAestheticScore = ( + val: unknown +): val is SDXLRefinerNegativeAestheticScoreParam => + zSDXLRefinerNegativeAestheticScore.safeParse(val).success; /** * Zod schema for SDXL start parameter @@ -385,6 +403,21 @@ export const isValidSDXLRefinerStart = ( val: unknown ): val is SDXLRefinerStartParam => zSDXLRefinerstart.safeParse(val).success; +/** + * Zod schema for a mask blur method parameter + */ +export const zMaskBlurMethod = z.enum(['box', 'gaussian']); +/** + * Type alias for mask blur method parameter, inferred from its zod schema + */ +export type MaskBlurMethodParam = z.infer; +/** + * Validates/type-guards a value as a mask blur method parameter + */ +export const isValidMaskBlurMethod = ( + val: unknown +): val is MaskBlurMethodParam => zMaskBlurMethod.safeParse(val).success; + // /** // * Zod schema for BaseModelType // */ diff --git a/invokeai/frontend/web/src/features/sdxl/components/ParamSDXLRefinerCollapse.tsx 
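The zMaskBlurMethod schema and isValidMaskBlurMethod guard added above follow the same zod-plus-type-guard pattern as the refiner aesthetic-score validators, so metadata recall code can narrow an unknown value before dispatching it. A small sketch (the recalled value is hypothetical; the import path assumes the repo's usual alias):

import { isValidMaskBlurMethod } from 'features/parameters/types/parameterSchemas';

// Hypothetical value as it might come out of image metadata.
const recalled: unknown = 'gaussian';

if (isValidMaskBlurMethod(recalled)) {
  // Narrowed to 'box' | 'gaussian' here; safe to pass to setMaskBlurMethod.
  console.log(`recalled mask blur method: ${recalled}`);
}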
b/invokeai/frontend/web/src/features/sdxl/components/ParamSDXLRefinerCollapse.tsx index 37e1718dc6..3b186006f1 100644 --- a/invokeai/frontend/web/src/features/sdxl/components/ParamSDXLRefinerCollapse.tsx +++ b/invokeai/frontend/web/src/features/sdxl/components/ParamSDXLRefinerCollapse.tsx @@ -4,9 +4,10 @@ import { stateSelector } from 'app/store/store'; import { useAppSelector } from 'app/store/storeHooks'; import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; import IAICollapse from 'common/components/IAICollapse'; -import ParamSDXLRefinerAestheticScore from './SDXLRefiner/ParamSDXLRefinerAestheticScore'; import ParamSDXLRefinerCFGScale from './SDXLRefiner/ParamSDXLRefinerCFGScale'; import ParamSDXLRefinerModelSelect from './SDXLRefiner/ParamSDXLRefinerModelSelect'; +import ParamSDXLRefinerNegativeAestheticScore from './SDXLRefiner/ParamSDXLRefinerNegativeAestheticScore'; +import ParamSDXLRefinerPositiveAestheticScore from './SDXLRefiner/ParamSDXLRefinerPositiveAestheticScore'; import ParamSDXLRefinerScheduler from './SDXLRefiner/ParamSDXLRefinerScheduler'; import ParamSDXLRefinerStart from './SDXLRefiner/ParamSDXLRefinerStart'; import ParamSDXLRefinerSteps from './SDXLRefiner/ParamSDXLRefinerSteps'; @@ -38,7 +39,8 @@ const ParamSDXLRefinerCollapse = () => { - + + diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLImageToImageTabParameters.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLImageToImageTabParameters.tsx index edc92a56c8..a6ee21ab68 100644 --- a/invokeai/frontend/web/src/features/sdxl/components/SDXLImageToImageTabParameters.tsx +++ b/invokeai/frontend/web/src/features/sdxl/components/SDXLImageToImageTabParameters.tsx @@ -1,10 +1,11 @@ import ParamDynamicPromptsCollapse from 'features/dynamicPrompts/components/ParamDynamicPromptsCollapse'; +import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; +import ParamControlNetCollapse from 'features/parameters/components/Parameters/ControlNet/ParamControlNetCollapse'; import ParamNoiseCollapse from 'features/parameters/components/Parameters/Noise/ParamNoiseCollapse'; import ProcessButtons from 'features/parameters/components/ProcessButtons/ProcessButtons'; import ParamSDXLPromptArea from './ParamSDXLPromptArea'; import ParamSDXLRefinerCollapse from './ParamSDXLRefinerCollapse'; import SDXLImageToImageTabCoreParameters from './SDXLImageToImageTabCoreParameters'; -import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; const SDXLImageToImageTabParameters = () => { return ( @@ -13,6 +14,7 @@ const SDXLImageToImageTabParameters = () => { + diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerNegativeAestheticScore.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerNegativeAestheticScore.tsx new file mode 100644 index 0000000000..4dad3f519a --- /dev/null +++ b/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerNegativeAestheticScore.tsx @@ -0,0 +1,60 @@ +import { createSelector } from '@reduxjs/toolkit'; +import { stateSelector } from 'app/store/store'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; +import IAISlider from 'common/components/IAISlider'; +import { setRefinerNegativeAestheticScore } from 'features/sdxl/store/sdxlSlice'; +import { memo, useCallback } from 'react'; +import { useIsRefinerAvailable } from 
'services/api/hooks/useIsRefinerAvailable'; + +const selector = createSelector( + [stateSelector], + ({ sdxl, hotkeys }) => { + const { refinerNegativeAestheticScore } = sdxl; + const { shift } = hotkeys; + + return { + refinerNegativeAestheticScore, + shift, + }; + }, + defaultSelectorOptions +); + +const ParamSDXLRefinerNegativeAestheticScore = () => { + const { refinerNegativeAestheticScore, shift } = useAppSelector(selector); + + const isRefinerAvailable = useIsRefinerAvailable(); + + const dispatch = useAppDispatch(); + + const handleChange = useCallback( + (v: number) => dispatch(setRefinerNegativeAestheticScore(v)), + [dispatch] + ); + + const handleReset = useCallback( + () => dispatch(setRefinerNegativeAestheticScore(2.5)), + [dispatch] + ); + + return ( + + ); +}; + +export default memo(ParamSDXLRefinerNegativeAestheticScore); diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerAestheticScore.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerPositiveAestheticScore.tsx similarity index 66% rename from invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerAestheticScore.tsx rename to invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerPositiveAestheticScore.tsx index 9c9c4b2f89..47842629f6 100644 --- a/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerAestheticScore.tsx +++ b/invokeai/frontend/web/src/features/sdxl/components/SDXLRefiner/ParamSDXLRefinerPositiveAestheticScore.tsx @@ -3,50 +3,50 @@ import { stateSelector } from 'app/store/store'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; import IAISlider from 'common/components/IAISlider'; -import { setRefinerAestheticScore } from 'features/sdxl/store/sdxlSlice'; +import { setRefinerPositiveAestheticScore } from 'features/sdxl/store/sdxlSlice'; import { memo, useCallback } from 'react'; import { useIsRefinerAvailable } from 'services/api/hooks/useIsRefinerAvailable'; const selector = createSelector( [stateSelector], ({ sdxl, hotkeys }) => { - const { refinerAestheticScore } = sdxl; + const { refinerPositiveAestheticScore } = sdxl; const { shift } = hotkeys; return { - refinerAestheticScore, + refinerPositiveAestheticScore, shift, }; }, defaultSelectorOptions ); -const ParamSDXLRefinerAestheticScore = () => { - const { refinerAestheticScore, shift } = useAppSelector(selector); +const ParamSDXLRefinerPositiveAestheticScore = () => { + const { refinerPositiveAestheticScore, shift } = useAppSelector(selector); const isRefinerAvailable = useIsRefinerAvailable(); const dispatch = useAppDispatch(); const handleChange = useCallback( - (v: number) => dispatch(setRefinerAestheticScore(v)), + (v: number) => dispatch(setRefinerPositiveAestheticScore(v)), [dispatch] ); const handleReset = useCallback( - () => dispatch(setRefinerAestheticScore(6)), + () => dispatch(setRefinerPositiveAestheticScore(6)), [dispatch] ); return ( { ); }; -export default memo(ParamSDXLRefinerAestheticScore); +export default memo(ParamSDXLRefinerPositiveAestheticScore); diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLTextToImageTabParameters.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLTextToImageTabParameters.tsx index 325fd7d881..c562951c4d 100644 --- a/invokeai/frontend/web/src/features/sdxl/components/SDXLTextToImageTabParameters.tsx +++ 
b/invokeai/frontend/web/src/features/sdxl/components/SDXLTextToImageTabParameters.tsx @@ -1,10 +1,11 @@ import ParamDynamicPromptsCollapse from 'features/dynamicPrompts/components/ParamDynamicPromptsCollapse'; +import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; +import ParamControlNetCollapse from 'features/parameters/components/Parameters/ControlNet/ParamControlNetCollapse'; import ParamNoiseCollapse from 'features/parameters/components/Parameters/Noise/ParamNoiseCollapse'; import ProcessButtons from 'features/parameters/components/ProcessButtons/ProcessButtons'; import TextToImageTabCoreParameters from 'features/ui/components/tabs/TextToImage/TextToImageTabCoreParameters'; import ParamSDXLPromptArea from './ParamSDXLPromptArea'; import ParamSDXLRefinerCollapse from './ParamSDXLRefinerCollapse'; -import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; const SDXLTextToImageTabParameters = () => { return ( @@ -13,6 +14,7 @@ const SDXLTextToImageTabParameters = () => { + diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabCoreParameters.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabCoreParameters.tsx new file mode 100644 index 0000000000..7db6ccc219 --- /dev/null +++ b/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabCoreParameters.tsx @@ -0,0 +1,75 @@ +import { Box, Flex } from '@chakra-ui/react'; +import { createSelector } from '@reduxjs/toolkit'; +import { stateSelector } from 'app/store/store'; +import { useAppSelector } from 'app/store/storeHooks'; +import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; +import IAICollapse from 'common/components/IAICollapse'; +import ParamBoundingBoxSize from 'features/parameters/components/Parameters/Canvas/BoundingBox/ParamBoundingBoxSize'; +import ParamCFGScale from 'features/parameters/components/Parameters/Core/ParamCFGScale'; +import ParamIterations from 'features/parameters/components/Parameters/Core/ParamIterations'; +import ParamModelandVAEandScheduler from 'features/parameters/components/Parameters/Core/ParamModelandVAEandScheduler'; +import ParamSteps from 'features/parameters/components/Parameters/Core/ParamSteps'; +import ParamSeedFull from 'features/parameters/components/Parameters/Seed/ParamSeedFull'; +import { memo } from 'react'; +import ParamSDXLImg2ImgDenoisingStrength from './ParamSDXLImg2ImgDenoisingStrength'; + +const selector = createSelector( + stateSelector, + ({ ui, generation }) => { + const { shouldUseSliders } = ui; + const { shouldRandomizeSeed } = generation; + + const activeLabel = !shouldRandomizeSeed ? 'Manual Seed' : undefined; + + return { shouldUseSliders, activeLabel }; + }, + defaultSelectorOptions +); + +const SDXLUnifiedCanvasTabCoreParameters = () => { + const { shouldUseSliders, activeLabel } = useAppSelector(selector); + + return ( + + + {shouldUseSliders ? 
( + <> + + + + + + + + + + ) : ( + <> + + + + + + + + + + + + )} + + + + ); +}; + +export default memo(SDXLUnifiedCanvasTabCoreParameters); diff --git a/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabParameters.tsx b/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabParameters.tsx new file mode 100644 index 0000000000..c6af754ad9 --- /dev/null +++ b/invokeai/frontend/web/src/features/sdxl/components/SDXLUnifiedCanvasTabParameters.tsx @@ -0,0 +1,27 @@ +import ParamDynamicPromptsCollapse from 'features/dynamicPrompts/components/ParamDynamicPromptsCollapse'; +import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; +import ParamInfillAndScalingCollapse from 'features/parameters/components/Parameters/Canvas/InfillAndScaling/ParamInfillAndScalingCollapse'; +import ParamMaskAdjustmentCollapse from 'features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskAdjustmentCollapse'; +import ParamControlNetCollapse from 'features/parameters/components/Parameters/ControlNet/ParamControlNetCollapse'; +import ParamNoiseCollapse from 'features/parameters/components/Parameters/Noise/ParamNoiseCollapse'; +import ProcessButtons from 'features/parameters/components/ProcessButtons/ProcessButtons'; +import ParamSDXLPromptArea from './ParamSDXLPromptArea'; +import ParamSDXLRefinerCollapse from './ParamSDXLRefinerCollapse'; +import SDXLUnifiedCanvasTabCoreParameters from './SDXLUnifiedCanvasTabCoreParameters'; + +export default function SDXLUnifiedCanvasTabParameters() { + return ( + <> + + + + + + + + + + + + ); +} diff --git a/invokeai/frontend/web/src/features/sdxl/store/sdxlSlice.ts b/invokeai/frontend/web/src/features/sdxl/store/sdxlSlice.ts index 7ee3ea1d4f..7670790f05 100644 --- a/invokeai/frontend/web/src/features/sdxl/store/sdxlSlice.ts +++ b/invokeai/frontend/web/src/features/sdxl/store/sdxlSlice.ts @@ -16,7 +16,8 @@ type SDXLInitialState = { refinerSteps: number; refinerCFGScale: number; refinerScheduler: SchedulerParam; - refinerAestheticScore: number; + refinerPositiveAestheticScore: number; + refinerNegativeAestheticScore: number; refinerStart: number; }; @@ -30,7 +31,8 @@ const sdxlInitialState: SDXLInitialState = { refinerSteps: 20, refinerCFGScale: 7.5, refinerScheduler: 'euler', - refinerAestheticScore: 6, + refinerPositiveAestheticScore: 6, + refinerNegativeAestheticScore: 2.5, refinerStart: 0.7, }; @@ -68,8 +70,17 @@ const sdxlSlice = createSlice({ setRefinerScheduler: (state, action: PayloadAction) => { state.refinerScheduler = action.payload; }, - setRefinerAestheticScore: (state, action: PayloadAction) => { - state.refinerAestheticScore = action.payload; + setRefinerPositiveAestheticScore: ( + state, + action: PayloadAction + ) => { + state.refinerPositiveAestheticScore = action.payload; + }, + setRefinerNegativeAestheticScore: ( + state, + action: PayloadAction + ) => { + state.refinerNegativeAestheticScore = action.payload; }, setRefinerStart: (state, action: PayloadAction) => { state.refinerStart = action.payload; @@ -87,7 +98,8 @@ export const { setRefinerSteps, setRefinerCFGScale, setRefinerScheduler, - setRefinerAestheticScore, + setRefinerPositiveAestheticScore, + setRefinerNegativeAestheticScore, setRefinerStart, } = sdxlSlice.actions; diff --git a/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasParameters.tsx b/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasParameters.tsx index de7ce3b084..fcfffee48b 100644 --- 
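The sdxlSlice changes above split the single refinerAestheticScore value into positive and negative scores (defaults 6 and 2.5). A small self-contained sketch of the resulting reducer shape and dispatch flow, assuming @reduxjs/toolkit; the slice and store names here are illustrative, not the application's real store wiring:

```ts
import { configureStore, createSlice, PayloadAction } from '@reduxjs/toolkit';

// Illustrative slice reproducing just the split aesthetic-score fields.
const sdxlSketch = createSlice({
  name: 'sdxlSketch',
  initialState: {
    refinerPositiveAestheticScore: 6,
    refinerNegativeAestheticScore: 2.5,
  },
  reducers: {
    setRefinerPositiveAestheticScore: (state, action: PayloadAction<number>) => {
      state.refinerPositiveAestheticScore = action.payload;
    },
    setRefinerNegativeAestheticScore: (state, action: PayloadAction<number>) => {
      state.refinerNegativeAestheticScore = action.payload;
    },
  },
});

const store = configureStore({ reducer: { sdxl: sdxlSketch.reducer } });
store.dispatch(sdxlSketch.actions.setRefinerNegativeAestheticScore(3));
console.log(store.getState().sdxl); // { refinerPositiveAestheticScore: 6, refinerNegativeAestheticScore: 3 }
```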
a/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasParameters.tsx +++ b/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasParameters.tsx @@ -2,10 +2,10 @@ import ParamDynamicPromptsCollapse from 'features/dynamicPrompts/components/Para import ParamLoraCollapse from 'features/lora/components/ParamLoraCollapse'; import ParamAdvancedCollapse from 'features/parameters/components/Parameters/Advanced/ParamAdvancedCollapse'; import ParamInfillAndScalingCollapse from 'features/parameters/components/Parameters/Canvas/InfillAndScaling/ParamInfillAndScalingCollapse'; -import ParamSeamCorrectionCollapse from 'features/parameters/components/Parameters/Canvas/SeamCorrection/ParamSeamCorrectionCollapse'; import ParamControlNetCollapse from 'features/parameters/components/Parameters/ControlNet/ParamControlNetCollapse'; import ParamSymmetryCollapse from 'features/parameters/components/Parameters/Symmetry/ParamSymmetryCollapse'; // import ParamVariationCollapse from 'features/parameters/components/Parameters/Variations/ParamVariationCollapse'; +import ParamMaskAdjustmentCollapse from 'features/parameters/components/Parameters/Canvas/MaskAdjustment/ParamMaskAdjustmentCollapse'; import ParamPromptArea from 'features/parameters/components/Parameters/Prompt/ParamPromptArea'; import ProcessButtons from 'features/parameters/components/ProcessButtons/ProcessButtons'; import UnifiedCanvasCoreParameters from './UnifiedCanvasCoreParameters'; @@ -21,7 +21,7 @@ const UnifiedCanvasParameters = () => { {/* */} - + diff --git a/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasTab.tsx b/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasTab.tsx index 4c36c45e13..0a5b872e4b 100644 --- a/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasTab.tsx +++ b/invokeai/frontend/web/src/features/ui/components/tabs/UnifiedCanvas/UnifiedCanvasTab.tsx @@ -1,14 +1,22 @@ import { Flex } from '@chakra-ui/react'; +import { RootState } from 'app/store/store'; +import { useAppSelector } from 'app/store/storeHooks'; +import SDXLUnifiedCanvasTabParameters from 'features/sdxl/components/SDXLUnifiedCanvasTabParameters'; import { memo } from 'react'; import ParametersPinnedWrapper from '../../ParametersPinnedWrapper'; import UnifiedCanvasContent from './UnifiedCanvasContent'; import UnifiedCanvasParameters from './UnifiedCanvasParameters'; const UnifiedCanvasTab = () => { + const model = useAppSelector((state: RootState) => state.generation.model); return ( - + {model && model.base_model === 'sdxl' ? ( + + ) : ( + + )} diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts index fc3397820e..0bfa7c334f 100644 --- a/invokeai/frontend/web/src/services/api/schema.d.ts +++ b/invokeai/frontend/web/src/services/api/schema.d.ts @@ -179,6 +179,11 @@ export type paths = { * @description Gets a full-resolution image file */ get: operations["get_image_full"]; + /** + * Get Image Full + * @description Gets a full-resolution image file + */ + head: operations["get_image_full"]; }; "/api/v1/images/i/{image_name}/thumbnail": { /** @@ -707,6 +712,51 @@ export type components = { */ collection: (unknown)[]; }; + /** + * ColorCorrectInvocation + * @description Shifts the colors of a target image to match the reference image, optionally + * using a mask to only color-correct certain regions of the target image. 
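The schema.d.ts hunk above registers a HEAD handler for the full-resolution image route alongside the existing GET. A minimal sketch of why that helps on the client: an existence check without downloading the image bytes. The exact path segment (`/full`) is assumed from the surrounding route names rather than shown in this hunk:

```ts
// Returns true when the server reports the full-resolution image is available.
const imageExists = async (imageName: string): Promise<boolean> => {
  const res = await fetch(`/api/v1/images/i/${imageName}/full`, {
    method: 'HEAD',
  });
  return res.ok;
};

// Illustrative usage with a placeholder image name.
imageExists('example-image.png').then((ok) =>
  console.log(ok ? 'image available' : 'image missing')
);
```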
+ */ + ColorCorrectInvocation: { + /** + * Id + * @description The id of this node. Must be unique among all nodes. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this node is an intermediate node. + * @default false + */ + is_intermediate?: boolean; + /** + * Type + * @default color_correct + * @enum {string} + */ + type?: "color_correct"; + /** + * Image + * @description The image to color-correct + */ + image?: components["schemas"]["ImageField"]; + /** + * Reference + * @description Reference image for color-correction + */ + reference?: components["schemas"]["ImageField"]; + /** + * Mask + * @description Mask to use when applying color-correction + */ + mask?: components["schemas"]["ImageField"]; + /** + * Mask Blur Radius + * @description Mask blur radius + * @default 8 + */ + mask_blur_radius?: number; + }; /** ColorField */ ColorField: { /** @@ -1037,6 +1087,12 @@ export type components = { * @description Core generation metadata for an image generated in InvokeAI. */ CoreMetadata: { + /** + * App Version + * @description The version of InvokeAI used to generate this image + * @default 3.0.2 + */ + app_version?: string; /** * Generation Mode * @description The generation mode that output this image @@ -1153,10 +1209,15 @@ export type components = { */ refiner_scheduler?: string; /** - * Refiner Aesthetic Store + * Refiner Positive Aesthetic Store * @description The aesthetic score used for the refiner */ - refiner_aesthetic_store?: number; + refiner_positive_aesthetic_store?: number; + /** + * Refiner Negative Aesthetic Store + * @description The aesthetic score used for the refiner + */ + refiner_negative_aesthetic_store?: number; /** * Refiner Start * @description The start value used for refiner denoising @@ -1219,6 +1280,93 @@ export type components = { /** Deleted Images */ deleted_images: (string)[]; }; + /** + * DenoiseLatentsInvocation + * @description Denoises noisy latents to decodable images + */ + DenoiseLatentsInvocation: { + /** + * Id + * @description The id of this node. Must be unique among all nodes. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this node is an intermediate node. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Type + * @default denoise_latents + * @enum {string} + */ + type?: "denoise_latents"; + /** + * Positive Conditioning + * @description Positive conditioning for generation + */ + positive_conditioning?: components["schemas"]["ConditioningField"]; + /** + * Negative Conditioning + * @description Negative conditioning for generation + */ + negative_conditioning?: components["schemas"]["ConditioningField"]; + /** + * Noise + * @description The noise to use + */ + noise?: components["schemas"]["LatentsField"]; + /** + * Steps + * @description The number of steps to use to generate the image + * @default 10 + */ + steps?: number; + /** + * Cfg Scale + * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt + * @default 7.5 + */ + cfg_scale?: number | (number)[]; + /** + * Denoising Start + * @default 0 + */ + denoising_start?: number; + /** + * Denoising End + * @default 1 + */ + denoising_end?: number; + /** + * Scheduler + * @description The scheduler to use + * @default euler + * @enum {string} + */ + scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; + /** + * Unet + * @description UNet submodel + */ + unet?: components["schemas"]["UNetField"]; + /** + * Control + * @description The control to use + */ + control?: components["schemas"]["ControlField"] | (components["schemas"]["ControlField"])[]; + /** + * Latents + * @description The latents to use as a base image + */ + latents?: components["schemas"]["LatentsField"]; + /** + * Mask + * @description Mask + */ + mask?: components["schemas"]["ImageField"]; + }; /** * DivideInvocation * @description Divides two numbers @@ -1443,7 +1591,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: (components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRawPromptInvocation"] | components["schemas"]["SDXLRefinerRawPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | 
components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["TextToLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SDXLTextToLatentsInvocation"] | components["schemas"]["SDXLLatentsToLatentsInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["InpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["LatentsToLatentsInvocation"]) | undefined; + [key: string]: (components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["RangeInvocation"] | 
components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | 
components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"]) | undefined; }; /** * Edges @@ -1486,7 +1634,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: (components["schemas"]["ImageOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["VaeLoaderOutput"] | components["schemas"]["MetadataAccumulatorOutput"] | components["schemas"]["CompelOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["PromptOutput"] | components["schemas"]["PromptCollectionOutput"] | components["schemas"]["IntOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["IntCollectionOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CollectInvocationOutput"]) | undefined; + [key: string]: (components["schemas"]["ImageOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["VaeLoaderOutput"] | components["schemas"]["MetadataAccumulatorOutput"] | components["schemas"]["IntCollectionOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CompelOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["IntOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["PromptOutput"] | components["schemas"]["PromptCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CollectInvocationOutput"]) | undefined; }; /** * Errors @@ -2593,171 +2741,6 @@ export type components = { */ seed?: number; }; - /** - * InpaintInvocation - * @description Generates an image using inpaint. - */ - InpaintInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default inpaint - * @enum {string} - */ - type?: "inpaint"; - /** - * Positive Conditioning - * @description Positive conditioning for generation - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Negative Conditioning - * @description Negative conditioning for generation - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Seed - * @description The seed to use (omit for random) - */ - seed?: number; - /** - * Steps - * @description The number of steps to use to generate the image - * @default 30 - */ - steps?: number; - /** - * Width - * @description The width of the resulting image - * @default 512 - */ - width?: number; - /** - * Height - * @description The height of the resulting image - * @default 512 - */ - height?: number; - /** - * Cfg Scale - * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt - * @default 7.5 - */ - cfg_scale?: number; - /** - * Scheduler - * @description The scheduler to use - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; - /** - * Unet - * @description UNet model - */ - unet?: components["schemas"]["UNetField"]; - /** - * Vae - * @description Vae model - */ - vae?: components["schemas"]["VaeField"]; - /** - * Image - * @description The input image - */ - image?: components["schemas"]["ImageField"]; - /** - * Strength - * @description The strength of the original image - * @default 0.75 - */ - strength?: number; - /** - * Fit - * @description Whether or not the result should be fit to the aspect ratio of the input image - * @default true - */ - fit?: boolean; - /** - * Mask - * @description The mask - */ - mask?: components["schemas"]["ImageField"]; - /** - * Seam Size - * @description The seam inpaint size (px) - * @default 96 - */ - seam_size?: number; - /** - * Seam Blur - * @description The seam inpaint blur radius (px) - * @default 16 - */ - seam_blur?: number; - /** - * Seam Strength - * @description The seam inpaint strength - * @default 0.75 - */ - seam_strength?: number; - /** - * Seam Steps - * @description The number of steps to use for seam inpaint - * @default 30 - */ - seam_steps?: number; - /** - * Tile Size - * @description The tile infill method size (px) - * @default 32 - */ - tile_size?: number; - /** - * Infill Method - * @description The method used to infill empty regions (px) - * @default patchmatch - * @enum {string} - */ - infill_method?: "patchmatch" | "tile" | "solid"; - /** - * Inpaint Width - * @description The width of the inpaint region (px) - */ - inpaint_width?: number; - /** - * Inpaint Height - * @description The height of the inpaint region (px) - */ - inpaint_height?: number; - /** - * Inpaint Fill - * @description The solid infill method color - * @default { - * "r": 127, - * "g": 127, - * "b": 127, - * "a": 255 - * } - */ - inpaint_fill?: components["schemas"]["ColorField"]; - /** - * Inpaint Replace - * @description The amount by which to replace masked areas with latent noise - * @default 0 - */ - inpaint_replace?: number; - }; /** * IntCollectionOutput * @description A collection of integers @@ -2854,6 +2837,11 @@ export type components = { * @description The name of 
the latents */ latents_name: string; + /** + * Seed + * @description Seed used to generate this latents + */ + seed?: number; }; /** * LatentsOutput @@ -2932,84 +2920,6 @@ export type components = { */ metadata?: components["schemas"]["CoreMetadata"]; }; - /** - * LatentsToLatentsInvocation - * @description Generates latents using latents as base image. - */ - LatentsToLatentsInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. - * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default l2l - * @enum {string} - */ - type?: "l2l"; - /** - * Positive Conditioning - * @description Positive conditioning for generation - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Negative Conditioning - * @description Negative conditioning for generation - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Noise - * @description The noise to use - */ - noise?: components["schemas"]["LatentsField"]; - /** - * Steps - * @description The number of steps to use to generate the image - * @default 10 - */ - steps?: number; - /** - * Cfg Scale - * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt - * @default 7.5 - */ - cfg_scale?: number | (number)[]; - /** - * Scheduler - * @description The scheduler to use - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; - /** - * Unet - * @description UNet submodel - */ - unet?: components["schemas"]["UNetField"]; - /** - * Control - * @description The control to use - */ - control?: components["schemas"]["ControlField"] | (components["schemas"]["ControlField"])[]; - /** - * Latents - * @description The latents to use as a base image - */ - latents?: components["schemas"]["LatentsField"]; - /** - * Strength - * @description The strength of the latents to use - * @default 0.7 - */ - strength?: number; - }; /** * LeresImageProcessorInvocation * @description Applies leres processing to image @@ -3368,6 +3278,87 @@ export type components = { */ model: components["schemas"]["MainModelField"]; }; + /** + * MaskCombineInvocation + * @description Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`. + */ + MaskCombineInvocation: { + /** + * Id + * @description The id of this node. Must be unique among all nodes. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this node is an intermediate node. + * @default false + */ + is_intermediate?: boolean; + /** + * Type + * @default mask_combine + * @enum {string} + */ + type?: "mask_combine"; + /** + * Mask1 + * @description The first mask to combine + */ + mask1?: components["schemas"]["ImageField"]; + /** + * Mask2 + * @description The second image to combine + */ + mask2?: components["schemas"]["ImageField"]; + }; + /** + * MaskEdgeInvocation + * @description Applies an edge mask to an image + */ + MaskEdgeInvocation: { + /** + * Id + * @description The id of this node. Must be unique among all nodes. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this node is an intermediate node. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Type + * @default mask_edge + * @enum {string} + */ + type?: "mask_edge"; + /** + * Image + * @description The image to apply the mask to + */ + image?: components["schemas"]["ImageField"]; + /** + * Edge Size + * @description The size of the edge + */ + edge_size: number; + /** + * Edge Blur + * @description The amount of blur on the edge + */ + edge_blur: number; + /** + * Low Threshold + * @description First threshold for the hysteresis procedure in Canny edge detection + */ + low_threshold: number; + /** + * High Threshold + * @description Second threshold for the hysteresis procedure in Canny edge detection + */ + high_threshold: number; + }; /** * MaskFromAlphaInvocation * @description Extracts the alpha channel of an image as a mask. @@ -3613,10 +3604,15 @@ export type components = { */ refiner_scheduler?: string; /** - * Refiner Aesthetic Store + * Refiner Positive Aesthetic Score * @description The aesthetic score used for the refiner */ - refiner_aesthetic_store?: number; + refiner_positive_aesthetic_score?: number; + /** + * Refiner Negative Aesthetic Score + * @description The aesthetic score used for the refiner + */ + refiner_negative_aesthetic_score?: number; /** * Refiner Start * @description The start value used for refiner denoising @@ -4937,83 +4933,6 @@ export type components = { */ clip2?: components["schemas"]["ClipField"]; }; - /** - * SDXLLatentsToLatentsInvocation - * @description Generates latents from conditionings. - */ - SDXLLatentsToLatentsInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. - * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default l2l_sdxl - * @enum {string} - */ - type?: "l2l_sdxl"; - /** - * Positive Conditioning - * @description Positive conditioning for generation - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Negative Conditioning - * @description Negative conditioning for generation - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Noise - * @description The noise to use - */ - noise?: components["schemas"]["LatentsField"]; - /** - * Steps - * @description The number of steps to use to generate the image - * @default 10 - */ - steps?: number; - /** - * Cfg Scale - * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt - * @default 7.5 - */ - cfg_scale?: number | (number)[]; - /** - * Scheduler - * @description The scheduler to use - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; - /** - * Unet - * @description UNet submodel - */ - unet?: components["schemas"]["UNetField"]; - /** - * Latents - * @description Initial latents - */ - latents?: components["schemas"]["LatentsField"]; - /** - * Denoising Start - * @default 0 - */ - denoising_start?: number; - /** - * Denoising End - * @default 1 - */ - denoising_end?: number; - }; /** * SDXLLoraLoaderInvocation * @description Apply selected lora to unet and text_encoder. 
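Elsewhere in this regenerated schema, the old TextToLatents / LatentsToLatents / Inpaint node types are dropped from the graph unions in favour of the single DenoiseLatentsInvocation defined earlier. A minimal sketch of building such a node against the generated types; the 'services/api/schema' import root matches other imports in this patch, while the id and parameter values are placeholders:

```ts
import type { components } from 'services/api/schema';

type DenoiseLatents = components['schemas']['DenoiseLatentsInvocation'];

// Placeholder node: only `id` is required; omitted fields fall back to the
// schema defaults (steps 10, cfg_scale 7.5, scheduler 'euler', ...).
const denoiseNode: DenoiseLatents = {
  id: 'denoise_latents_1',
  type: 'denoise_latents',
  is_intermediate: true,
  steps: 30,
  cfg_scale: 7.5,
  denoising_start: 0,
  denoising_end: 0.8,
  scheduler: 'euler',
  positive_conditioning: { conditioning_name: 'example_positive_conditioning' },
  negative_conditioning: { conditioning_name: 'example_negative_conditioning' },
};

console.log(denoiseNode);
```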
@@ -5150,81 +5069,6 @@ export type components = { */ vae?: components["schemas"]["VaeField"]; }; - /** - * SDXLRawPromptInvocation - * @description Pass unmodified prompt to conditioning without compel processing. - */ - SDXLRawPromptInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. - * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default sdxl_raw_prompt - * @enum {string} - */ - type?: "sdxl_raw_prompt"; - /** - * Prompt - * @description Prompt - * @default - */ - prompt?: string; - /** - * Style - * @description Style prompt - * @default - */ - style?: string; - /** - * Original Width - * @default 1024 - */ - original_width?: number; - /** - * Original Height - * @default 1024 - */ - original_height?: number; - /** - * Crop Top - * @default 0 - */ - crop_top?: number; - /** - * Crop Left - * @default 0 - */ - crop_left?: number; - /** - * Target Width - * @default 1024 - */ - target_width?: number; - /** - * Target Height - * @default 1024 - */ - target_height?: number; - /** - * Clip - * @description Clip to use - */ - clip?: components["schemas"]["ClipField"]; - /** - * Clip2 - * @description Clip2 to use - */ - clip2?: components["schemas"]["ClipField"]; - }; /** * SDXLRefinerCompelPromptInvocation * @description Parse prompt using compel package to conditioning. @@ -5339,132 +5183,6 @@ export type components = { */ vae?: components["schemas"]["VaeField"]; }; - /** - * SDXLRefinerRawPromptInvocation - * @description Parse prompt using compel package to conditioning. - */ - SDXLRefinerRawPromptInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. - * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default sdxl_refiner_raw_prompt - * @enum {string} - */ - type?: "sdxl_refiner_raw_prompt"; - /** - * Style - * @description Style prompt - * @default - */ - style?: string; - /** - * Original Width - * @default 1024 - */ - original_width?: number; - /** - * Original Height - * @default 1024 - */ - original_height?: number; - /** - * Crop Top - * @default 0 - */ - crop_top?: number; - /** - * Crop Left - * @default 0 - */ - crop_left?: number; - /** - * Aesthetic Score - * @default 6 - */ - aesthetic_score?: number; - /** - * Clip2 - * @description Clip to use - */ - clip2?: components["schemas"]["ClipField"]; - }; - /** - * SDXLTextToLatentsInvocation - * @description Generates latents from conditionings. - */ - SDXLTextToLatentsInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default t2l_sdxl - * @enum {string} - */ - type?: "t2l_sdxl"; - /** - * Positive Conditioning - * @description Positive conditioning for generation - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Negative Conditioning - * @description Negative conditioning for generation - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Noise - * @description The noise to use - */ - noise?: components["schemas"]["LatentsField"]; - /** - * Steps - * @description The number of steps to use to generate the image - * @default 10 - */ - steps?: number; - /** - * Cfg Scale - * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt - * @default 7.5 - */ - cfg_scale?: number | (number)[]; - /** - * Scheduler - * @description The scheduler to use - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; - /** - * Unet - * @description UNet submodel - */ - unet?: components["schemas"]["UNetField"]; - /** - * Denoising End - * @default 1 - */ - denoising_end?: number; - }; /** * ScaleLatentsInvocation * @description Scales latents by a given factor. @@ -5863,73 +5581,6 @@ export type components = { */ b?: number; }; - /** - * TextToLatentsInvocation - * @description Generates latents from conditionings. - */ - TextToLatentsInvocation: { - /** - * Id - * @description The id of this node. Must be unique among all nodes. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this node is an intermediate node. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Type - * @default t2l - * @enum {string} - */ - type?: "t2l"; - /** - * Positive Conditioning - * @description Positive conditioning for generation - */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Negative Conditioning - * @description Negative conditioning for generation - */ - negative_conditioning?: components["schemas"]["ConditioningField"]; - /** - * Noise - * @description The noise to use - */ - noise?: components["schemas"]["LatentsField"]; - /** - * Steps - * @description The number of steps to use to generate the image - * @default 10 - */ - steps?: number; - /** - * Cfg Scale - * @description The Classifier-Free Guidance, higher values may result in a result closer to the prompt - * @default 7.5 - */ - cfg_scale?: number | (number)[]; - /** - * Scheduler - * @description The scheduler to use - * @default euler - * @enum {string} - */ - scheduler?: "ddim" | "ddpm" | "deis" | "lms" | "lms_k" | "pndm" | "heun" | "heun_k" | "euler" | "euler_k" | "euler_a" | "kdpm_2" | "kdpm_2_a" | "dpmpp_2s" | "dpmpp_2s_k" | "dpmpp_2m" | "dpmpp_2m_k" | "dpmpp_2m_sde" | "dpmpp_2m_sde_k" | "dpmpp_sde" | "dpmpp_sde_k" | "unipc"; - /** - * Unet - * @description UNet submodel - */ - unet?: components["schemas"]["UNetField"]; - /** - * Control - * @description The control to use - */ - control?: components["schemas"]["ControlField"] | (components["schemas"]["ControlField"])[]; - }; /** TextualInversionModelConfig */ TextualInversionModelConfig: { /** Model Name */ @@ -6145,18 +5796,18 @@ export type components = { * @enum {string} */ ControlNetModelFormat: "checkpoint" | "diffusers"; + /** + * StableDiffusion2ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; /** * StableDiffusionXLModelFormat * @description An enumeration. * @enum {string} */ StableDiffusionXLModelFormat: "checkpoint" | "diffusers"; - /** - * StableDiffusion1ModelFormat - * @description An enumeration. - * @enum {string} - */ - StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; /** * StableDiffusionOnnxModelFormat * @description An enumeration. @@ -6164,11 +5815,11 @@ export type components = { */ StableDiffusionOnnxModelFormat: "olive" | "onnx"; /** - * StableDiffusion2ModelFormat + * StableDiffusion1ModelFormat * @description An enumeration. 
* @enum {string} */ - StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; + StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; }; responses: never; parameters: never; @@ -6279,7 +5930,7 @@ export type operations = { }; requestBody: { content: { - "application/json": components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRawPromptInvocation"] | components["schemas"]["SDXLRefinerRawPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["TextToLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SDXLTextToLatentsInvocation"] | components["schemas"]["SDXLLatentsToLatentsInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["NoiseInvocation"] | 
components["schemas"]["ESRGANInvocation"] | components["schemas"]["InpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["LatentsToLatentsInvocation"]; + "application/json": components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | 
components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"]; }; }; responses: { @@ -6316,7 +5967,7 @@ export type operations = { }; requestBody: { content: { - "application/json": components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRawPromptInvocation"] | components["schemas"]["SDXLRefinerRawPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | 
components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["TextToLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SDXLTextToLatentsInvocation"] | components["schemas"]["SDXLLatentsToLatentsInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["InpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["LatentsToLatentsInvocation"]; + "application/json": components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | 
components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataAccumulatorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LoadImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageLuminosityAdjustmentInvocation"] | components["schemas"]["ImageSaturationAdjustmentInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ONNXSD1ModelLoaderInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ParamIntInvocation"] | components["schemas"]["ParamFloatInvocation"] | components["schemas"]["ParamStringInvocation"] | components["schemas"]["ParamPromptInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | 
components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"]; }; }; responses: { diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index e7e3accdad..435b605489 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -120,9 +120,6 @@ export type RandomRangeInvocation = TypeReq< export type RangeOfSizeInvocation = TypeReq< components['schemas']['RangeOfSizeInvocation'] >; -export type InpaintInvocation = TypeReq< - components['schemas']['InpaintInvocation'] ->; export type ImageResizeInvocation = TypeReq< components['schemas']['ImageResizeInvocation'] >; @@ -139,14 +136,11 @@ export type DynamicPromptInvocation = TypeReq< components['schemas']['DynamicPromptInvocation'] >; export type NoiseInvocation = TypeReq; -export type TextToLatentsInvocation = TypeReq< - components['schemas']['TextToLatentsInvocation'] ->; export type ONNXTextToLatentsInvocation = TypeReq< components['schemas']['ONNXTextToLatentsInvocation'] >; -export type LatentsToLatentsInvocation = TypeReq< - components['schemas']['LatentsToLatentsInvocation'] +export type DenoiseLatentsInvocation = TypeReq< + components['schemas']['DenoiseLatentsInvocation'] >; export type ImageToLatentsInvocation = TypeReq< components['schemas']['ImageToLatentsInvocation'] @@ -178,12 +172,27 @@ export type ESRGANInvocation = TypeReq< export type DivideInvocation = TypeReq< components['schemas']['DivideInvocation'] >; +export type InfillTileInvocation = TypeReq< + components['schemas']['InfillTileInvocation'] +>; +export type InfillPatchmatchInvocation = TypeReq< + components['schemas']['InfillPatchMatchInvocation'] +>; export type ImageNSFWBlurInvocation = TypeReq< components['schemas']['ImageNSFWBlurInvocation'] >; export type ImageWatermarkInvocation = TypeReq< components['schemas']['ImageWatermarkInvocation'] >; +export type ImageBlurInvocation = TypeReq< + components['schemas']['ImageBlurInvocation'] +>; +export type ColorCorrectInvocation = TypeReq< + components['schemas']['ColorCorrectInvocation'] +>; +export type ImagePasteInvocation = TypeReq< + components['schemas']['ImagePasteInvocation'] +>; // ControlNet Nodes export type ControlNetInvocation = TypeReq< diff --git a/pyproject.toml b/pyproject.toml index 8fb2e7aaa7..6e5b754914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "controlnet-aux>=0.0.6", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "datasets", - "diffusers[torch]~=0.19.0", + "diffusers[torch]~=0.19.3", "dnspython~=2.4.0", "dynamicprompts", "easing-functions",