From 6ea183f0d460fb2ea6f983f84b47a4fda83df4e8 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 11:09:45 +0530 Subject: [PATCH 01/21] wip: Initial Implementation IP Adapter Style & Comp Modes --- invokeai/app/invocations/ip_adapter.py | 27 +- invokeai/app/invocations/latent.py | 59 ++-- .../stable_diffusion/diffusers_pipeline.py | 14 +- .../diffusion/conditioning_data.py | 1 + .../diffusion/custom_atttention.py | 54 ++-- .../diffusion/unet_attention_patcher.py | 35 ++- .../util/graph/addIPAdapterToLinearGraph.ts | 1 + .../frontend/web/src/services/api/schema.ts | 255 +++++++++++++++++- 8 files changed, 352 insertions(+), 94 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 40cde8f3e9..b239e89bb6 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -4,20 +4,8 @@ from typing import List, Literal, Optional, Union from pydantic import BaseModel, Field, field_validator, model_validator from typing_extensions import Self -from invokeai.app.invocations.baseinvocation import ( - BaseInvocation, - BaseInvocationOutput, - invocation, - invocation_output, -) -from invokeai.app.invocations.fields import ( - FieldDescriptions, - Input, - InputField, - OutputField, - TensorField, - UIType, -) +from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, TensorField, UIType from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.invocations.primitives import ImageField from invokeai.app.invocations.util import validate_begin_end_step, validate_weights @@ -36,6 +24,7 @@ class IPAdapterField(BaseModel): ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model to use.") image_encoder_model: ModelIdentifierField = Field(description="The name of the CLIP image encoder model.") weight: Union[float, List[float]] = Field(default=1, description="The weight given to the IP-Adapter.") + target_blocks: List[str] = Field(default=[], description="The IP Adapter blocks to apply") begin_step_percent: float = Field( default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)" ) @@ -90,6 +79,9 @@ class IPAdapterInvocation(BaseInvocation): weight: Union[float, List[float]] = InputField( default=1, description="The weight given to the IP-Adapter", title="Weight" ) + method: Literal["full", "style", "composition"] = InputField( + default="full", description="The method to apply the IP-Adapter" + ) begin_step_percent: float = InputField( default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)" ) @@ -124,12 +116,19 @@ class IPAdapterInvocation(BaseInvocation): image_encoder_model = self._get_image_encoder(context, image_encoder_model_name) + target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] + if self.method == "style": + target_blocks = ["up_blocks.0.attentions.1"] + elif self.method == "composition": + target_blocks = ["down_blocks.2.attentions.1"] + return IPAdapterOutput( ip_adapter=IPAdapterField( image=self.image, ip_adapter_model=self.ip_adapter_model, image_encoder_model=ModelIdentifierField.from_config(image_encoder_model), weight=self.weight, + target_blocks=target_blocks, begin_step_percent=self.begin_step_percent, 
end_step_percent=self.end_step_percent, mask=self.mask, diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ce63d568c6..ede26a823f 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -15,12 +15,10 @@ from diffusers import AutoencoderKL, AutoencoderTiny from diffusers.configuration_utils import ConfigMixin from diffusers.image_processor import VaeImageProcessor from diffusers.models.adapter import T2IAdapter -from diffusers.models.attention_processor import ( - AttnProcessor2_0, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, - XFormersAttnProcessor, -) +from diffusers.models.attention_processor import (AttnProcessor2_0, + LoRAAttnProcessor2_0, + LoRAXFormersAttnProcessor, + XFormersAttnProcessor) from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from diffusers.schedulers import DPMSolverSDEScheduler from diffusers.schedulers import SchedulerMixin as Scheduler @@ -29,22 +27,17 @@ from pydantic import field_validator from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPVisionModelWithProjection -from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES -from invokeai.app.invocations.fields import ( - ConditioningField, - DenoiseMaskField, - FieldDescriptions, - ImageField, - Input, - InputField, - LatentsField, - OutputField, - UIType, - WithBoard, - WithMetadata, -) +from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR, + SCHEDULER_NAME_VALUES) +from invokeai.app.invocations.fields import (ConditioningField, + DenoiseMaskField, + FieldDescriptions, ImageField, + Input, InputField, LatentsField, + OutputField, UIType, WithBoard, + WithMetadata) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput +from invokeai.app.invocations.primitives import (DenoiseMaskOutput, + ImageOutput, LatentsOutput) from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -52,28 +45,21 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus from invokeai.backend.lora import LoRAModelRaw from invokeai.backend.model_manager import BaseModelType, LoadedModel from invokeai.backend.model_patcher import ModelPatcher -from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless +from invokeai.backend.stable_diffusion import (PipelineIntermediateState, + set_seamless) from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( - BasicConditioningInfo, - IPAdapterConditioningInfo, - IPAdapterData, - Range, - SDXLConditioningInfo, - TextConditioningData, - TextConditioningRegions, -) + BasicConditioningInfo, IPAdapterConditioningInfo, IPAdapterData, Range, + SDXLConditioningInfo, TextConditioningData, TextConditioningRegions) from invokeai.backend.util.mask import to_standard_float_mask from invokeai.backend.util.silence_warnings import SilenceWarnings from ...backend.stable_diffusion.diffusers_pipeline import ( - ControlNetData, - StableDiffusionGeneratorPipeline, - T2IAdapterData, - image_resized_to_grid_as_tensor, -) + ControlNetData, StableDiffusionGeneratorPipeline, T2IAdapterData, + image_resized_to_grid_as_tensor) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from 
...backend.util.devices import choose_precision, choose_torch_device -from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation, + invocation_output) from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField, VAEField @@ -682,6 +668,7 @@ class DenoiseLatentsInvocation(BaseInvocation): IPAdapterData( ip_adapter_model=ip_adapter_model, weight=single_ip_adapter.weight, + target_blocks=single_ip_adapter.target_blocks, begin_step_percent=single_ip_adapter.begin_step_percent, end_step_percent=single_ip_adapter.end_step_percent, ip_adapter_conditioning=IPAdapterConditioningInfo(image_prompt_embeds, uncond_image_prompt_embeds), diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index b4d1b3381c..befda72751 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -21,12 +21,9 @@ from pydantic import Field from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from invokeai.app.services.config.config_default import get_config -from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( - IPAdapterData, - TextConditioningData, -) +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import IPAdapterData, TextConditioningData from invokeai.backend.stable_diffusion.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent -from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher +from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher, UNetIPAdapterData from invokeai.backend.util.attention import auto_detect_slice_size from invokeai.backend.util.devices import normalize_device @@ -394,8 +391,13 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): unet_attention_patcher = None self.use_ip_adapter = use_ip_adapter attn_ctx = nullcontext() + if use_ip_adapter or use_regional_prompting: - ip_adapters = [ipa.ip_adapter_model for ipa in ip_adapter_data] if use_ip_adapter else None + ip_adapters: Optional[List[UNetIPAdapterData]] = ( + [{"ip_adapter": ipa.ip_adapter_model, "target_blocks": ipa.target_blocks} for ipa in ip_adapter_data] + if use_ip_adapter + else None + ) unet_attention_patcher = UNetAttentionPatcher(ip_adapters) attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model) diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 9b8ea0968a..85950a01df 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -53,6 +53,7 @@ class IPAdapterData: ip_adapter_model: IPAdapter ip_adapter_conditioning: IPAdapterConditioningInfo mask: torch.Tensor + target_blocks: List[str] # Either a single weight applied to all steps, or a list of weights for each step. 
weight: Union[float, List[float]] = 1.0 diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index ed706f6453..5a42d9572e 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import List, Optional, TypedDict import torch import torch.nn.functional as F @@ -9,6 +9,11 @@ from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import Regiona from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData +class IPAdapterAttentionWeights(TypedDict): + ip_adapter_weights: List[IPAttentionProcessorWeights] + skip: bool + + class CustomAttnProcessor2_0(AttnProcessor2_0): """A custom implementation of AttnProcessor2_0 that supports additional Invoke features. This implementation is based on @@ -20,7 +25,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): def __init__( self, - ip_adapter_weights: Optional[list[IPAttentionProcessorWeights]] = None, + ip_adapter_attention_weights: Optional[IPAdapterAttentionWeights] = None, ): """Initialize a CustomAttnProcessor2_0. Note: Arguments that are the same for all attention layers are passed to __call__(). Arguments that are @@ -30,10 +35,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): for the i'th IP-Adapter. """ super().__init__() - self._ip_adapter_weights = ip_adapter_weights - - def _is_ip_adapter_enabled(self) -> bool: - return self._ip_adapter_weights is not None + self._ip_adapter_attention_weights = ip_adapter_attention_weights def __call__( self, @@ -130,17 +132,17 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Apply IP-Adapter conditioning. if is_cross_attention: - if self._is_ip_adapter_enabled(): + if self._ip_adapter_attention_weights: assert regional_ip_data is not None ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len) assert ( len(regional_ip_data.image_prompt_embeds) - == len(self._ip_adapter_weights) + == len(self._ip_adapter_attention_weights["ip_adapter_weights"]) == len(regional_ip_data.scales) == ip_masks.shape[1] ) for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds): - ipa_weights = self._ip_adapter_weights[ipa_index] + ipa_weights = self._ip_adapter_attention_weights["ip_adapter_weights"][ipa_index] ipa_scale = regional_ip_data.scales[ipa_index] ip_mask = ip_masks[0, ipa_index, ...] 
@@ -153,29 +155,33 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) - ip_key = ipa_weights.to_k_ip(ip_hidden_states) - ip_value = ipa_weights.to_v_ip(ip_hidden_states) + if self._ip_adapter_attention_weights["skip"]: - # Expected ip_key and ip_value shape: (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) + ip_key = ipa_weights.to_k_ip(ip_hidden_states) + ip_value = ipa_weights.to_v_ip(ip_hidden_states) - ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + # Expected ip_key and ip_value shape: (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) - # Expected ip_key and ip_value shape: (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - # TODO: add support for attn.scale when we move to Torch 2.1 - ip_hidden_states = F.scaled_dot_product_attention( - query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False - ) + # Expected ip_key and ip_value shape: (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) - # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) + # TODO: add support for attn.scale when we move to Torch 2.1 + ip_hidden_states = F.scaled_dot_product_attention( + query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False + ) - ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) - ip_hidden_states = ip_hidden_states.to(query.dtype) + # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) - # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) + ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( + batch_size, -1, attn.heads * head_dim + ) + ip_hidden_states = ip_hidden_states.to(query.dtype) - hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask + # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) + + hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask else: # If IP-Adapter is not enabled, then regional_ip_data should not be passed in. 
assert regional_ip_data is None diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 89a203f643..65992a62b5 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -1,17 +1,25 @@ from contextlib import contextmanager -from typing import Optional +from typing import List, Optional, TypedDict from diffusers.models import UNet2DConditionModel from invokeai.backend.ip_adapter.ip_adapter import IPAdapter -from invokeai.backend.stable_diffusion.diffusion.custom_atttention import CustomAttnProcessor2_0 +from invokeai.backend.stable_diffusion.diffusion.custom_atttention import ( + CustomAttnProcessor2_0, + IPAdapterAttentionWeights, +) + + +class UNetIPAdapterData(TypedDict): + ip_adapter: IPAdapter + target_blocks: List[str] class UNetAttentionPatcher: """A class for patching a UNet with CustomAttnProcessor2_0 attention layers.""" - def __init__(self, ip_adapters: Optional[list[IPAdapter]]): - self._ip_adapters = ip_adapters + def __init__(self, ip_adapter_data: Optional[List[UNetIPAdapterData]]): + self._ip_adapters = ip_adapter_data def _prepare_attention_processors(self, unet: UNet2DConditionModel): """Prepare a dict of attention processors that can be injected into a unet, and load the IP-Adapter attention @@ -25,10 +33,23 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: + + ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} + for ip_adapter in self._ip_adapters: + + ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) + skip = False + for block in ip_adapter["target_blocks"]: + if block in name: + skip = True + break + + ip_adapter_attention_weights.update({"ip_adapter_weights": [ip_adapter_weight], "skip": skip}) + # Collect the weights from each IP Adapter for the idx'th attention processor. - attn_procs[name] = CustomAttnProcessor2_0( - [ip_adapter.attn_weights.get_attention_processor_weights(idx) for ip_adapter in self._ip_adapters], - ) + + attn_procs[name] = CustomAttnProcessor2_0(ip_adapter_attention_weights) + return attn_procs @contextmanager diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts index ad563de468..ad530f7765 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts @@ -57,6 +57,7 @@ export const addIPAdapterToLinearGraph = async ( type: 'ip_adapter', is_intermediate: true, weight: weight, + method: 'composition', ip_adapter_model: model, clip_vision_model: clipVisionModel, begin_step_percent: beginStepPct, diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index cb222bd497..9028054a1f 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -1238,6 +1238,39 @@ export type components = { */ type: "boolean_output"; }; + /** + * BRIA AI Background Removal + * @description Uses the new Bria 1.4 model to remove backgrounds from images. 
+ */ + BriaRemoveBackgroundInvocation: { + /** @description Optional metadata to be saved with the image */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** @description The image to crop */ + image?: components["schemas"]["ImageField"]; + /** + * type + * @default bria_bg_remove + * @constant + */ + type: "bria_bg_remove"; + }; /** CLIPField */ CLIPField: { /** @description Info to load tokenizer submodel */ @@ -2070,6 +2103,8 @@ export type components = { * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count */ clip?: components["schemas"]["CLIPField"]; + /** @description A mask defining the region that this conditioning prompt applies to. */ + mask?: components["schemas"]["TensorField"] | null; /** * type * @default compel @@ -2139,6 +2174,11 @@ export type components = { * @description The name of conditioning tensor */ conditioning_name: string; + /** + * @description The mask associated with this conditioning tensor. Excluded regions should be set to False, included regions should be set to True. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; }; /** * Conditioning Primitive @@ -3049,10 +3089,16 @@ export type components = { * @default true */ use_cache?: boolean; - /** @description Positive conditioning tensor */ - positive_conditioning?: components["schemas"]["ConditioningField"]; - /** @description Negative conditioning tensor */ - negative_conditioning?: components["schemas"]["ConditioningField"]; + /** + * Positive Conditioning + * @description Positive conditioning tensor + */ + positive_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; + /** + * Negative Conditioning + * @description Negative conditioning tensor + */ + negative_conditioning?: components["schemas"]["ConditioningField"] | components["schemas"]["ConditioningField"][]; /** @description Noise tensor */ noise?: components["schemas"]["LatentsField"] | null; /** @@ -4112,7 +4158,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["IntegerMathInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | 
components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["PairTileImageInvocation"] | 
components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartImageProcessorInvocation"]; + [key: string]: components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] 
| components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | 
components["schemas"]["CompelInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["FloatMathInvocation"]; }; /** * Edges @@ -4149,7 +4195,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["FloatCollectionOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["String2Output"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["FloatOutput"] 
| components["schemas"]["GradientMaskOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"]; + [key: string]: components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["LatentsCollectionOutput"]; }; /** * Errors @@ -4200,6 +4246,83 @@ export type components = { /** Detail */ detail?: components["schemas"]["ValidationError"][]; }; + /** + * Hand Depth w/ MeshGraphormer + * @description Generate hand depth maps to inpaint with using ControlNet + */ + HandDepthMeshGraphormerProcessor: { + /** @description Optional metadata to be saved with the image */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** @description The image to process */ + image?: components["schemas"]["ImageField"]; + /** + * Resolution + * @description Pixel resolution for output image + * @default 512 + */ + resolution?: number; + /** + * Mask Padding + * @description Amount to pad the hand mask by + * @default 30 + */ + mask_padding?: number; + /** + * Offload + * @description Offload model after usage + * @default false + */ + offload?: boolean; + /** + * type + * @default hand_depth_mesh_graphormer_image_processor + * @constant + */ + type: "hand_depth_mesh_graphormer_image_processor"; + }; + /** + * HandDepthOutput + * @description Base class for to output Meshgraphormer results + */ + HandDepthOutput: { + /** @description Improved hands depth map */ + image: components["schemas"]["ImageField"]; + /** @description Hands area mask */ + mask: components["schemas"]["ImageField"]; + /** + * Width + * @description The width of the depth map in pixels + */ + width: number; + /** + * Height + * @description The height of the depth map in pixels + */ + height: number; + /** + * type + * @default meshgraphormer_output + * @constant + */ + type: "meshgraphormer_output"; + }; /** * HED (softedge) Processor * @description Applies HED edge detection to image @@ -4383,10 +4506,16 @@ export type components = { image_encoder_model: components["schemas"]["ModelIdentifierField"]; /** * Weight - * @description The weight given to the ControlNet + * @description The weight given to the IP-Adapter. * @default 1 */ weight?: number | number[]; + /** + * Target Blocks + * @description The IP Adapter blocks to apply + * @default [] + */ + target_blocks?: string[]; /** * Begin Step Percent * @description When the IP-Adapter is first applied (% of total steps) @@ -4399,6 +4528,11 @@ export type components = { * @default 1 */ end_step_percent?: number; + /** + * @description The bool mask associated with this IP-Adapter. Excluded regions should be set to False, included regions should be set to True. + * @default null + */ + mask?: components["schemas"]["TensorField"] | null; }; /** * IP-Adapter @@ -4445,6 +4579,13 @@ export type components = { * @default 1 */ weight?: number | number[]; + /** + * Method + * @description The method to apply the IP-Adapter + * @default full + * @enum {string} + */ + method?: "full" | "style" | "composition"; /** * Begin Step Percent * @description When the IP-Adapter is first applied (% of total steps) @@ -4457,6 +4598,8 @@ export type components = { * @default 1 */ end_step_percent?: number; + /** @description A mask defining the region that this IP-Adapter applies to. */ + mask?: components["schemas"]["TensorField"] | null; /** * type * @default ip_adapter @@ -7104,6 +7247,30 @@ export type components = { */ type: "mask_from_id"; }; + /** + * MaskOutput + * @description A torch mask tensor. + */ + MaskOutput: { + /** @description The mask. */ + mask: components["schemas"]["TensorField"]; + /** + * Width + * @description The width of the mask in pixels. + */ + width: number; + /** + * Height + * @description The height of the mask in pixels. 
+ */ + height: number; + /** + * type + * @default mask_output + * @constant + */ + type: "mask_output"; + }; /** * Mediapipe Face Processor * @description Applies mediapipe face processing to image @@ -8364,6 +8531,67 @@ export type components = { */ type: "range_of_size"; }; + /** + * Create Rectangle Mask + * @description Create a rectangular mask. + */ + RectangleMaskInvocation: { + /** @description Optional metadata to be saved with the image */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Width + * @description The width of the entire mask. + */ + width?: number; + /** + * Height + * @description The height of the entire mask. + */ + height?: number; + /** + * X Left + * @description The left x-coordinate of the rectangular masked region (inclusive). + */ + x_left?: number; + /** + * Y Top + * @description The top y-coordinate of the rectangular masked region (inclusive). + */ + y_top?: number; + /** + * Rectangle Width + * @description The width of the rectangular masked region. + */ + rectangle_width?: number; + /** + * Rectangle Height + * @description The height of the rectangular masked region. + */ + rectangle_height?: number; + /** + * type + * @default rectangle_mask + * @constant + */ + type: "rectangle_mask"; + }; /** * RemoteModelFile * @description Information about a downloadable file that forms part of a model. @@ -8579,6 +8807,8 @@ export type components = { * @description CLIP (tokenizer, text encoder, LoRAs) and skipped layer count */ clip2?: components["schemas"]["CLIPField"]; + /** @description A mask defining the region that this conditioning prompt applies to. */ + mask?: components["schemas"]["TensorField"] | null; /** * type * @default sdxl_compel_prompt @@ -10115,6 +10345,17 @@ export type components = { /** Right */ right: number; }; + /** + * TensorField + * @description A tensor primitive field. + */ + TensorField: { + /** + * Tensor Name + * @description The name of a tensor. + */ + tensor_name: string; + }; /** * TextualInversionFileConfig * @description Model config for textual inversion embeddings. 
From e9f16ac8c756d9462bcb812539d1d2632695e6c4 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:06:59 +0530 Subject: [PATCH 02/21] feat: add UI for IP Adapter Method --- invokeai/app/invocations/metadata.py | 1 + invokeai/frontend/web/public/locales/en.json | 4 ++ .../components/ControlAdapterConfig.tsx | 4 +- .../ParamControlAdapterIPMethod.tsx | 63 +++++++++++++++++++ .../hooks/useControlAdapterIPMethod.ts | 24 +++++++ .../store/controlAdaptersSlice.ts | 6 ++ .../features/controlAdapters/store/types.ts | 5 ++ .../util/buildControlAdapter.ts | 1 + .../web/src/features/metadata/util/parsers.ts | 1 + .../util/graph/addIPAdapterToLinearGraph.ts | 7 ++- .../frontend/web/src/services/api/schema.ts | 10 ++- 11 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterIPMethod.tsx create mode 100644 invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterIPMethod.ts diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index 2da482c833..a02d0a57ef 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -36,6 +36,7 @@ class IPAdapterMetadataField(BaseModel): image: ImageField = Field(description="The IP-Adapter image prompt.") ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model.") clip_vision_model: Literal["ViT-H", "ViT-G"] = Field(description="The CLIP Vision model") + method: Literal["full", "style", "composition"] = Field(description="Method to apply IP Weights with") weight: Union[float, list[float]] = Field(description="The weight given to the IP-Adapter") begin_step_percent: float = Field(description="When the IP-Adapter is first applied (% of total steps)") end_step_percent: float = Field(description="When the IP-Adapter is last applied (% of total steps)") diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 5454c72e68..f806fc7b95 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -213,6 +213,10 @@ "resize": "Resize", "resizeSimple": "Resize (Simple)", "resizeMode": "Resize Mode", + "ipAdapterMethod": "Method", + "full": "Full", + "style": "Style Only", + "composition": "Composition Only", "safe": "Safe", "saveControlImage": "Save Control Image", "scribble": "scribble", diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx index 42499b015c..445e421df4 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx +++ b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx @@ -21,6 +21,7 @@ import ControlAdapterShouldAutoConfig from './ControlAdapterShouldAutoConfig'; import ControlNetCanvasImageImports from './imports/ControlNetCanvasImageImports'; import { ParamControlAdapterBeginEnd } from './parameters/ParamControlAdapterBeginEnd'; import ParamControlAdapterControlMode from './parameters/ParamControlAdapterControlMode'; +import ParamControlAdapterIPMethod from './parameters/ParamControlAdapterIPMethod'; import ParamControlAdapterProcessorSelect from './parameters/ParamControlAdapterProcessorSelect'; import ParamControlAdapterResizeMode from 
'./parameters/ParamControlAdapterResizeMode'; import ParamControlAdapterWeight from './parameters/ParamControlAdapterWeight'; @@ -111,7 +112,8 @@ const ControlAdapterConfig = (props: { id: string; number: number }) => { - + + diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterIPMethod.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterIPMethod.tsx new file mode 100644 index 0000000000..7385997804 --- /dev/null +++ b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterIPMethod.tsx @@ -0,0 +1,63 @@ +import type { ComboboxOnChange } from '@invoke-ai/ui-library'; +import { Combobox, FormControl, FormLabel } from '@invoke-ai/ui-library'; +import { useAppDispatch } from 'app/store/storeHooks'; +import { InformationalPopover } from 'common/components/InformationalPopover/InformationalPopover'; +import { useControlAdapterIPMethod } from 'features/controlAdapters/hooks/useControlAdapterIPMethod'; +import { useControlAdapterIsEnabled } from 'features/controlAdapters/hooks/useControlAdapterIsEnabled'; +import { controlAdapterIPMethodChanged } from 'features/controlAdapters/store/controlAdaptersSlice'; +import type { IPMethod } from 'features/controlAdapters/store/types'; +import { isIPMethod } from 'features/controlAdapters/store/types'; +import { memo, useCallback, useMemo } from 'react'; +import { useTranslation } from 'react-i18next'; + +type Props = { + id: string; +}; + +const ParamControlAdapterIPMethod = ({ id }: Props) => { + const isEnabled = useControlAdapterIsEnabled(id); + const method = useControlAdapterIPMethod(id); + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const options: { label: string; value: IPMethod }[] = useMemo( + () => [ + { label: t('controlnet.full'), value: 'full' }, + { label: t('controlnet.style'), value: 'style' }, + { label: t('controlnet.composition'), value: 'composition' }, + ], + [t] + ); + + const handleIPMethodChanged = useCallback( + (v) => { + if (!isIPMethod(v?.value)) { + return; + } + dispatch( + controlAdapterIPMethodChanged({ + id, + method: v.value, + }) + ); + }, + [id, dispatch] + ); + + const value = useMemo(() => options.find((o) => o.value === method), [options, method]); + + if (!method) { + return null; + } + + return ( + + + {t('controlnet.ipAdapterMethod')} + + + + ); +}; + +export default memo(ParamControlAdapterIPMethod); diff --git a/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterIPMethod.ts b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterIPMethod.ts new file mode 100644 index 0000000000..a179899396 --- /dev/null +++ b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterIPMethod.ts @@ -0,0 +1,24 @@ +import { createMemoizedSelector } from 'app/store/createMemoizedSelector'; +import { useAppSelector } from 'app/store/storeHooks'; +import { + selectControlAdapterById, + selectControlAdaptersSlice, +} from 'features/controlAdapters/store/controlAdaptersSlice'; +import { useMemo } from 'react'; + +export const useControlAdapterIPMethod = (id: string) => { + const selector = useMemo( + () => + createMemoizedSelector(selectControlAdaptersSlice, (controlAdapters) => { + const cn = selectControlAdapterById(controlAdapters, id); + if (cn && cn?.type === 'ip_adapter') { + return cn.method; + } + }), + [id] + ); + + const method = useAppSelector(selector); + + return method; +}; diff --git 
a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts index 100bb3f6ad..9a1ce5e984 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts @@ -21,6 +21,7 @@ import type { ControlAdapterType, ControlMode, ControlNetConfig, + IPMethod, RequiredControlAdapterProcessorNode, ResizeMode, T2IAdapterConfig, @@ -245,6 +246,10 @@ export const controlAdaptersSlice = createSlice({ } caAdapter.updateOne(state, { id, changes: { controlMode } }); }, + controlAdapterIPMethodChanged: (state, action: PayloadAction<{ id: string; method: IPMethod }>) => { + const { id, method } = action.payload; + caAdapter.updateOne(state, { id, changes: { method } }); + }, controlAdapterCLIPVisionModelChanged: ( state, action: PayloadAction<{ id: string; clipVisionModel: CLIPVisionModel }> @@ -390,6 +395,7 @@ export const { controlAdapterIsEnabledChanged, controlAdapterModelChanged, controlAdapterCLIPVisionModelChanged, + controlAdapterIPMethodChanged, controlAdapterWeightChanged, controlAdapterBeginStepPctChanged, controlAdapterEndStepPctChanged, diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts index 329c318759..7e2f18af5c 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts @@ -210,6 +210,10 @@ const zResizeMode = z.enum(['just_resize', 'crop_resize', 'fill_resize', 'just_r export type ResizeMode = z.infer; export const isResizeMode = (v: unknown): v is ResizeMode => zResizeMode.safeParse(v).success; +const zIPMethod = z.enum(['full', 'style', 'composition']); +export type IPMethod = z.infer; +export const isIPMethod = (v: unknown): v is IPMethod => zIPMethod.safeParse(v).success; + export type ControlNetConfig = { type: 'controlnet'; id: string; @@ -253,6 +257,7 @@ export type IPAdapterConfig = { model: ParameterIPAdapterModel | null; clipVisionModel: CLIPVisionModel; weight: number; + method: IPMethod; beginStepPct: number; endStepPct: number; }; diff --git a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts index dc893ceb1c..ad7bdba363 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts +++ b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts @@ -46,6 +46,7 @@ export const initialIPAdapter: Omit = { isEnabled: true, controlImage: null, model: null, + method: 'full', clipVisionModel: 'ViT-H', weight: 1, beginStepPct: 0, diff --git a/invokeai/frontend/web/src/features/metadata/util/parsers.ts b/invokeai/frontend/web/src/features/metadata/util/parsers.ts index 9f5c14d94e..9a07ea1d80 100644 --- a/invokeai/frontend/web/src/features/metadata/util/parsers.ts +++ b/invokeai/frontend/web/src/features/metadata/util/parsers.ts @@ -386,6 +386,7 @@ const parseIPAdapter: MetadataParseFunc = async (metada clipVisionModel: 'ViT-H', controlImage: image?.image_name ?? null, weight: weight ?? initialIPAdapter.weight, + method: 'full', beginStepPct: begin_step_percent ?? initialIPAdapter.beginStepPct, endStepPct: end_step_percent ?? 
initialIPAdapter.endStepPct, }; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts index ad530f7765..568b24ccfd 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts @@ -48,7 +48,7 @@ export const addIPAdapterToLinearGraph = async ( if (!ipAdapter.model) { return; } - const { id, weight, model, clipVisionModel, beginStepPct, endStepPct, controlImage } = ipAdapter; + const { id, weight, model, clipVisionModel, method, beginStepPct, endStepPct, controlImage } = ipAdapter; assert(controlImage, 'IP Adapter image is required'); @@ -57,7 +57,7 @@ export const addIPAdapterToLinearGraph = async ( type: 'ip_adapter', is_intermediate: true, weight: weight, - method: 'composition', + method: method, ip_adapter_model: model, clip_vision_model: clipVisionModel, begin_step_percent: beginStepPct, @@ -85,7 +85,7 @@ export const addIPAdapterToLinearGraph = async ( }; const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadataField'] => { - const { controlImage, beginStepPct, endStepPct, model, clipVisionModel, weight } = ipAdapter; + const { controlImage, beginStepPct, endStepPct, model, clipVisionModel, method, weight } = ipAdapter; assert(model, 'IP Adapter model is required'); @@ -103,6 +103,7 @@ const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadat ip_adapter_model: model, clip_vision_model: clipVisionModel, weight, + method, begin_step_percent: beginStepPct, end_step_percent: endStepPct, image, diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 9028054a1f..6a8c292374 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -4158,7 +4158,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | 
components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["CompelInvocation"] 
| components["schemas"]["ShowImageInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["FloatMathInvocation"]; + [key: string]: components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | 
components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["T2IAdapterInvocation"] | 
components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["CanvasPasteBackInvocation"]; }; /** * Edges @@ -4195,7 +4195,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPOutput"] | 
components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["LatentsCollectionOutput"]; + [key: string]: components["schemas"]["LatentsOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["String2Output"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["MaskOutput"]; }; /** * Errors @@ -4685,6 +4685,12 @@ export type components = { * @enum {string} */ clip_vision_model: "ViT-H" | "ViT-G"; + /** + * Method + * @description Method to apply IP Weights with + * @enum {string} + */ + method: "full" | "style" | "composition"; /** * Weight * @description The weight given to the IP-Adapter From af36fe8c1eae9d6e66c1bda2c7d2e880ac017a52 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:08:52 +0530 Subject: [PATCH 03/21] chore: ruff fixes --- invokeai/app/invocations/latent.py | 58 ++++++++++++------- invokeai/app/invocations/mask.py | 6 +- .../sqlite_migrator/sqlite_migrator_common.py | 3 +- .../diffusion/custom_atttention.py | 1 - .../diffusion/unet_attention_patcher.py | 2 - 5 files changed, 41 insertions(+), 29 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ede26a823f..85f1e2bf24 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -15,10 +15,12 @@ from diffusers import AutoencoderKL, AutoencoderTiny from diffusers.configuration_utils import ConfigMixin from diffusers.image_processor 
import VaeImageProcessor from diffusers.models.adapter import T2IAdapter -from diffusers.models.attention_processor import (AttnProcessor2_0, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, - XFormersAttnProcessor) +from diffusers.models.attention_processor import ( + AttnProcessor2_0, + LoRAAttnProcessor2_0, + LoRAXFormersAttnProcessor, + XFormersAttnProcessor, +) from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from diffusers.schedulers import DPMSolverSDEScheduler from diffusers.schedulers import SchedulerMixin as Scheduler @@ -27,17 +29,22 @@ from pydantic import field_validator from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPVisionModelWithProjection -from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR, - SCHEDULER_NAME_VALUES) -from invokeai.app.invocations.fields import (ConditioningField, - DenoiseMaskField, - FieldDescriptions, ImageField, - Input, InputField, LatentsField, - OutputField, UIType, WithBoard, - WithMetadata) +from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES +from invokeai.app.invocations.fields import ( + ConditioningField, + DenoiseMaskField, + FieldDescriptions, + ImageField, + Input, + InputField, + LatentsField, + OutputField, + UIType, + WithBoard, + WithMetadata, +) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import (DenoiseMaskOutput, - ImageOutput, LatentsOutput) +from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -45,21 +52,28 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus from invokeai.backend.lora import LoRAModelRaw from invokeai.backend.model_manager import BaseModelType, LoadedModel from invokeai.backend.model_patcher import ModelPatcher -from invokeai.backend.stable_diffusion import (PipelineIntermediateState, - set_seamless) +from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( - BasicConditioningInfo, IPAdapterConditioningInfo, IPAdapterData, Range, - SDXLConditioningInfo, TextConditioningData, TextConditioningRegions) + BasicConditioningInfo, + IPAdapterConditioningInfo, + IPAdapterData, + Range, + SDXLConditioningInfo, + TextConditioningData, + TextConditioningRegions, +) from invokeai.backend.util.mask import to_standard_float_mask from invokeai.backend.util.silence_warnings import SilenceWarnings from ...backend.stable_diffusion.diffusers_pipeline import ( - ControlNetData, StableDiffusionGeneratorPipeline, T2IAdapterData, - image_resized_to_grid_as_tensor) + ControlNetData, + StableDiffusionGeneratorPipeline, + T2IAdapterData, + image_resized_to_grid_as_tensor, +) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import choose_precision, choose_torch_device -from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation, - invocation_output) +from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField, VAEField diff --git a/invokeai/app/invocations/mask.py 
b/invokeai/app/invocations/mask.py index a7f3207764..acacaedaed 100644 --- a/invokeai/app/invocations/mask.py +++ b/invokeai/app/invocations/mask.py @@ -24,9 +24,9 @@ class RectangleMaskInvocation(BaseInvocation, WithMetadata): def invoke(self, context: InvocationContext) -> MaskOutput: mask = torch.zeros((1, self.height, self.width), dtype=torch.bool) - mask[:, self.y_top : self.y_top + self.rectangle_height, self.x_left : self.x_left + self.rectangle_width] = ( - True - ) + mask[ + :, self.y_top : self.y_top + self.rectangle_height, self.x_left : self.x_left + self.rectangle_width + ] = True mask_tensor_name = context.tensors.save(mask) return MaskOutput( diff --git a/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py b/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py index 9b2444dae4..47ed5da505 100644 --- a/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py +++ b/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py @@ -17,7 +17,8 @@ class MigrateCallback(Protocol): See :class:`Migration` for an example. """ - def __call__(self, cursor: sqlite3.Cursor) -> None: ... + def __call__(self, cursor: sqlite3.Cursor) -> None: + ... class MigrationError(RuntimeError): diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 5a42d9572e..2a15e4fbe2 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -156,7 +156,6 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) if self._ip_adapter_attention_weights["skip"]: - ip_key = ipa_weights.to_k_ip(ip_hidden_states) ip_value = ipa_weights.to_v_ip(ip_hidden_states) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 65992a62b5..05011e3d9a 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,10 +33,8 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: - ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} for ip_adapter in self._ip_adapters: - ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = False for block in ip_adapter["target_blocks"]: From 7a67fd6a06e502a93ee1b38dea9328c75b836cd4 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:10:20 +0530 Subject: [PATCH 04/21] Revert "chore: ruff fixes" This reverts commit af36fe8c1eae9d6e66c1bda2c7d2e880ac017a52. 
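The attention-patcher hunks above are where the new target_blocks field takes effect: each attention layer's processor receives per-adapter weights plus a skip flag, and the intent is that an IP-Adapter only contributes to layers whose names match one of its target blocks. The snippet below is a minimal, standalone sketch of that matching rule in plain Python; build_skip_flags and its arguments are illustrative names, not the patch's actual API.

def build_skip_flags(attn_layer_names: list[str], ip_adapters: list[dict]) -> dict[str, list[bool]]:
    # One skip flag per (layer, adapter): skip when none of the adapter's
    # target_blocks is a substring of the attention layer's name.
    flags: dict[str, list[bool]] = {}
    for name in attn_layer_names:
        flags[name] = [
            not any(block in name for block in adapter["target_blocks"])
            for adapter in ip_adapters
        ]
    return flags

# Example: a "style" SDXL adapter (target "up_blocks.0.attentions.1") is skipped in the
# down block, while a "full" adapter (target "block") matches every layer name.
print(build_skip_flags(
    ["up_blocks.0.attentions.1.transformer_blocks.0.attn2",
     "down_blocks.2.attentions.1.transformer_blocks.0.attn2"],
    [{"target_blocks": ["up_blocks.0.attentions.1"]}, {"target_blocks": ["block"]}],
))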
--- invokeai/app/invocations/latent.py | 58 +++++++------------ .../diffusion/custom_atttention.py | 1 + .../diffusion/unet_attention_patcher.py | 2 + 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 85f1e2bf24..ede26a823f 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -15,12 +15,10 @@ from diffusers import AutoencoderKL, AutoencoderTiny from diffusers.configuration_utils import ConfigMixin from diffusers.image_processor import VaeImageProcessor from diffusers.models.adapter import T2IAdapter -from diffusers.models.attention_processor import ( - AttnProcessor2_0, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, - XFormersAttnProcessor, -) +from diffusers.models.attention_processor import (AttnProcessor2_0, + LoRAAttnProcessor2_0, + LoRAXFormersAttnProcessor, + XFormersAttnProcessor) from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from diffusers.schedulers import DPMSolverSDEScheduler from diffusers.schedulers import SchedulerMixin as Scheduler @@ -29,22 +27,17 @@ from pydantic import field_validator from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPVisionModelWithProjection -from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES -from invokeai.app.invocations.fields import ( - ConditioningField, - DenoiseMaskField, - FieldDescriptions, - ImageField, - Input, - InputField, - LatentsField, - OutputField, - UIType, - WithBoard, - WithMetadata, -) +from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR, + SCHEDULER_NAME_VALUES) +from invokeai.app.invocations.fields import (ConditioningField, + DenoiseMaskField, + FieldDescriptions, ImageField, + Input, InputField, LatentsField, + OutputField, UIType, WithBoard, + WithMetadata) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput +from invokeai.app.invocations.primitives import (DenoiseMaskOutput, + ImageOutput, LatentsOutput) from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -52,28 +45,21 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus from invokeai.backend.lora import LoRAModelRaw from invokeai.backend.model_manager import BaseModelType, LoadedModel from invokeai.backend.model_patcher import ModelPatcher -from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless +from invokeai.backend.stable_diffusion import (PipelineIntermediateState, + set_seamless) from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( - BasicConditioningInfo, - IPAdapterConditioningInfo, - IPAdapterData, - Range, - SDXLConditioningInfo, - TextConditioningData, - TextConditioningRegions, -) + BasicConditioningInfo, IPAdapterConditioningInfo, IPAdapterData, Range, + SDXLConditioningInfo, TextConditioningData, TextConditioningRegions) from invokeai.backend.util.mask import to_standard_float_mask from invokeai.backend.util.silence_warnings import SilenceWarnings from ...backend.stable_diffusion.diffusers_pipeline import ( - ControlNetData, - StableDiffusionGeneratorPipeline, - T2IAdapterData, - image_resized_to_grid_as_tensor, -) + ControlNetData, 
StableDiffusionGeneratorPipeline, T2IAdapterData, + image_resized_to_grid_as_tensor) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import choose_precision, choose_torch_device -from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output +from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation, + invocation_output) from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField, VAEField diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 2a15e4fbe2..5a42d9572e 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -156,6 +156,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) if self._ip_adapter_attention_weights["skip"]: + ip_key = ipa_weights.to_k_ip(ip_hidden_states) ip_value = ipa_weights.to_v_ip(ip_hidden_states) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 05011e3d9a..65992a62b5 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,8 +33,10 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: + ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} for ip_adapter in self._ip_adapters: + ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = False for block in ip_adapter["target_blocks"]: From d4393e417026ed4f4e67ec4052a6634820d1f966 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:13:50 +0530 Subject: [PATCH 06/21] chore: linter fixes --- invokeai/app/invocations/latent.py | 58 ++++++++++++------- invokeai/app/invocations/mask.py | 6 +- .../sqlite_migrator/sqlite_migrator_common.py | 3 +- .../diffusion/custom_atttention.py | 1 - .../diffusion/unet_attention_patcher.py | 2 - 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ede26a823f..85f1e2bf24 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -15,10 +15,12 @@ from diffusers import AutoencoderKL, AutoencoderTiny from diffusers.configuration_utils import ConfigMixin from diffusers.image_processor import VaeImageProcessor from diffusers.models.adapter import T2IAdapter -from diffusers.models.attention_processor import (AttnProcessor2_0, - LoRAAttnProcessor2_0, - LoRAXFormersAttnProcessor, - XFormersAttnProcessor) +from diffusers.models.attention_processor import ( + AttnProcessor2_0, + LoRAAttnProcessor2_0, + LoRAXFormersAttnProcessor, + XFormersAttnProcessor, +) from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel from diffusers.schedulers import DPMSolverSDEScheduler from diffusers.schedulers import SchedulerMixin as Scheduler @@ -27,17 +29,22 @@ from pydantic import field_validator from torchvision.transforms.functional import resize as tv_resize from transformers import 
CLIPVisionModelWithProjection -from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR, - SCHEDULER_NAME_VALUES) -from invokeai.app.invocations.fields import (ConditioningField, - DenoiseMaskField, - FieldDescriptions, ImageField, - Input, InputField, LatentsField, - OutputField, UIType, WithBoard, - WithMetadata) +from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES +from invokeai.app.invocations.fields import ( + ConditioningField, + DenoiseMaskField, + FieldDescriptions, + ImageField, + Input, + InputField, + LatentsField, + OutputField, + UIType, + WithBoard, + WithMetadata, +) from invokeai.app.invocations.ip_adapter import IPAdapterField -from invokeai.app.invocations.primitives import (DenoiseMaskOutput, - ImageOutput, LatentsOutput) +from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput from invokeai.app.invocations.t2i_adapter import T2IAdapterField from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.app.util.controlnet_utils import prepare_control_image @@ -45,21 +52,28 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus from invokeai.backend.lora import LoRAModelRaw from invokeai.backend.model_manager import BaseModelType, LoadedModel from invokeai.backend.model_patcher import ModelPatcher -from invokeai.backend.stable_diffusion import (PipelineIntermediateState, - set_seamless) +from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( - BasicConditioningInfo, IPAdapterConditioningInfo, IPAdapterData, Range, - SDXLConditioningInfo, TextConditioningData, TextConditioningRegions) + BasicConditioningInfo, + IPAdapterConditioningInfo, + IPAdapterData, + Range, + SDXLConditioningInfo, + TextConditioningData, + TextConditioningRegions, +) from invokeai.backend.util.mask import to_standard_float_mask from invokeai.backend.util.silence_warnings import SilenceWarnings from ...backend.stable_diffusion.diffusers_pipeline import ( - ControlNetData, StableDiffusionGeneratorPipeline, T2IAdapterData, - image_resized_to_grid_as_tensor) + ControlNetData, + StableDiffusionGeneratorPipeline, + T2IAdapterData, + image_resized_to_grid_as_tensor, +) from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP from ...backend.util.devices import choose_precision, choose_torch_device -from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation, - invocation_output) +from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output from .controlnet_image_processors import ControlField from .model import ModelIdentifierField, UNetField, VAEField diff --git a/invokeai/app/invocations/mask.py b/invokeai/app/invocations/mask.py index acacaedaed..a7f3207764 100644 --- a/invokeai/app/invocations/mask.py +++ b/invokeai/app/invocations/mask.py @@ -24,9 +24,9 @@ class RectangleMaskInvocation(BaseInvocation, WithMetadata): def invoke(self, context: InvocationContext) -> MaskOutput: mask = torch.zeros((1, self.height, self.width), dtype=torch.bool) - mask[ - :, self.y_top : self.y_top + self.rectangle_height, self.x_left : self.x_left + self.rectangle_width - ] = True + mask[:, self.y_top : self.y_top + self.rectangle_height, self.x_left : self.x_left + self.rectangle_width] = ( + True + ) mask_tensor_name = context.tensors.save(mask) return MaskOutput( diff --git 
a/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py b/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py index 47ed5da505..9b2444dae4 100644 --- a/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py +++ b/invokeai/app/services/shared/sqlite_migrator/sqlite_migrator_common.py @@ -17,8 +17,7 @@ class MigrateCallback(Protocol): See :class:`Migration` for an example. """ - def __call__(self, cursor: sqlite3.Cursor) -> None: - ... + def __call__(self, cursor: sqlite3.Cursor) -> None: ... class MigrationError(RuntimeError): diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 5a42d9572e..2a15e4fbe2 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -156,7 +156,6 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) if self._ip_adapter_attention_weights["skip"]: - ip_key = ipa_weights.to_k_ip(ip_hidden_states) ip_value = ipa_weights.to_v_ip(ip_hidden_states) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 65992a62b5..05011e3d9a 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,10 +33,8 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: - ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} for ip_adapter in self._ip_adapters: - ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = False for block in ip_adapter["target_blocks"]: From 9fc73743b2e52be37be05be13e8d3984297cde83 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:30:39 +0530 Subject: [PATCH 07/21] feat: support SD1.5 --- invokeai/app/invocations/ip_adapter.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index b239e89bb6..d4868b5082 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -116,11 +116,17 @@ class IPAdapterInvocation(BaseInvocation): image_encoder_model = self._get_image_encoder(context, image_encoder_model_name) - target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] + target_blocks = ["block"] if self.method == "style": - target_blocks = ["up_blocks.0.attentions.1"] + if ip_adapter_info.base == "sd-1": + target_blocks = ["up_blocks.1"] + if ip_adapter_info.base == "sdxl": + target_blocks = ["up_blocks.0.attentions.1"] elif self.method == "composition": - target_blocks = ["down_blocks.2.attentions.1"] + if ip_adapter_info.base == "sd-1": + target_blocks = ["down_blocks.2", "mid_block", "up_blocks.1"] + if ip_adapter_info.base == "sdxl": + target_blocks = ["down_blocks.2.attentions.1"] return IPAdapterOutput( ip_adapter=IPAdapterField( From f50b156511e3713c22dd06af31e587150fa688b9 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 12:43:49 +0530 Subject: [PATCH 08/21] chore:
do not include custom nodes in schema --- .../frontend/web/src/services/api/schema.ts | 114 +----------------- 1 file changed, 2 insertions(+), 112 deletions(-) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 6a8c292374..277880f669 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -1238,39 +1238,6 @@ export type components = { */ type: "boolean_output"; }; - /** - * BRIA AI Background Removal - * @description Uses the new Bria 1.4 model to remove backgrounds from images. - */ - BriaRemoveBackgroundInvocation: { - /** @description Optional metadata to be saved with the image */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. - * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** @description The image to crop */ - image?: components["schemas"]["ImageField"]; - /** - * type - * @default bria_bg_remove - * @constant - */ - type: "bria_bg_remove"; - }; /** CLIPField */ CLIPField: { /** @description Info to load tokenizer submodel */ @@ -4158,7 +4125,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ImageScaleInvocation"] | 
components["schemas"]["SubtractInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["LatentsInvocation"] | 
components["schemas"]["FloatMathInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["CanvasPasteBackInvocation"]; + [key: string]: components["schemas"]["ImagePasteInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageCropInvocation"] 
| components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LaMaInfillInvocation"] | 
components["schemas"]["CompelInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ScaleLatentsInvocation"]; }; /** * Edges @@ -4195,7 +4162,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["LatentsOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["String2Output"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["MaskOutput"]; + [key: string]: components["schemas"]["MetadataItemOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | 
components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ColorCollectionOutput"]; }; /** * Errors @@ -4246,83 +4213,6 @@ export type components = { /** Detail */ detail?: components["schemas"]["ValidationError"][]; }; - /** - * Hand Depth w/ MeshGraphormer - * @description Generate hand depth maps to inpaint with using ControlNet - */ - HandDepthMeshGraphormerProcessor: { - /** @description Optional metadata to be saved with the image */ - metadata?: components["schemas"]["MetadataField"] | null; - /** - * Id - * @description The id of this instance of an invocation. Must be unique among all instances of invocations. - */ - id: string; - /** - * Is Intermediate - * @description Whether or not this is an intermediate invocation. 
- * @default false - */ - is_intermediate?: boolean; - /** - * Use Cache - * @description Whether or not to use the cache - * @default true - */ - use_cache?: boolean; - /** @description The image to process */ - image?: components["schemas"]["ImageField"]; - /** - * Resolution - * @description Pixel resolution for output image - * @default 512 - */ - resolution?: number; - /** - * Mask Padding - * @description Amount to pad the hand mask by - * @default 30 - */ - mask_padding?: number; - /** - * Offload - * @description Offload model after usage - * @default false - */ - offload?: boolean; - /** - * type - * @default hand_depth_mesh_graphormer_image_processor - * @constant - */ - type: "hand_depth_mesh_graphormer_image_processor"; - }; - /** - * HandDepthOutput - * @description Base class for to output Meshgraphormer results - */ - HandDepthOutput: { - /** @description Improved hands depth map */ - image: components["schemas"]["ImageField"]; - /** @description Hands area mask */ - mask: components["schemas"]["ImageField"]; - /** - * Width - * @description The width of the depth map in pixels - */ - width: number; - /** - * Height - * @description The height of the depth map in pixels - */ - height: number; - /** - * type - * @default meshgraphormer_output - * @constant - */ - type: "meshgraphormer_output"; - }; /** * HED (softedge) Processor * @description Applies HED edge detection to image From 27466ffa1a04a9727f48c29e8c5b0dfd6e64fef7 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 13:38:26 +0530 Subject: [PATCH 09/21] chore: update the ip adapter node version --- invokeai/app/invocations/ip_adapter.py | 2 +- invokeai/frontend/web/src/services/api/schema.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index d4868b5082..6fb80cc861 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -58,7 +58,7 @@ class IPAdapterOutput(BaseInvocationOutput): CLIP_VISION_MODEL_MAP = {"ViT-H": "ip_adapter_sd_image_encoder", "ViT-G": "ip_adapter_sdxl_image_encoder"} -@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.3.0") +@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.4.0") class IPAdapterInvocation(BaseInvocation): """Collects IP-Adapter info to pass to other nodes.""" diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 277880f669..7157de227b 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -4125,7 +4125,7 @@ export type components = { * @description The nodes in this graph */ nodes: { - [key: string]: components["schemas"]["ImagePasteInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | 
components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | 
components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ScaleLatentsInvocation"]; + [key: string]: components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageCropInvocation"] | 
components["schemas"]["ImageBlurInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["StepParamEasingInvocation"] | 
components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"]; }; /** * Edges @@ -4162,7 +4162,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["MetadataItemOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["TileToPropertiesOutput"] | 
components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ColorCollectionOutput"]; + [key: string]: components["schemas"]["ColorCollectionOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["String2Output"] | components["schemas"]["MaskOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["TileToPropertiesOutput"]; }; /** * Errors From 2d5786d3bb6f1079554f727037a6bc147eeb7f23 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 13:42:10 +0530 Subject: [PATCH 10/21] fix: Incorrect composition blocks for SD1.5 --- invokeai/app/invocations/ip_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 6fb80cc861..48610b8c80 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -123,8 +123,8 @@ class IPAdapterInvocation(BaseInvocation): if 
ip_adapter_info.base == "sdxl": target_blocks = ["up_blocks.0.attentions.1"] elif self.method == "composition": - if ip_adapter_info.base == "sd1": - target_blocks = ["down_blocks.2", "mid_block", "up_blocks.1"] + if ip_adapter_info.base == "sd-1": + target_blocks = ["down_blocks.2", "mid_block"] if ip_adapter_info.base == "sdxl": target_blocks = ["down_blocks.2.attentions.1"] From 9cb0f63c445c2539bb020b7edc44cb39e85537e4 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sat, 13 Apr 2024 14:17:25 +0530 Subject: [PATCH 11/21] refactor: fix a bunch of type issues in custom_attention --- .../diffusion/custom_atttention.py | 27 +++++++++++-------- .../diffusion/unet_attention_patcher.py | 4 +-- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 2a15e4fbe2..1dc4a43b2f 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -1,4 +1,4 @@ -from typing import List, Optional, TypedDict +from typing import List, Optional, TypedDict, cast import torch import torch.nn.functional as F @@ -40,15 +40,17 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): def __call__( self, attn: Attention, - hidden_states: torch.FloatTensor, - encoder_hidden_states: Optional[torch.FloatTensor] = None, - attention_mask: Optional[torch.FloatTensor] = None, - temb: Optional[torch.FloatTensor] = None, - # For regional prompting: + hidden_states: torch.Tensor, + encoder_hidden_states: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + temb: Optional[torch.Tensor] = None, + # For Regional Prompting: regional_prompt_data: Optional[RegionalPromptData] = None, - percent_through: Optional[torch.FloatTensor] = None, + percent_through: Optional[torch.Tensor] = None, # For IP-Adapter: regional_ip_data: Optional[RegionalIPData] = None, + *args, + **kwargs, ) -> torch.FloatTensor: """Apply attention. 
Args: @@ -155,16 +157,18 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) - if self._ip_adapter_attention_weights["skip"]: + if not self._ip_adapter_attention_weights["skip"]: ip_key = ipa_weights.to_k_ip(ip_hidden_states) ip_value = ipa_weights.to_v_ip(ip_hidden_states) - # Expected ip_key and ip_value shape: (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) + # Expected ip_key and ip_value shape: + # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - # Expected ip_key and ip_value shape: (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) + # Expected ip_key and ip_value shape: + # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) # TODO: add support for attn.scale when we move to Torch 2.1 ip_hidden_states = F.scaled_dot_product_attention( @@ -193,6 +197,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): hidden_states = attn.to_out[1](hidden_states) if input_ndim == 4: + batch_size, channel, height, width = hidden_states.shape hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width) if attn.residual_connection: @@ -200,4 +205,4 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): hidden_states = hidden_states / attn.rescale_output_factor - return hidden_states + return cast(torch.FloatTensor, hidden_states) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 05011e3d9a..52cfc2c573 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -36,10 +36,10 @@ class UNetAttentionPatcher: ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} for ip_adapter in self._ip_adapters: ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) - skip = False + skip = True for block in ip_adapter["target_blocks"]: if block in name: - skip = True + skip = False break ip_adapter_attention_weights.update({"ip_adapter_weights": [ip_adapter_weight], "skip": skip}) From 8426f1e7b2b61abc425c9440dcc7a9f49686014c Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Sun, 14 Apr 2024 12:19:19 +0530 Subject: [PATCH 12/21] fix(experimental): Possible fix for conflict with regional embed length mismatch Pushing this so people can test it out and see if this needs to be handled in a different way. 
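For illustration, the diff below pads the per-layer weight list by cycling it with itertools until it is as long as the list of regional image-prompt embeds. A minimal sketch of that idiom, with names invented for the sketch rather than taken from the patch:

    from itertools import cycle, islice

    ip_adapter_weights = ["w0"]      # one entry per IP-Adapter attached to this layer
    num_regional_embeds = 3          # e.g. three regional image-prompt embeds

    # Repeat the existing entries until the list matches the embeds list in length.
    padded = list(islice(cycle(ip_adapter_weights), num_regional_embeds))
    assert padded == ["w0", "w0", "w0"]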
--- .../stable_diffusion/diffusion/custom_atttention.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 1dc4a43b2f..3386c72556 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -1,3 +1,4 @@ +from itertools import cycle, islice from typing import List, Optional, TypedDict, cast import torch @@ -137,12 +138,22 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): if self._ip_adapter_attention_weights: assert regional_ip_data is not None ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len) + + # Pad weight tensor list to match size of regional embeds + self._ip_adapter_attention_weights["ip_adapter_weights"] = list( + islice( + cycle(self._ip_adapter_attention_weights["ip_adapter_weights"]), + len(regional_ip_data.image_prompt_embeds), + ) + ) + assert ( len(regional_ip_data.image_prompt_embeds) == len(self._ip_adapter_attention_weights["ip_adapter_weights"]) == len(regional_ip_data.scales) == ip_masks.shape[1] ) + for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds): ipa_weights = self._ip_adapter_attention_weights["ip_adapter_weights"][ipa_index] ipa_scale = regional_ip_data.scales[ipa_index] From cd76a31a8f329807ddbd6bab920c63978698ec46 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Mon, 15 Apr 2024 22:29:32 +0530 Subject: [PATCH 13/21] fix: IP Adapter method not being recalled --- invokeai/frontend/web/src/features/metadata/util/parsers.ts | 6 +++++- invokeai/frontend/web/src/features/nodes/types/common.ts | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/invokeai/frontend/web/src/features/metadata/util/parsers.ts b/invokeai/frontend/web/src/features/metadata/util/parsers.ts index 9a07ea1d80..ab88591e8e 100644 --- a/invokeai/frontend/web/src/features/metadata/util/parsers.ts +++ b/invokeai/frontend/web/src/features/metadata/util/parsers.ts @@ -369,6 +369,10 @@ const parseIPAdapter: MetadataParseFunc = async (metada .nullish() .catch(null) .parse(await getProperty(metadataItem, 'weight')); + const method = zIPAdapterField.shape.method + .nullish() + .catch(null) + .parse(await getProperty(metadataItem, 'method')); const begin_step_percent = zIPAdapterField.shape.begin_step_percent .nullish() .catch(null) @@ -386,7 +390,7 @@ const parseIPAdapter: MetadataParseFunc = async (metada clipVisionModel: 'ViT-H', controlImage: image?.image_name ?? null, weight: weight ?? initialIPAdapter.weight, - method: 'full', + method: method ?? initialIPAdapter.method, beginStepPct: begin_step_percent ?? initialIPAdapter.beginStepPct, endStepPct: end_step_percent ?? 
initialIPAdapter.endStepPct, }; diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index 06d5ecd5c7..e570054258 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -109,6 +109,7 @@ export const zIPAdapterField = z.object({ image: zImageField, ip_adapter_model: zModelIdentifierField, weight: z.number(), + method: z.enum(['full', 'style', 'composition']), begin_step_percent: z.number().optional(), end_step_percent: z.number().optional(), }); From 5f6c6abf9c2e55e88ff5e0bdaab578659a076e9a Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Mon, 15 Apr 2024 23:38:55 +0530 Subject: [PATCH 14/21] chore: change IPAdapterAttentionWeights to a dataclass --- .../diffusion/custom_atttention.py | 20 ++++++++++--------- .../diffusion/unet_attention_patcher.py | 6 +++--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 3386c72556..ac53bf911d 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -1,5 +1,6 @@ +from dataclasses import dataclass from itertools import cycle, islice -from typing import List, Optional, TypedDict, cast +from typing import List, Optional, cast import torch import torch.nn.functional as F @@ -10,7 +11,8 @@ from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import Regiona from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData -class IPAdapterAttentionWeights(TypedDict): +@dataclass +class IPAdapterAttentionWeights: ip_adapter_weights: List[IPAttentionProcessorWeights] skip: bool @@ -63,7 +65,6 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): is_cross_attention = encoder_hidden_states is not None # Start unmodified block from AttnProcessor2_0. - # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv residual = hidden_states if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -77,7 +78,6 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): batch_size, sequence_length, _ = ( hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape ) - # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # End unmodified block from AttnProcessor2_0. 
_, query_seq_len, _ = hidden_states.shape @@ -140,22 +140,22 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len) # Pad weight tensor list to match size of regional embeds - self._ip_adapter_attention_weights["ip_adapter_weights"] = list( + self._ip_adapter_attention_weights.ip_adapter_weights = list( islice( - cycle(self._ip_adapter_attention_weights["ip_adapter_weights"]), + cycle(self._ip_adapter_attention_weights.ip_adapter_weights), len(regional_ip_data.image_prompt_embeds), ) ) assert ( len(regional_ip_data.image_prompt_embeds) - == len(self._ip_adapter_attention_weights["ip_adapter_weights"]) + == len(self._ip_adapter_attention_weights.ip_adapter_weights) == len(regional_ip_data.scales) == ip_masks.shape[1] ) for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds): - ipa_weights = self._ip_adapter_attention_weights["ip_adapter_weights"][ipa_index] + ipa_weights = self._ip_adapter_attention_weights.ip_adapter_weights[ipa_index] ipa_scale = regional_ip_data.scales[ipa_index] ip_mask = ip_masks[0, ipa_index, ...] @@ -168,7 +168,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) - if not self._ip_adapter_attention_weights["skip"]: + if not self._ip_adapter_attention_weights.skip: ip_key = ipa_weights.to_k_ip(ip_hidden_states) ip_value = ipa_weights.to_v_ip(ip_hidden_states) @@ -215,5 +215,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): hidden_states = hidden_states + residual hidden_states = hidden_states / attn.rescale_output_factor + # End of unmodified block from AttnProcessor2_0 + # casting torch.Tensor to torch.FloatTensor to avoid type issues return cast(torch.FloatTensor, hidden_states) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index 52cfc2c573..df9b8d6386 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,7 +33,7 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: - ip_adapter_attention_weights: IPAdapterAttentionWeights = {"ip_adapter_weights": [], "skip": False} + ip_adapter_attention_weights = IPAdapterAttentionWeights(ip_adapter_weights=[], skip=False) for ip_adapter in self._ip_adapters: ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = True @@ -41,8 +41,8 @@ class UNetAttentionPatcher: if block in name: skip = False break - - ip_adapter_attention_weights.update({"ip_adapter_weights": [ip_adapter_weight], "skip": skip}) + ip_adapter_attention_weights.ip_adapter_weights = [ip_adapter_weight] + ip_adapter_attention_weights.skip = skip # Collect the weights from each IP Adapter for the idx'th attention processor. 
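For illustration, a rough sketch of how the target-block matching in UNetAttentionPatcher behaves. The processor name below is a representative example of a diffusers attention-processor key, assumed for this sketch rather than taken from the patch:

    def should_skip(processor_name: str, target_blocks: list[str]) -> bool:
        # Skip the IP-Adapter on this layer unless one of the target block
        # prefixes occurs in the processor's name (plain substring match).
        return not any(block in processor_name for block in target_blocks)

    name = "up_blocks.0.attentions.1.transformer_blocks.0.attn2.processor"
    print(should_skip(name, ["up_blocks.0.attentions.1"]))    # False: weights applied (style, SDXL)
    print(should_skip(name, ["down_blocks.2.attentions.1"]))  # True: layer skipped (composition, SDXL)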
From f6b7bc5d989d15cc5cc8c9712a6f9ea314962b45 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 01:18:43 +0530 Subject: [PATCH 15/21] fix: Dynamically adapt height of control adapter opts --- .../controlAdapters/components/ControlAdapterConfig.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx index 445e421df4..fcc816d75f 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx +++ b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterConfig.tsx @@ -112,7 +112,7 @@ const ControlAdapterConfig = (props: { id: string; number: number }) => { - + From a148c4322c597e02f23255ee9e9a924838fc02ed Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 04:10:41 +0530 Subject: [PATCH 16/21] fix: IP Adapter weights being incorrectly applied They were being overwritten rather than being appended --- .../diffusion/custom_atttention.py | 47 ++++++++----------- .../diffusion/unet_attention_patcher.py | 11 +++-- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index ac53bf911d..8d7245ae3b 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from itertools import cycle, islice from typing import List, Optional, cast import torch @@ -13,7 +12,7 @@ from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import Reg @dataclass class IPAdapterAttentionWeights: - ip_adapter_weights: List[IPAttentionProcessorWeights] + ip_adapter_weights: Optional[IPAttentionProcessorWeights] skip: bool @@ -28,7 +27,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): def __init__( self, - ip_adapter_attention_weights: Optional[IPAdapterAttentionWeights] = None, + ip_adapter_attention_weights: Optional[List[IPAdapterAttentionWeights]] = None, ): """Initialize a CustomAttnProcessor2_0. Note: Arguments that are the same for all attention layers are passed to __call__(). Arguments that are @@ -139,23 +138,15 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): assert regional_ip_data is not None ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len) - # Pad weight tensor list to match size of regional embeds - self._ip_adapter_attention_weights.ip_adapter_weights = list( - islice( - cycle(self._ip_adapter_attention_weights.ip_adapter_weights), - len(regional_ip_data.image_prompt_embeds), - ) - ) - assert ( len(regional_ip_data.image_prompt_embeds) - == len(self._ip_adapter_attention_weights.ip_adapter_weights) + == len(self._ip_adapter_attention_weights) == len(regional_ip_data.scales) == ip_masks.shape[1] ) for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds): - ipa_weights = self._ip_adapter_attention_weights.ip_adapter_weights[ipa_index] + ipa_weights = self._ip_adapter_attention_weights[ipa_index].ip_adapter_weights ipa_scale = regional_ip_data.scales[ipa_index] ip_mask = ip_masks[0, ipa_index, ...] 
@@ -168,33 +159,33 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) - if not self._ip_adapter_attention_weights.skip: - ip_key = ipa_weights.to_k_ip(ip_hidden_states) - ip_value = ipa_weights.to_v_ip(ip_hidden_states) + if not self._ip_adapter_attention_weights[ipa_index].skip: + if ipa_weights: + ip_key = ipa_weights.to_k_ip(ip_hidden_states) + ip_value = ipa_weights.to_v_ip(ip_hidden_states) - # Expected ip_key and ip_value shape: - # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) + # Expected ip_key and ip_value shape: + # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) - ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - # Expected ip_key and ip_value shape: - # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) + # Expected ip_key and ip_value shape: + # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) - # TODO: add support for attn.scale when we move to Torch 2.1 - ip_hidden_states = F.scaled_dot_product_attention( - query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False - ) + # TODO: add support for attn.scale when we move to Torch 2.1 + ip_hidden_states = F.scaled_dot_product_attention( + query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False + ) # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) - ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( batch_size, -1, attn.heads * head_dim ) + ip_hidden_states = ip_hidden_states.to(query.dtype) # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) - hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask else: # If IP-Adapter is not enabled, then regional_ip_data should not be passed in. diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index df9b8d6386..e94d78decb 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,20 +33,25 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: - ip_adapter_attention_weights = IPAdapterAttentionWeights(ip_adapter_weights=[], skip=False) + total_ip_adapter_attention_weights: list[IPAdapterAttentionWeights] = [] + for ip_adapter in self._ip_adapters: + ip_adapter_attention_weights: IPAdapterAttentionWeights = IPAdapterAttentionWeights( + ip_adapter_weights=None, skip=False + ) ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = True for block in ip_adapter["target_blocks"]: if block in name: skip = False break - ip_adapter_attention_weights.ip_adapter_weights = [ip_adapter_weight] + ip_adapter_attention_weights.ip_adapter_weights = ip_adapter_weight ip_adapter_attention_weights.skip = skip + total_ip_adapter_attention_weights.append(ip_adapter_attention_weights) # Collect the weights from each IP Adapter for the idx'th attention processor. 
- attn_procs[name] = CustomAttnProcessor2_0(ip_adapter_attention_weights) + attn_procs[name] = CustomAttnProcessor2_0(total_ip_adapter_attention_weights) return attn_procs From b39ce642b6193f2813e51fa0e9d464d2847d049f Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 04:12:30 +0530 Subject: [PATCH 17/21] cleanup: raise ValueErrors when target_blocks dont match base model --- invokeai/app/invocations/ip_adapter.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py index 48610b8c80..34a30628da 100644 --- a/invokeai/app/invocations/ip_adapter.py +++ b/invokeai/app/invocations/ip_adapter.py @@ -116,17 +116,24 @@ class IPAdapterInvocation(BaseInvocation): image_encoder_model = self._get_image_encoder(context, image_encoder_model_name) - target_blocks = ["block"] if self.method == "style": if ip_adapter_info.base == "sd-1": target_blocks = ["up_blocks.1"] - if ip_adapter_info.base == "sdxl": + elif ip_adapter_info.base == "sdxl": target_blocks = ["up_blocks.0.attentions.1"] + else: + raise ValueError(f"Unsupported IP-Adapter base type: '{ip_adapter_info.base}'.") elif self.method == "composition": if ip_adapter_info.base == "sd-1": target_blocks = ["down_blocks.2", "mid_block"] - if ip_adapter_info.base == "sdxl": + elif ip_adapter_info.base == "sdxl": target_blocks = ["down_blocks.2.attentions.1"] + else: + raise ValueError(f"Unsupported IP-Adapter base type: '{ip_adapter_info.base}'.") + elif self.method == "full": + target_blocks = ["block"] + else: + raise ValueError(f"Unexpected IP-Adapter method: '{self.method}'.") return IPAdapterOutput( ip_adapter=IPAdapterField( From 7ee3fef2dbb6cc1d7eb9bfdc0eb7c05d8a9149fe Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 04:23:50 +0530 Subject: [PATCH 18/21] cleanup: better var names for the ip adapter weight collection block --- .../diffusion/unet_attention_patcher.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index e94d78decb..f74359c614 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -33,25 +33,24 @@ class UNetAttentionPatcher: # "attn1" processors do not use IP-Adapters. attn_procs[name] = CustomAttnProcessor2_0() else: - total_ip_adapter_attention_weights: list[IPAdapterAttentionWeights] = [] + # Collect the weights from each IP Adapter for the idx'th attention processor. 
+ ip_adapter_attention_weights_collection: list[IPAdapterAttentionWeights] = [] for ip_adapter in self._ip_adapters: ip_adapter_attention_weights: IPAdapterAttentionWeights = IPAdapterAttentionWeights( ip_adapter_weights=None, skip=False ) - ip_adapter_weight = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) + ip_adapter_weights = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = True for block in ip_adapter["target_blocks"]: if block in name: skip = False break - ip_adapter_attention_weights.ip_adapter_weights = ip_adapter_weight + ip_adapter_attention_weights.ip_adapter_weights = ip_adapter_weights ip_adapter_attention_weights.skip = skip - total_ip_adapter_attention_weights.append(ip_adapter_attention_weights) + ip_adapter_attention_weights_collection.append(ip_adapter_attention_weights) - # Collect the weights from each IP Adapter for the idx'th attention processor. - - attn_procs[name] = CustomAttnProcessor2_0(total_ip_adapter_attention_weights) + attn_procs[name] = CustomAttnProcessor2_0(ip_adapter_attention_weights_collection) return attn_procs From d27907cc6d4f7939ac796dd1c2fdffb1652dc5bf Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 04:29:53 +0530 Subject: [PATCH 19/21] fix: entire reshaping block needs to be skipped --- .../diffusion/custom_atttention.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 8d7245ae3b..16617d049a 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -178,15 +178,15 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False ) - # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) - ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( - batch_size, -1, attn.heads * head_dim - ) + # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) + ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( + batch_size, -1, attn.heads * head_dim + ) - ip_hidden_states = ip_hidden_states.to(query.dtype) + ip_hidden_states = ip_hidden_states.to(query.dtype) - # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) - hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask + # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) + hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask else: # If IP-Adapter is not enabled, then regional_ip_data should not be passed in. 
assert regional_ip_data is None From fce6b3e44c8b42bd78d8fa3f3ce5747eb285345f Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 14 Apr 2024 23:18:41 -0400 Subject: [PATCH 20/21] maybe solve race issue --- invokeai/app/services/model_install/model_install_default.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/invokeai/app/services/model_install/model_install_default.py b/invokeai/app/services/model_install/model_install_default.py index 5aa0f199fc..6a3117bcb8 100644 --- a/invokeai/app/services/model_install/model_install_default.py +++ b/invokeai/app/services/model_install/model_install_default.py @@ -754,6 +754,8 @@ class ModelInstallService(ModelInstallServiceBase): self._download_cache[download_job.source] = install_job # matches a download job to an install job install_job.download_parts.add(download_job) + # only start the jobs once install_job.download_parts is fully populated + for download_job in install_job.download_parts: self._download_queue.submit_download_job( download_job, on_start=self._download_started_callback, @@ -762,6 +764,7 @@ class ModelInstallService(ModelInstallServiceBase): on_error=self._download_error_callback, on_cancelled=self._download_cancelled_callback, ) + return install_job def _stat_size(self, path: Path) -> int: From f46bbaf8c43ccd4e49e85b74ed848737d662012a Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 16 Apr 2024 21:12:45 +0530 Subject: [PATCH 21/21] fix: make ip-adapter weights not be optional --- .../diffusion/custom_atttention.py | 44 ++++++++++--------- .../diffusion/unet_attention_patcher.py | 8 ++-- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py index 16617d049a..1334313fe6 100644 --- a/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py +++ b/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py @@ -12,7 +12,7 @@ from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import Reg @dataclass class IPAdapterAttentionWeights: - ip_adapter_weights: Optional[IPAttentionProcessorWeights] + ip_adapter_weights: IPAttentionProcessorWeights skip: bool @@ -64,6 +64,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): is_cross_attention = encoder_hidden_states is not None # Start unmodified block from AttnProcessor2_0. + # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv residual = hidden_states if attn.spatial_norm is not None: hidden_states = attn.spatial_norm(hidden_states, temb) @@ -77,6 +78,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): batch_size, sequence_length, _ = ( hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape ) + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # End unmodified block from AttnProcessor2_0. 
_, query_seq_len, _ = hidden_states.shape @@ -160,33 +162,32 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): # Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding) if not self._ip_adapter_attention_weights[ipa_index].skip: - if ipa_weights: - ip_key = ipa_weights.to_k_ip(ip_hidden_states) - ip_value = ipa_weights.to_v_ip(ip_hidden_states) + ip_key = ipa_weights.to_k_ip(ip_hidden_states) + ip_value = ipa_weights.to_v_ip(ip_hidden_states) - # Expected ip_key and ip_value shape: - # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) + # Expected ip_key and ip_value shape: + # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads) - ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - # Expected ip_key and ip_value shape: - # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) + # Expected ip_key and ip_value shape: + # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim) - # TODO: add support for attn.scale when we move to Torch 2.1 - ip_hidden_states = F.scaled_dot_product_attention( - query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False - ) + # TODO: add support for attn.scale when we move to Torch 2.1 + ip_hidden_states = F.scaled_dot_product_attention( + query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False + ) - # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) - ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( - batch_size, -1, attn.heads * head_dim - ) + # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim) + ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape( + batch_size, -1, attn.heads * head_dim + ) - ip_hidden_states = ip_hidden_states.to(query.dtype) + ip_hidden_states = ip_hidden_states.to(query.dtype) - # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) - hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask + # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim) + hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask else: # If IP-Adapter is not enabled, then regional_ip_data should not be passed in. 
assert regional_ip_data is None @@ -206,6 +207,7 @@ class CustomAttnProcessor2_0(AttnProcessor2_0): hidden_states = hidden_states + residual hidden_states = hidden_states / attn.rescale_output_factor + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # End of unmodified block from AttnProcessor2_0 # casting torch.Tensor to torch.FloatTensor to avoid type issues diff --git a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py index f74359c614..ac00a8e06e 100644 --- a/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py +++ b/invokeai/backend/stable_diffusion/diffusion/unet_attention_patcher.py @@ -37,17 +37,15 @@ class UNetAttentionPatcher: ip_adapter_attention_weights_collection: list[IPAdapterAttentionWeights] = [] for ip_adapter in self._ip_adapters: - ip_adapter_attention_weights: IPAdapterAttentionWeights = IPAdapterAttentionWeights( - ip_adapter_weights=None, skip=False - ) ip_adapter_weights = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx) skip = True for block in ip_adapter["target_blocks"]: if block in name: skip = False break - ip_adapter_attention_weights.ip_adapter_weights = ip_adapter_weights - ip_adapter_attention_weights.skip = skip + ip_adapter_attention_weights: IPAdapterAttentionWeights = IPAdapterAttentionWeights( + ip_adapter_weights=ip_adapter_weights, skip=skip + ) ip_adapter_attention_weights_collection.append(ip_adapter_attention_weights) attn_procs[name] = CustomAttnProcessor2_0(ip_adapter_attention_weights_collection)
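For reference, the method-to-target-blocks mapping that the series converges on (PATCH 10 and PATCH 17) can be restated as a small standalone sketch. This mirrors the logic in IPAdapterInvocation but is not code from the patches, and it folds the two ValueError branches into one:

    STYLE_BLOCKS = {"sd-1": ["up_blocks.1"], "sdxl": ["up_blocks.0.attentions.1"]}
    COMPOSITION_BLOCKS = {"sd-1": ["down_blocks.2", "mid_block"], "sdxl": ["down_blocks.2.attentions.1"]}

    def target_blocks_for(method: str, base: str) -> list[str]:
        if method == "full":
            # "block" occurs in every attention-processor name ("down_blocks...",
            # "mid_block...", "up_blocks..."), so the substring match in
            # UNetAttentionPatcher applies the adapter to all cross-attention layers.
            return ["block"]
        table = {"style": STYLE_BLOCKS, "composition": COMPOSITION_BLOCKS}.get(method)
        if table is None or base not in table:
            raise ValueError(f"Unsupported IP-Adapter method/base: '{method}' / '{base}'.")
        return table[base]

Combined with the patcher changes in PATCH 16 and PATCH 18, each cross-attention processor then receives one IPAdapterAttentionWeights entry per IP-Adapter, with skip=True on layers whose names match none of that adapter's target blocks.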