From b013d0e06454ee4248c7e752d57c9fb7f402d07c Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 01:40:28 +0530
Subject: [PATCH 01/14] wip: Initial implementation of safetensor support for
IP Adapter
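
Safetensors IP-Adapter checkpoints store a flat key space
("image_proj.*" and "ip_adapter.*"), whereas the diffusers-style
ip_adapter.bin returned by torch.load() is already nested under those
two top-level keys. build_ip_adapter now normalizes both layouts into
the same IPAdapterStateDict shape. A minimal sketch of the grouping
step (the file path is illustrative):

    import torch
    from safetensors import safe_open

    state_dict: dict[str, dict[str, torch.Tensor]] = {"ip_adapter": {}, "image_proj": {}}
    with safe_open("ip_adapter.safetensors", framework="pt", device="cpu") as f:
        for key in f.keys():
            # Strip the top-level prefix and bucket the tensor under it.
            if key.startswith("image_proj."):
                state_dict["image_proj"][key.removeprefix("image_proj.")] = f.get_tensor(key)
            elif key.startswith("ip_adapter."):
                state_dict["ip_adapter"][key.removeprefix("ip_adapter.")] = f.get_tensor(key)
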
---
invokeai/app/invocations/ip_adapter.py | 23 +++++----
invokeai/backend/ip_adapter/ip_adapter.py | 50 ++++++++++++-------
invokeai/backend/ip_adapter/resampler.py | 40 +++++++++------
invokeai/backend/model_manager/config.py | 22 ++++++--
.../load/model_loaders/ip_adapter.py | 3 +-
invokeai/backend/model_manager/probe.py | 18 ++++---
6 files changed, 103 insertions(+), 53 deletions(-)
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index e302c2b97a..165a6bee24 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -4,18 +4,19 @@ from typing import List, Union
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
-from invokeai.app.invocations.baseinvocation import (
- BaseInvocation,
- BaseInvocationOutput,
- invocation,
- invocation_output,
-)
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import ImageField
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, IPAdapterConfig, ModelType
+from invokeai.backend.model_manager.config import (
+ AnyModelConfig,
+ BaseModelType,
+ IPAdapterCheckpointConfig,
+ IPAdapterDiffusersConfig,
+ ModelType,
+)
class IPAdapterField(BaseModel):
@@ -86,8 +87,12 @@ class IPAdapterInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> IPAdapterOutput:
# Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
- assert isinstance(ip_adapter_info, IPAdapterConfig)
- image_encoder_model_id = ip_adapter_info.image_encoder_model_id
+ assert isinstance(ip_adapter_info, (IPAdapterDiffusersConfig, IPAdapterCheckpointConfig))
+ image_encoder_model_id = (
+ ip_adapter_info.image_encoder_model_id
+ if isinstance(ip_adapter_info, IPAdapterDiffusersConfig)
+ else "InvokeAI/ip_adapter_sd_image_encoder"
+ )
image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
image_encoder_model = self._get_image_encoder(context, image_encoder_model_name)
return IPAdapterOutput(
diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index e51966c779..81514a9f8b 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -1,10 +1,11 @@
# copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0)
# and modified as needed
-from typing import Optional, Union
+from typing import List, Optional, TypedDict, Union
import torch
from PIL import Image
+from safetensors import safe_open
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionWeights
@@ -13,10 +14,17 @@ from ..raw_model import RawModel
from .resampler import Resampler
+class IPAdapterStateDict(TypedDict):
+ ip_adapter: dict[str, torch.Tensor]
+ image_proj: dict[str, torch.Tensor]
+
+
class ImageProjModel(torch.nn.Module):
"""Image Projection Model"""
- def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024, clip_extra_context_tokens=4):
+ def __init__(
+ self, cross_attention_dim: int = 1024, clip_embeddings_dim: int = 1024, clip_extra_context_tokens: int = 4
+ ):
super().__init__()
self.cross_attention_dim = cross_attention_dim
@@ -25,7 +33,7 @@ class ImageProjModel(torch.nn.Module):
self.norm = torch.nn.LayerNorm(cross_attention_dim)
@classmethod
- def from_state_dict(cls, state_dict: dict[torch.Tensor], clip_extra_context_tokens=4):
+ def from_state_dict(cls, state_dict: dict[str, torch.Tensor], clip_extra_context_tokens: int = 4):
"""Initialize an ImageProjModel from a state_dict.
The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict.
@@ -57,7 +65,7 @@ class ImageProjModel(torch.nn.Module):
class MLPProjModel(torch.nn.Module):
"""SD model with image prompt"""
- def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024):
+ def __init__(self, cross_attention_dim: int = 1024, clip_embeddings_dim: int = 1024):
super().__init__()
self.proj = torch.nn.Sequential(
@@ -68,7 +76,7 @@ class MLPProjModel(torch.nn.Module):
)
@classmethod
- def from_state_dict(cls, state_dict: dict[torch.Tensor]):
+ def from_state_dict(cls, state_dict: dict[str, torch.Tensor]):
"""Initialize an MLPProjModel from a state_dict.
The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict.
@@ -97,7 +105,7 @@ class IPAdapter(RawModel):
def __init__(
self,
- state_dict: dict[str, torch.Tensor],
+ state_dict: IPAdapterStateDict,
device: torch.device,
dtype: torch.dtype = torch.float16,
num_tokens: int = 4,
@@ -129,13 +137,11 @@ class IPAdapter(RawModel):
return calc_model_size_by_data(self._image_proj_model) + calc_model_size_by_data(self.attn_weights)
- def _init_image_proj_model(self, state_dict):
+ def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
return ImageProjModel.from_state_dict(state_dict, self._num_tokens).to(self.device, dtype=self.dtype)
@torch.inference_mode()
- def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection):
- if isinstance(pil_image, Image.Image):
- pil_image = [pil_image]
+ def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection):
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image_embeds = image_encoder(clip_image.to(self.device, dtype=self.dtype)).image_embeds
image_prompt_embeds = self._image_proj_model(clip_image_embeds)
@@ -146,7 +152,7 @@ class IPAdapter(RawModel):
class IPAdapterPlus(IPAdapter):
"""IP-Adapter with fine-grained features"""
- def _init_image_proj_model(self, state_dict):
+ def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
return Resampler.from_state_dict(
state_dict=state_dict,
depth=4,
@@ -157,9 +163,7 @@ class IPAdapterPlus(IPAdapter):
).to(self.device, dtype=self.dtype)
@torch.inference_mode()
- def get_image_embeds(self, pil_image, image_encoder: CLIPVisionModelWithProjection):
- if isinstance(pil_image, Image.Image):
- pil_image = [pil_image]
+ def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection):
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image = clip_image.to(self.device, dtype=self.dtype)
clip_image_embeds = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
@@ -174,14 +178,14 @@ class IPAdapterPlus(IPAdapter):
class IPAdapterFull(IPAdapterPlus):
"""IP-Adapter Plus with full features."""
- def _init_image_proj_model(self, state_dict: dict[torch.Tensor]):
+ def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
return MLPProjModel.from_state_dict(state_dict).to(self.device, dtype=self.dtype)
class IPAdapterPlusXL(IPAdapterPlus):
"""IP-Adapter Plus for SDXL."""
- def _init_image_proj_model(self, state_dict):
+ def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
return Resampler.from_state_dict(
state_dict=state_dict,
depth=4,
@@ -195,7 +199,19 @@ class IPAdapterPlusXL(IPAdapterPlus):
def build_ip_adapter(
ip_adapter_ckpt_path: str, device: torch.device, dtype: torch.dtype = torch.float16
) -> Union[IPAdapter, IPAdapterPlus]:
- state_dict = torch.load(ip_adapter_ckpt_path, map_location="cpu")
+ state_dict: IPAdapterStateDict = {"ip_adapter": {}, "image_proj": {}}
+
+ if ip_adapter_ckpt_path.endswith("safetensors"):
+ state_dict = {"ip_adapter": {}, "image_proj": {}}
+ model = safe_open(ip_adapter_ckpt_path, device=device.type, framework="pt")
+ for key in model.keys():
+ if key.startswith("image_proj."):
+ state_dict["image_proj"][key.replace("image_proj.", "")] = model.get_tensor(key)
+ if key.startswith("ip_adapter."):
+ state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model.get_tensor(key)
+ else:
+ ip_adapter_diffusers_checkpoint_path = ip_adapter_ckpt_path + "/ip_adapter.bin"
+ state_dict = torch.load(ip_adapter_diffusers_checkpoint_path, map_location="cpu")
if "proj.weight" in state_dict["image_proj"]: # IPAdapter (with ImageProjModel).
return IPAdapter(state_dict, device=device, dtype=dtype)
diff --git a/invokeai/backend/ip_adapter/resampler.py b/invokeai/backend/ip_adapter/resampler.py
index a8db22c0fd..a32eeacfdc 100644
--- a/invokeai/backend/ip_adapter/resampler.py
+++ b/invokeai/backend/ip_adapter/resampler.py
@@ -9,8 +9,8 @@ import torch.nn as nn
# FFN
-def FeedForward(dim, mult=4):
- inner_dim = int(dim * mult)
+def FeedForward(dim: int, mult: int = 4):
+ inner_dim = dim * mult
return nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, inner_dim, bias=False),
@@ -19,8 +19,8 @@ def FeedForward(dim, mult=4):
)
-def reshape_tensor(x, heads):
- bs, length, width = x.shape
+def reshape_tensor(x: torch.Tensor, heads: int):
+ bs, length, _ = x.shape
# (bs, length, width) --> (bs, length, n_heads, dim_per_head)
x = x.view(bs, length, heads, -1)
# (bs, length, n_heads, dim_per_head) --> (bs, n_heads, length, dim_per_head)
@@ -31,7 +31,7 @@ def reshape_tensor(x, heads):
class PerceiverAttention(nn.Module):
- def __init__(self, *, dim, dim_head=64, heads=8):
+ def __init__(self, *, dim: int, dim_head: int = 64, heads: int = 8):
super().__init__()
self.scale = dim_head**-0.5
self.dim_head = dim_head
@@ -45,7 +45,7 @@ class PerceiverAttention(nn.Module):
self.to_kv = nn.Linear(dim, inner_dim * 2, bias=False)
self.to_out = nn.Linear(inner_dim, dim, bias=False)
- def forward(self, x, latents):
+ def forward(self, x: torch.Tensor, latents: torch.Tensor):
"""
Args:
x (torch.Tensor): image features
@@ -80,14 +80,14 @@ class PerceiverAttention(nn.Module):
class Resampler(nn.Module):
def __init__(
self,
- dim=1024,
- depth=8,
- dim_head=64,
- heads=16,
- num_queries=8,
- embedding_dim=768,
- output_dim=1024,
- ff_mult=4,
+ dim: int = 1024,
+ depth: int = 8,
+ dim_head: int = 64,
+ heads: int = 16,
+ num_queries: int = 8,
+ embedding_dim: int = 768,
+ output_dim: int = 1024,
+ ff_mult: int = 4,
):
super().__init__()
@@ -110,7 +110,15 @@ class Resampler(nn.Module):
)
@classmethod
- def from_state_dict(cls, state_dict: dict[torch.Tensor], depth=8, dim_head=64, heads=16, num_queries=8, ff_mult=4):
+ def from_state_dict(
+ cls,
+ state_dict: dict[str, torch.Tensor],
+ depth: int = 8,
+ dim_head: int = 64,
+ heads: int = 16,
+ num_queries: int = 8,
+ ff_mult: int = 4,
+ ):
"""A convenience function that initializes a Resampler from a state_dict.
Some of the shape parameters are inferred from the state_dict (e.g. dim, embedding_dim, etc.). At the time of
@@ -145,7 +153,7 @@ class Resampler(nn.Module):
model.load_state_dict(state_dict)
return model
- def forward(self, x):
+ def forward(self, x: torch.Tensor):
latents = self.latents.repeat(x.size(0), 1, 1)
x = self.proj_in(x)
diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py
index 524e39b2a1..172045d3fc 100644
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -323,10 +323,13 @@ class MainDiffusersConfig(DiffusersConfigBase, MainConfigBase):
return Tag(f"{ModelType.Main.value}.{ModelFormat.Diffusers.value}")
-class IPAdapterConfig(ModelConfigBase):
- """Model config for IP Adaptor format models."""
-
+class IPAdapterBaseConfig(ModelConfigBase):
type: Literal[ModelType.IPAdapter] = ModelType.IPAdapter
+
+
+class IPAdapterDiffusersConfig(IPAdapterBaseConfig):
+ """Model config for IP Adapter diffusers format models."""
+
image_encoder_model_id: str
format: Literal[ModelFormat.InvokeAI]
@@ -335,6 +338,16 @@ class IPAdapterConfig(ModelConfigBase):
return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.InvokeAI.value}")
+class IPAdapterCheckpointConfig(IPAdapterBaseConfig):
+ """Model config for IP Adapter checkpoint format models."""
+
+ format: Literal[ModelFormat.Checkpoint]
+
+ @staticmethod
+ def get_tag() -> Tag:
+ return Tag(f"{ModelType.IPAdapter.value}.{ModelFormat.Checkpoint.value}")
+
+
class CLIPVisionDiffusersConfig(DiffusersConfigBase):
"""Model config for CLIPVision."""
@@ -390,7 +403,8 @@ AnyModelConfig = Annotated[
Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()],
Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()],
Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()],
- Annotated[IPAdapterConfig, IPAdapterConfig.get_tag()],
+ Annotated[IPAdapterDiffusersConfig, IPAdapterDiffusersConfig.get_tag()],
+ Annotated[IPAdapterCheckpointConfig, IPAdapterCheckpointConfig.get_tag()],
Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()],
Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()],
],
diff --git a/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py b/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py
index 89c54948ff..a149cedde2 100644
--- a/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py
+++ b/invokeai/backend/model_manager/load/model_loaders/ip_adapter.py
@@ -19,6 +19,7 @@ from invokeai.backend.model_manager.load import ModelLoader, ModelLoaderRegistry
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.IPAdapter, format=ModelFormat.InvokeAI)
+@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.IPAdapter, format=ModelFormat.Checkpoint)
class IPAdapterInvokeAILoader(ModelLoader):
"""Class to load IP Adapter diffusers models."""
@@ -31,7 +32,7 @@ class IPAdapterInvokeAILoader(ModelLoader):
if submodel_type is not None:
raise ValueError("There are no submodels in an IP-Adapter model.")
model = build_ip_adapter(
- ip_adapter_ckpt_path=str(model_path / "ip_adapter.bin"),
+ ip_adapter_ckpt_path=str(model_path),
device=torch.device("cpu"),
dtype=self._torch_dtype,
)
diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py
index ddd9e99eda..ed73fc56c6 100644
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -230,9 +230,10 @@ class ModelProbe(object):
return ModelType.LoRA
elif any(key.startswith(v) for v in {"controlnet", "control_model", "input_blocks"}):
return ModelType.ControlNet
+ elif any(key.startswith(v) for v in {"image_proj.", "ip_adapter."}):
+ return ModelType.IPAdapter
elif key in {"emb_params", "string_to_param"}:
return ModelType.TextualInversion
-
else:
# diffusers-ti
if len(ckpt) < 10 and all(isinstance(v, torch.Tensor) for v in ckpt.values()):
@@ -527,8 +528,15 @@ class ControlNetCheckpointProbe(CheckpointProbeBase):
class IPAdapterCheckpointProbe(CheckpointProbeBase):
+ """Class for probing IP Adapters"""
+
def get_base_type(self) -> BaseModelType:
- raise NotImplementedError()
+ checkpoint = self.checkpoint
+ for key in checkpoint.keys():
+ if not key.startswith(("image_proj.", "ip_adapter.")):
+ continue
+ return BaseModelType.StableDiffusionXL
+ raise InvalidModelConfigException(f"{self.model_path}: Unable to determine base type")
class CLIPVisionCheckpointProbe(CheckpointProbeBase):
@@ -689,9 +697,7 @@ class ControlNetFolderProbe(FolderProbeBase):
else (
BaseModelType.StableDiffusion2
if dimension == 1024
- else BaseModelType.StableDiffusionXL
- if dimension == 2048
- else None
+ else BaseModelType.StableDiffusionXL if dimension == 2048 else None
)
)
if not base_model:
@@ -768,7 +774,7 @@ class T2IAdapterFolderProbe(FolderProbeBase):
)
-############## register probe classes ######
+# Register probe classes
ModelProbe.register_probe("diffusers", ModelType.Main, PipelineFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.VAE, VaeFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.LoRA, LoRAFolderProbe)
From 60bf0caca331460e683ec0387f23cc54f80c9825 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 01:58:46 +0530
Subject: [PATCH 02/14] feat: add base model recognition for ip adapter
safetensor files
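
The base model is inferred from the IP-Adapter attention weights:
to_k_ip projects the text-encoder hidden states, so the last dimension
of its weight equals the base model's cross-attention width. A sketch
of the rule (the string values are stand-ins for BaseModelType members):

    import torch

    # Cross-attention width -> base model, per the probe logic below.
    DIM_TO_BASE = {768: "sd-1", 1024: "sd-2", 2048: "sdxl"}

    def base_from_ip_adapter(checkpoint: dict[str, torch.Tensor]) -> str:
        dim = checkpoint["ip_adapter.1.to_k_ip.weight"].shape[-1]
        if dim not in DIM_TO_BASE:
            raise ValueError(f"unexpected cross-attention dimension: {dim}")
        return DIM_TO_BASE[dim]
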
---
invokeai/app/invocations/latent.py | 61 +++++++++----------------
invokeai/backend/model_manager/probe.py | 13 +++++-
2 files changed, 34 insertions(+), 40 deletions(-)
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index bc79efdeba..8ad1684bcb 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -14,12 +14,10 @@ from diffusers import AutoencoderKL, AutoencoderTiny
from diffusers.configuration_utils import ConfigMixin
from diffusers.image_processor import VaeImageProcessor
from diffusers.models.adapter import T2IAdapter
-from diffusers.models.attention_processor import (
- AttnProcessor2_0,
- LoRAAttnProcessor2_0,
- LoRAXFormersAttnProcessor,
- XFormersAttnProcessor,
-)
+from diffusers.models.attention_processor import (AttnProcessor2_0,
+ LoRAAttnProcessor2_0,
+ LoRAXFormersAttnProcessor,
+ XFormersAttnProcessor)
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from diffusers.schedulers import DPMSolverSDEScheduler
from diffusers.schedulers import SchedulerMixin as Scheduler
@@ -28,26 +26,17 @@ from pydantic import field_validator
from torchvision.transforms.functional import resize as tv_resize
from transformers import CLIPVisionModelWithProjection
-from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
-from invokeai.app.invocations.fields import (
- ConditioningField,
- DenoiseMaskField,
- FieldDescriptions,
- ImageField,
- Input,
- InputField,
- LatentsField,
- OutputField,
- UIType,
- WithBoard,
- WithMetadata,
-)
+from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR,
+ SCHEDULER_NAME_VALUES)
+from invokeai.app.invocations.fields import (ConditioningField,
+ DenoiseMaskField,
+ FieldDescriptions, ImageField,
+ Input, InputField, LatentsField,
+ OutputField, UIType, WithBoard,
+ WithMetadata)
from invokeai.app.invocations.ip_adapter import IPAdapterField
-from invokeai.app.invocations.primitives import (
- DenoiseMaskOutput,
- ImageOutput,
- LatentsOutput,
-)
+from invokeai.app.invocations.primitives import (DenoiseMaskOutput,
+ ImageOutput, LatentsOutput)
from invokeai.app.invocations.t2i_adapter import T2IAdapterField
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import prepare_control_image
@@ -55,25 +44,19 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_manager import BaseModelType, LoadedModel
from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData, IPAdapterConditioningInfo
+from invokeai.backend.stable_diffusion import (PipelineIntermediateState,
+ set_seamless)
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
+ ConditioningData, IPAdapterConditioningInfo)
from invokeai.backend.util.silence_warnings import SilenceWarnings
from ...backend.stable_diffusion.diffusers_pipeline import (
- ControlNetData,
- IPAdapterData,
- StableDiffusionGeneratorPipeline,
- T2IAdapterData,
- image_resized_to_grid_as_tensor,
-)
+ ControlNetData, IPAdapterData, StableDiffusionGeneratorPipeline,
+ T2IAdapterData, image_resized_to_grid_as_tensor)
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_precision, choose_torch_device
-from .baseinvocation import (
- BaseInvocation,
- BaseInvocationOutput,
- invocation,
- invocation_output,
-)
+from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation,
+ invocation_output)
from .controlnet_image_processors import ControlField
from .model import ModelIdentifierField, UNetField, VAEField
diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py
index ed73fc56c6..bd47cc1a48 100644
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -535,7 +535,18 @@ class IPAdapterCheckpointProbe(CheckpointProbeBase):
for key in checkpoint.keys():
if not key.startswith(("image_proj.", "ip_adapter.")):
continue
- return BaseModelType.StableDiffusionXL
+ cross_attention_dim = checkpoint["ip_adapter.1.to_k_ip.weight"].shape[-1]
+ print(cross_attention_dim)
+ if cross_attention_dim == 768:
+ return BaseModelType.StableDiffusion1
+ elif cross_attention_dim == 1024:
+ return BaseModelType.StableDiffusion2
+ elif cross_attention_dim == 2048:
+ return BaseModelType.StableDiffusionXL
+ else:
+ raise InvalidModelConfigException(
+ f"IP-Adapter had unexpected cross-attention dimension: {cross_attention_dim}."
+ )
raise InvalidModelConfigException(f"{self.model_path}: Unable to determine base type")
From 4ed2bf53ca2d57da7c3df037fdd39e6b47c63eac Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 02:27:38 +0530
Subject: [PATCH 03/14] fix: cleanup across various ip adapter files
---
invokeai/app/invocations/ip_adapter.py | 2 +-
invokeai/app/invocations/latent.py | 52 +++++++++++---------
invokeai/backend/model_manager/probe.py | 24 +++------
invokeai/frontend/web/public/locales/en.json | 1 +
4 files changed, 40 insertions(+), 39 deletions(-)
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 165a6bee24..94bb909433 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -91,7 +91,7 @@ class IPAdapterInvocation(BaseInvocation):
image_encoder_model_id = (
ip_adapter_info.image_encoder_model_id
if isinstance(ip_adapter_info, IPAdapterDiffusersConfig)
- else "InvokeAI/ip_adapter_sd_image_encoder"
+ else "ip_adapter_sd_image_encoder"
)
image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
image_encoder_model = self._get_image_encoder(context, image_encoder_model_name)
diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 8ad1684bcb..3c66b7014f 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -14,10 +14,12 @@ from diffusers import AutoencoderKL, AutoencoderTiny
from diffusers.configuration_utils import ConfigMixin
from diffusers.image_processor import VaeImageProcessor
from diffusers.models.adapter import T2IAdapter
-from diffusers.models.attention_processor import (AttnProcessor2_0,
- LoRAAttnProcessor2_0,
- LoRAXFormersAttnProcessor,
- XFormersAttnProcessor)
+from diffusers.models.attention_processor import (
+ AttnProcessor2_0,
+ LoRAAttnProcessor2_0,
+ LoRAXFormersAttnProcessor,
+ XFormersAttnProcessor,
+)
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from diffusers.schedulers import DPMSolverSDEScheduler
from diffusers.schedulers import SchedulerMixin as Scheduler
@@ -26,17 +28,22 @@ from pydantic import field_validator
from torchvision.transforms.functional import resize as tv_resize
from transformers import CLIPVisionModelWithProjection
-from invokeai.app.invocations.constants import (LATENT_SCALE_FACTOR,
- SCHEDULER_NAME_VALUES)
-from invokeai.app.invocations.fields import (ConditioningField,
- DenoiseMaskField,
- FieldDescriptions, ImageField,
- Input, InputField, LatentsField,
- OutputField, UIType, WithBoard,
- WithMetadata)
+from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR, SCHEDULER_NAME_VALUES
+from invokeai.app.invocations.fields import (
+ ConditioningField,
+ DenoiseMaskField,
+ FieldDescriptions,
+ ImageField,
+ Input,
+ InputField,
+ LatentsField,
+ OutputField,
+ UIType,
+ WithBoard,
+ WithMetadata,
+)
from invokeai.app.invocations.ip_adapter import IPAdapterField
-from invokeai.app.invocations.primitives import (DenoiseMaskOutput,
- ImageOutput, LatentsOutput)
+from invokeai.app.invocations.primitives import DenoiseMaskOutput, ImageOutput, LatentsOutput
from invokeai.app.invocations.t2i_adapter import T2IAdapterField
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import prepare_control_image
@@ -44,19 +51,20 @@ from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_manager import BaseModelType, LoadedModel
from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import (PipelineIntermediateState,
- set_seamless)
-from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
- ConditioningData, IPAdapterConditioningInfo)
+from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData, IPAdapterConditioningInfo
from invokeai.backend.util.silence_warnings import SilenceWarnings
from ...backend.stable_diffusion.diffusers_pipeline import (
- ControlNetData, IPAdapterData, StableDiffusionGeneratorPipeline,
- T2IAdapterData, image_resized_to_grid_as_tensor)
+ ControlNetData,
+ IPAdapterData,
+ StableDiffusionGeneratorPipeline,
+ T2IAdapterData,
+ image_resized_to_grid_as_tensor,
+)
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_precision, choose_torch_device
-from .baseinvocation import (BaseInvocation, BaseInvocationOutput, invocation,
- invocation_output)
+from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from .controlnet_image_processors import ControlField
from .model import ModelIdentifierField, UNetField, VAEField
diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py
index bd47cc1a48..75f156ce21 100644
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -9,23 +9,16 @@ from picklescan.scanner import scan_file_path
import invokeai.backend.util.logging as logger
from invokeai.app.util.misc import uuid_string
-from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
+from invokeai.backend.model_hash.model_hash import (HASHING_ALGORITHMS,
+ ModelHash)
from invokeai.backend.util.util import SilenceWarnings
-from .config import (
- AnyModelConfig,
- BaseModelType,
- ControlAdapterDefaultSettings,
- InvalidModelConfigException,
- MainModelDefaultSettings,
- ModelConfigFactory,
- ModelFormat,
- ModelRepoVariant,
- ModelSourceType,
- ModelType,
- ModelVariantType,
- SchedulerPredictionType,
-)
+from .config import (AnyModelConfig, BaseModelType,
+ ControlAdapterDefaultSettings,
+ InvalidModelConfigException, MainModelDefaultSettings,
+ ModelConfigFactory, ModelFormat, ModelRepoVariant,
+ ModelSourceType, ModelType, ModelVariantType,
+ SchedulerPredictionType)
from .util.model_util import lora_token_vector_length, read_checkpoint_meta
CkptType = Dict[str | int, Any]
@@ -536,7 +529,6 @@ class IPAdapterCheckpointProbe(CheckpointProbeBase):
if not key.startswith(("image_proj.", "ip_adapter.")):
continue
cross_attention_dim = checkpoint["ip_adapter.1.to_k_ip.weight"].shape[-1]
- print(cross_attention_dim)
if cross_attention_dim == 768:
return BaseModelType.StableDiffusion1
elif cross_attention_dim == 1024:
diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json
index 1601169e03..d2402c61be 100644
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -655,6 +655,7 @@
"install": "Install",
"installAll": "Install All",
"installRepo": "Install Repo",
+ "ipAdapters": "IP Adapters",
"load": "Load",
"localOnly": "local only",
"manual": "Manual",
From c4a856de4abc25970b82371a70649653df0f0fd1 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 02:58:57 +0530
Subject: [PATCH 04/14] ui: update frontend types for the new ip adapter configs
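
The backend now exposes two tagged IP Adapter configs instead of one,
discriminated by their format field, and the regenerated schema.ts
mirrors that union. A toy pydantic sketch of the same discrimination
pattern (simplified stand-ins, not the real config classes):

    from typing import Annotated, Literal, Union
    from pydantic import BaseModel, Field, TypeAdapter

    class Diffusers(BaseModel):
        type: Literal["ip_adapter"] = "ip_adapter"
        format: Literal["invokeai"]
        image_encoder_model_id: str

    class Checkpoint(BaseModel):
        type: Literal["ip_adapter"] = "ip_adapter"
        format: Literal["checkpoint"]

    AnyIPAdapter = Annotated[Union[Diffusers, Checkpoint], Field(discriminator="format")]

    # "format" selects the concrete model during validation.
    config = TypeAdapter(AnyIPAdapter).validate_python({"type": "ip_adapter", "format": "checkpoint"})
    assert isinstance(config, Checkpoint)
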
---
invokeai/app/invocations/ip_adapter.py | 2 +
.../frontend/web/src/services/api/schema.ts | 245 +++++++++++++++++-
.../frontend/web/src/services/api/types.ts | 2 +-
3 files changed, 237 insertions(+), 12 deletions(-)
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 94bb909433..2874c92701 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -88,6 +88,7 @@ class IPAdapterInvocation(BaseInvocation):
# Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
assert isinstance(ip_adapter_info, (IPAdapterDiffusersConfig, IPAdapterCheckpointConfig))
+
image_encoder_model_id = (
ip_adapter_info.image_encoder_model_id
if isinstance(ip_adapter_info, IPAdapterDiffusersConfig)
@@ -95,6 +96,7 @@ class IPAdapterInvocation(BaseInvocation):
)
image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
image_encoder_model = self._get_image_encoder(context, image_encoder_model_name)
+
return IPAdapterOutput(
ip_adapter=IPAdapterField(
image=self.image,
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index 55e5743629..70cd37376a 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -159,6 +159,12 @@ export type paths = {
/** Get Starter Models */
get: operations["get_starter_models"];
};
+ "/api/v2/models/hf_login": {
+ /** Get Hf Login Status */
+ get: operations["get_hf_login_status"];
+ /** Do Hf Login */
+ post: operations["do_hf_login"];
+ };
"/api/v1/download_queue/": {
/**
* List Downloads
@@ -1022,6 +1028,14 @@ export type components = {
*/
image_names: string[];
};
+ /** Body_do_hf_login */
+ Body_do_hf_login: {
+ /**
+ * Token
+ * @description Hugging Face token to use for login
+ */
+ token: string;
+ };
/** Body_download */
Body_download: {
/**
@@ -1248,6 +1262,39 @@ export type components = {
*/
type: "boolean_output";
};
+ /**
+ * BRIA AI Background Removal
+ * @description Uses the new Bria 1.4 model to remove backgrounds from images.
+ */
+ BriaRemoveBackgroundInvocation: {
+ /** @description Optional metadata to be saved with the image */
+ metadata?: components["schemas"]["MetadataField"] | null;
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /** @description The image to remove the background from */
+ image?: components["schemas"]["ImageField"];
+ /**
+ * type
+ * @default bria_bg_remove
+ * @constant
+ */
+ type: "bria_bg_remove";
+ };
/** CLIPField */
CLIPField: {
/** @description Info to load tokenizer submodel */
@@ -4122,7 +4169,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | 
components["schemas"]["StringInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["VAELoaderInvocation"];
+ [key: string]: components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] 
| components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["CoreMetadataInvocation"];
};
/**
* Edges
@@ -4159,7 +4206,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["UNetOutput"];
+ [key: string]: components["schemas"]["BooleanOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["CLIPSkipInvocationOutput"];
};
/**
* Errors
@@ -4205,11 +4252,93 @@ export type components = {
*/
type?: "hf";
};
+ /**
+ * HFTokenStatus
+ * @enum {string}
+ */
+ HFTokenStatus: "valid" | "invalid" | "unknown";
/** HTTPValidationError */
HTTPValidationError: {
/** Detail */
detail?: components["schemas"]["ValidationError"][];
};
+ /**
+ * Hand Depth w/ MeshGraphormer
+ * @description Generate hand depth maps to inpaint with, using ControlNet
+ */
+ HandDepthMeshGraphormerProcessor: {
+ /** @description Optional metadata to be saved with the image */
+ metadata?: components["schemas"]["MetadataField"] | null;
+ /**
+ * Id
+ * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
+ */
+ id: string;
+ /**
+ * Is Intermediate
+ * @description Whether or not this is an intermediate invocation.
+ * @default false
+ */
+ is_intermediate?: boolean;
+ /**
+ * Use Cache
+ * @description Whether or not to use the cache
+ * @default true
+ */
+ use_cache?: boolean;
+ /** @description The image to process */
+ image?: components["schemas"]["ImageField"];
+ /**
+ * Resolution
+ * @description Pixel resolution for output image
+ * @default 512
+ */
+ resolution?: number;
+ /**
+ * Mask Padding
+ * @description Amount to pad the hand mask by
+ * @default 30
+ */
+ mask_padding?: number;
+ /**
+ * Offload
+ * @description Offload model after usage
+ * @default false
+ */
+ offload?: boolean;
+ /**
+ * type
+ * @default hand_depth_mesh_graphormer_image_processor
+ * @constant
+ */
+ type: "hand_depth_mesh_graphormer_image_processor";
+ };
+ /**
+ * HandDepthOutput
+ * @description Output class for Meshgraphormer results
+ */
+ HandDepthOutput: {
+ /** @description Improved hands depth map */
+ image: components["schemas"]["ImageField"];
+ /** @description Hands area mask */
+ mask: components["schemas"]["ImageField"];
+ /**
+ * Width
+ * @description The width of the depth map in pixels
+ */
+ width: number;
+ /**
+ * Height
+ * @description The height of the depth map in pixels
+ */
+ height: number;
+ /**
+ * type
+ * @default meshgraphormer_output
+ * @constant
+ */
+ type: "meshgraphormer_output";
+ };
/**
* HED (softedge) Processor
* @description Applies HED edge detection to image
@@ -4320,10 +4449,71 @@ export type components = {
is_diffusers: boolean;
};
/**
- * IPAdapterConfig
- * @description Model config for IP Adaptor format models.
+ * IPAdapterCheckpointConfig
+ * @description Model config for IP Adapter checkpoint format models.
*/
- IPAdapterConfig: {
+ IPAdapterCheckpointConfig: {
+ /**
+ * Key
+ * @description A unique key for this model.
+ */
+ key: string;
+ /**
+ * Hash
+ * @description The hash of the model file(s).
+ */
+ hash: string;
+ /**
+ * Path
+ * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory.
+ */
+ path: string;
+ /**
+ * Name
+ * @description Name of the model.
+ */
+ name: string;
+ /** @description The base model. */
+ base: components["schemas"]["BaseModelType"];
+ /**
+ * Description
+ * @description Model description
+ */
+ description?: string | null;
+ /**
+ * Source
+ * @description The original source of the model (path, URL or repo_id).
+ */
+ source: string;
+ /** @description The type of source */
+ source_type: components["schemas"]["ModelSourceType"];
+ /**
+ * Source Api Response
+ * @description The original API response from the source, as stringified JSON.
+ */
+ source_api_response?: string | null;
+ /**
+ * Cover Image
+ * @description Url for image to preview model
+ */
+ cover_image?: string | null;
+ /**
+ * Type
+ * @default ip_adapter
+ * @constant
+ */
+ type: "ip_adapter";
+ /**
+ * Format
+ * @constant
+ */
+ format: "checkpoint";
+ };
+ /**
+ * IPAdapterDiffusersConfig
+ * @description Model config for IP Adapter diffusers format models.
+ */
+ IPAdapterDiffusersConfig: {
/**
* Key
* @description A unique key for this model.
@@ -7481,7 +7671,7 @@ export type components = {
* Config Out
* @description After successful installation, this will hold the configuration object.
*/
- config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null;
+ config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null;
/**
* Inplace
* @description Leave model in its current location; otherwise install under models directory
@@ -7636,7 +7826,7 @@ export type components = {
*/
ModelsList: {
/** Models */
- models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[];
+ models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[];
};
/**
* Multiply Integers
@@ -11165,7 +11355,7 @@ export type operations = {
/** @description Successful Response */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Validation Error */
@@ -11191,7 +11381,7 @@ export type operations = {
/** @description The model configuration was retrieved successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
@@ -11273,7 +11463,7 @@ export type operations = {
/** @description The model was updated successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
@@ -11672,7 +11862,7 @@ export type operations = {
/** @description Model converted successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
@@ -11706,6 +11896,39 @@ export type operations = {
};
};
};
+ /** Get Hf Login Status */
+ get_hf_login_status: {
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["HFTokenStatus"];
+ };
+ };
+ };
+ };
+ /** Do Hf Login */
+ do_hf_login: {
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["Body_do_hf_login"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["HFTokenStatus"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
/**
* List Downloads
* @description Get a list of active and inactive jobs.
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index 6a81b7b6dc..d24d4bddd5 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -46,7 +46,7 @@ export type LoRAModelConfig = S['LoRADiffusersConfig'] | S['LoRALyCORISConfig'];
// TODO(MM2): Can we rename this from Vae -> VAE
export type VAEModelConfig = S['VAECheckpointConfig'] | S['VAEDiffusersConfig'];
export type ControlNetModelConfig = S['ControlNetDiffusersConfig'] | S['ControlNetCheckpointConfig'];
-export type IPAdapterModelConfig = S['IPAdapterConfig'];
+export type IPAdapterModelConfig = S['IPAdapterDiffusersConfig'] | S['IPAdapterCheckpointConfig'];
export type T2IAdapterModelConfig = S['T2IAdapterConfig'];
type TextualInversionModelConfig = S['TextualInversionFileConfig'] | S['TextualInversionFolderConfig'];
type DiffusersModelConfig = S['MainDiffusersConfig'];
From 318bc938fe0281dc5a14bdc3da2a97cc418baa9e Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 03:06:41 +0530
Subject: [PATCH 05/14] fix: Update ModelView to accommodate the new config
changes to IP Adapter
---
.../features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
index adb123f24d..0618af5dd0 100644
--- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
+++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelPanel/ModelView.tsx
@@ -53,7 +53,7 @@ export const ModelView = () => {
>
)}
- {data.type === 'ip_adapter' && (
+ {data.type === 'ip_adapter' && data.format === 'invokeai' && (
From 688a0f30bbdfaf5bad2410244b51f382ca6fd145 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Sun, 24 Mar 2024 08:34:11 +0530
Subject: [PATCH 06/14] chore: improve types in ip_adapter backend file
---
invokeai/backend/ip_adapter/ip_adapter.py | 46 ++++++++++++++---------
1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 81514a9f8b..5444c76c8c 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -53,7 +53,7 @@ class ImageProjModel(torch.nn.Module):
model.load_state_dict(state_dict)
return model
- def forward(self, image_embeds):
+ def forward(self, image_embeds: torch.Tensor):
embeds = image_embeds
clip_extra_context_tokens = self.proj(embeds).reshape(
-1, self.clip_extra_context_tokens, self.cross_attention_dim
@@ -95,7 +95,7 @@ class MLPProjModel(torch.nn.Module):
model.load_state_dict(state_dict)
return model
- def forward(self, image_embeds):
+ def forward(self, image_embeds: torch.Tensor):
clip_extra_context_tokens = self.proj(image_embeds)
return clip_extra_context_tokens
@@ -137,7 +137,9 @@ class IPAdapter(RawModel):
return calc_model_size_by_data(self._image_proj_model) + calc_model_size_by_data(self.attn_weights)
- def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
+ def _init_image_proj_model(
+ self, state_dict: dict[str, torch.Tensor]
+ ) -> Union[ImageProjModel, Resampler, MLPProjModel]:
return ImageProjModel.from_state_dict(state_dict, self._num_tokens).to(self.device, dtype=self.dtype)
@torch.inference_mode()
@@ -152,7 +154,7 @@ class IPAdapter(RawModel):
class IPAdapterPlus(IPAdapter):
"""IP-Adapter with fine-grained features"""
- def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]):
+ def _init_image_proj_model(self, state_dict: dict[str, torch.Tensor]) -> Union[Resampler, MLPProjModel]:
return Resampler.from_state_dict(
state_dict=state_dict,
depth=4,
@@ -196,36 +198,46 @@ class IPAdapterPlusXL(IPAdapterPlus):
).to(self.device, dtype=self.dtype)
-def build_ip_adapter(
- ip_adapter_ckpt_path: str, device: torch.device, dtype: torch.dtype = torch.float16
-) -> Union[IPAdapter, IPAdapterPlus]:
+def load_ip_adapter_tensors(ip_adapter_ckpt_path: str, device: str) -> IPAdapterStateDict:
state_dict: IPAdapterStateDict = {"ip_adapter": {}, "image_proj": {}}
if ip_adapter_ckpt_path.endswith("safetensors"):
- state_dict = {"ip_adapter": {}, "image_proj": {}}
- model = safe_open(ip_adapter_ckpt_path, device=device.type, framework="pt")
+ model = safe_open(ip_adapter_ckpt_path, device=device, framework="pt")
for key in model.keys():
if key.startswith("image_proj."):
state_dict["image_proj"][key.replace("image_proj.", "")] = model.get_tensor(key)
- if key.startswith("ip_adapter."):
+ elif key.startswith("ip_adapter."):
state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model.get_tensor(key)
else:
ip_adapter_diffusers_checkpoint_path = ip_adapter_ckpt_path + "/ip_adapter.bin"
state_dict = torch.load(ip_adapter_diffusers_checkpoint_path, map_location="cpu")
- if "proj.weight" in state_dict["image_proj"]: # IPAdapter (with ImageProjModel).
+ return state_dict
+
+
+def build_ip_adapter(
+ ip_adapter_ckpt_path: str, device: torch.device, dtype: torch.dtype = torch.float16
+) -> Union[IPAdapter, IPAdapterPlus, IPAdapterPlusXL, IPAdapterFull]:
+ state_dict = load_ip_adapter_tensors(ip_adapter_ckpt_path, device.type)
+
+ # IPAdapter (with ImageProjModel)
+ if "proj.weight" in state_dict["image_proj"]:
return IPAdapter(state_dict, device=device, dtype=dtype)
- elif "proj_in.weight" in state_dict["image_proj"]: # IPAdaterPlus or IPAdapterPlusXL (with Resampler).
+
+ # IPAdapterPlus or IPAdapterPlusXL (with Resampler)
+ elif "proj_in.weight" in state_dict["image_proj"]:
cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
if cross_attention_dim == 768:
- # SD1 IP-Adapter Plus
- return IPAdapterPlus(state_dict, device=device, dtype=dtype)
+ return IPAdapterPlus(state_dict, device=device, dtype=dtype) # SD1 IP-Adapter Plus
elif cross_attention_dim == 2048:
- # SDXL IP-Adapter Plus
- return IPAdapterPlusXL(state_dict, device=device, dtype=dtype)
+ return IPAdapterPlusXL(state_dict, device=device, dtype=dtype) # SDXL IP-Adapter Plus
else:
raise Exception(f"Unsupported IP-Adapter Plus cross-attention dimension: {cross_attention_dim}.")
- elif "proj.0.weight" in state_dict["image_proj"]: # IPAdapterFull (with MLPProjModel).
+
+ # IPAdapterFull (with MLPProjModel)
+ elif "proj.0.weight" in state_dict["image_proj"]:
return IPAdapterFull(state_dict, device=device, dtype=dtype)
+
+ # Unrecognized IP-Adapter architecture
else:
raise ValueError(f"'{ip_adapter_ckpt_path}' has an unrecognized IP-Adapter model architecture.")
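Note: the safetensors branch above partitions the flat checkpoint keys into the two sub-dicts ("image_proj" and "ip_adapter") from which the projection model and attention weights are built, and build_ip_adapter() then dispatches on which projection keys are present ("proj.weight" -> ImageProjModel, "proj_in.weight" -> Resampler, "proj.0.weight" -> MLPProjModel). A minimal, self-contained sketch of that loading scheme, assuming only a hypothetical file path:

    from typing import Dict

    import torch
    from safetensors import safe_open

    def split_ip_adapter_state_dict(path: str) -> Dict[str, Dict[str, torch.Tensor]]:
        """Partition a flat .safetensors IP-Adapter file into its two sub-dicts."""
        state_dict: Dict[str, Dict[str, torch.Tensor]] = {"ip_adapter": {}, "image_proj": {}}
        with safe_open(path, framework="pt", device="cpu") as f:
            for key in f.keys():
                # Keys look like "image_proj.proj.weight" or "ip_adapter.1.to_k_ip.weight".
                if key.startswith("image_proj."):
                    state_dict["image_proj"][key.removeprefix("image_proj.")] = f.get_tensor(key)
                elif key.startswith("ip_adapter."):
                    state_dict["ip_adapter"][key.removeprefix("ip_adapter.")] = f.get_tensor(key)
        return state_dict

    # Usage (the file name is hypothetical):
    # sd = split_ip_adapter_state_dict("ip-adapter-plus_sd15.safetensors")
    # "proj_in.weight" in sd["image_proj"]  ->  IPAdapterPlus / IPAdapterPlusXL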
From 16c366a0600d7bac352f3d6f3714b8fb329f99b5 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Wed, 27 Mar 2024 20:32:41 +0530
Subject: [PATCH 07/14] feat: Let users pick CLIP Vision model for Checkpoint
IP Adapters
---
invokeai/app/invocations/ip_adapter.py | 53 +++++++++------
invokeai/app/invocations/metadata.py | 13 +---
invokeai/frontend/web/public/locales/en.json | 1 +
.../parameters/ParamControlAdapterModel.tsx | 64 +++++++++++++++----
.../hooks/useControlAdapterCLIPVisionModel.ts | 24 +++++++
.../store/controlAdaptersSlice.ts | 9 +++
.../features/controlAdapters/store/types.ts | 3 +
.../util/buildControlAdapter.ts | 1 +
.../util/graph/addIPAdapterToLinearGraph.ts | 6 +-
.../frontend/web/src/services/api/schema.ts | 17 ++++-
10 files changed, 145 insertions(+), 46 deletions(-)
create mode 100644 invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 2874c92701..603a85148d 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -1,5 +1,5 @@
from builtins import float
-from typing import List, Union
+from typing import List, Literal, Union
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
@@ -49,12 +49,15 @@ class IPAdapterOutput(BaseInvocationOutput):
ip_adapter: IPAdapterField = OutputField(description=FieldDescriptions.ip_adapter, title="IP-Adapter")
+CLIP_VISION_MODEL_MAP = {"ViT-H": "ip_adapter_sd_image_encoder", "ViT-G": "ip_adapter_sdxl_image_encoder"}
+
+
@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.2.2")
class IPAdapterInvocation(BaseInvocation):
"""Collects IP-Adapter info to pass to other nodes."""
# Inputs
- image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).")
+ image: Union[ImageField, List[ImageField]] = InputField(description="The IP-Adapter image prompt(s).", ui_order=1)
ip_adapter_model: ModelIdentifierField = InputField(
description="The IP-Adapter model.",
title="IP-Adapter Model",
@@ -62,7 +65,9 @@ class IPAdapterInvocation(BaseInvocation):
ui_order=-1,
ui_type=UIType.IPAdapterModel,
)
-
+ clip_vision_model: Literal["ViT-H", "ViT-G"] = InputField(
+ description="CLIP Vision model to use", default="ViT-H", ui_order=2
+ )
weight: Union[float, List[float]] = InputField(
default=1, description="The weight given to the IP-Adapter", title="Weight"
)
@@ -89,12 +94,12 @@ class IPAdapterInvocation(BaseInvocation):
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
assert isinstance(ip_adapter_info, (IPAdapterDiffusersConfig, IPAdapterCheckpointConfig))
- image_encoder_model_id = (
- ip_adapter_info.image_encoder_model_id
- if isinstance(ip_adapter_info, IPAdapterDiffusersConfig)
- else "ip_adapter_sd_image_encoder"
- )
- image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
+ if isinstance(ip_adapter_info, IPAdapterDiffusersConfig):
+ image_encoder_model_id = ip_adapter_info.image_encoder_model_id
+ image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
+ else:
+ image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
+
image_encoder_model = self._get_image_encoder(context, image_encoder_model_name)
return IPAdapterOutput(
@@ -109,19 +114,25 @@ class IPAdapterInvocation(BaseInvocation):
)
def _get_image_encoder(self, context: InvocationContext, image_encoder_model_name: str) -> AnyModelConfig:
- found = False
- while not found:
+ image_encoder_models = context.models.search_by_attrs(
+ name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision
+ )
+
+ if len(image_encoder_models) == 0:
+ context.logger.warning(
+ f"The image encoder required by this IP Adapter ({image_encoder_model_name}) is not installed. \
+ Downloading and installing now. This may take a while."
+ )
+
+ installer = context._services.model_manager.install
+ job = installer.heuristic_import(f"InvokeAI/{image_encoder_model_name}")
+ installer.wait_for_job(job, timeout=600) # Wait for up to 10 minutes
image_encoder_models = context.models.search_by_attrs(
name=image_encoder_model_name, base=BaseModelType.Any, type=ModelType.CLIPVision
)
- found = len(image_encoder_models) > 0
- if not found:
- context.logger.warning(
- f"The image encoder required by this IP Adapter ({image_encoder_model_name}) is not installed."
- )
- context.logger.warning("Downloading and installing now. This may take a while.")
- installer = context._services.model_manager.install
- job = installer.heuristic_import(f"InvokeAI/{image_encoder_model_name}")
- installer.wait_for_job(job, timeout=600) # wait up to 10 minutes - then raise a TimeoutException
- assert len(image_encoder_models) == 1
+
+ if len(image_encoder_models) == 0:
+ context.logger.error("Error while fetching CLIP Vision Image Encoder")
+ assert len(image_encoder_models) == 1
+
return image_encoder_models[0]
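A short sketch of the search -> install -> re-search flow that _get_image_encoder() now follows. The search_models() and install_model() callables below are hypothetical stand-ins for the model-manager calls in the diff (context.models.search_by_attrs and installer.heuristic_import / wait_for_job); only the control flow is taken from the patch:

    import logging
    from typing import Any, Callable, List

    def ensure_model_installed(
        name: str,
        search_models: Callable[[str], List[Any]],
        install_model: Callable[[str], None],
        logger: logging.Logger,
    ) -> Any:
        candidates = search_models(name)
        if len(candidates) == 0:
            logger.warning("'%s' is not installed; downloading now. This may take a while.", name)
            install_model(f"InvokeAI/{name}")  # assumed to block until the install job finishes
            candidates = search_models(name)
        if len(candidates) == 0:
            raise RuntimeError(f"Failed to install required model '{name}'.")
        return candidates[0]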
diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py
index 6fc72a1c3f..2da482c833 100644
--- a/invokeai/app/invocations/metadata.py
+++ b/invokeai/app/invocations/metadata.py
@@ -2,16 +2,8 @@ from typing import Any, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
-from invokeai.app.invocations.baseinvocation import (
- BaseInvocation,
- BaseInvocationOutput,
- invocation,
- invocation_output,
-)
-from invokeai.app.invocations.controlnet_image_processors import (
- CONTROLNET_MODE_VALUES,
- CONTROLNET_RESIZE_VALUES,
-)
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES
from invokeai.app.invocations.fields import (
FieldDescriptions,
ImageField,
@@ -43,6 +35,7 @@ class IPAdapterMetadataField(BaseModel):
image: ImageField = Field(description="The IP-Adapter image prompt.")
ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model.")
+ clip_vision_model: Literal["ViT-H", "ViT-G"] = Field(description="The CLIP Vision model")
weight: Union[float, list[float]] = Field(description="The weight given to the IP-Adapter")
begin_step_percent: float = Field(description="When the IP-Adapter is first applied (% of total steps)")
end_step_percent: float = Field(description="When the IP-Adapter is last applied (% of total steps)")
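The new clip_vision_model metadata field is a pydantic Literal, so any value outside the two supported encoders is rejected at validation time. A minimal sketch; IPAdapterMeta here is a reduced, hypothetical stand-in for IPAdapterMetadataField, which has more required fields:

    from typing import Literal

    from pydantic import BaseModel, ValidationError

    class IPAdapterMeta(BaseModel):
        clip_vision_model: Literal["ViT-H", "ViT-G"]

    print(IPAdapterMeta(clip_vision_model="ViT-G").clip_vision_model)  # ViT-G
    try:
        IPAdapterMeta(clip_vision_model="ViT-L")  # not in the Literal -> rejected
    except ValidationError as exc:
        print(exc.errors()[0]["type"])  # "literal_error" in pydantic v2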
diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json
index d2402c61be..5872d22dfe 100644
--- a/invokeai/frontend/web/public/locales/en.json
+++ b/invokeai/frontend/web/public/locales/en.json
@@ -217,6 +217,7 @@
"saveControlImage": "Save Control Image",
"scribble": "scribble",
"selectModel": "Select a model",
+ "selectCLIPVisionModel": "Select a CLIP Vision model",
"setControlImageDimensions": "Set Control Image Dimensions To W/H",
"showAdvanced": "Show Advanced",
"small": "Small",
diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
index 25d327e54e..380a33185d 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
+++ b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
@@ -1,12 +1,18 @@
-import { Combobox, FormControl, Tooltip } from '@invoke-ai/ui-library';
+import type { ComboboxOnChange, ComboboxOption } from '@invoke-ai/ui-library';
+import { Combobox, Flex, FormControl, Tooltip } from '@invoke-ai/ui-library';
import { createMemoizedSelector } from 'app/store/createMemoizedSelector';
import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
import { useGroupedModelCombobox } from 'common/hooks/useGroupedModelCombobox';
+import { useControlAdapterCLIPVisionModel } from 'features/controlAdapters/hooks/useControlAdapterCLIPVisionModel';
import { useControlAdapterIsEnabled } from 'features/controlAdapters/hooks/useControlAdapterIsEnabled';
import { useControlAdapterModel } from 'features/controlAdapters/hooks/useControlAdapterModel';
import { useControlAdapterModels } from 'features/controlAdapters/hooks/useControlAdapterModels';
import { useControlAdapterType } from 'features/controlAdapters/hooks/useControlAdapterType';
-import { controlAdapterModelChanged } from 'features/controlAdapters/store/controlAdaptersSlice';
+import {
+ controlAdapterCLIPVisionModelChanged,
+ controlAdapterModelChanged,
+} from 'features/controlAdapters/store/controlAdaptersSlice';
+import type { CLIPVisionModel } from 'features/controlAdapters/store/types';
import { selectGenerationSlice } from 'features/parameters/store/generationSlice';
import { memo, useCallback, useMemo } from 'react';
import { useTranslation } from 'react-i18next';
@@ -29,6 +35,7 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => {
const { modelConfig } = useControlAdapterModel(id);
const dispatch = useAppDispatch();
const currentBaseModel = useAppSelector((s) => s.generation.model?.base);
+ const currentCLIPVisionModel = useControlAdapterCLIPVisionModel(id);
const mainModel = useAppSelector(selectMainModel);
const { t } = useTranslation();
@@ -49,6 +56,16 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => {
[dispatch, id]
);
+ const onCLIPVisionModelChange = useCallback<ComboboxOnChange>(
+ (v) => {
+ if (!v?.value) {
+ return;
+ }
+ dispatch(controlAdapterCLIPVisionModelChanged({ id, clipVisionModel: v.value as CLIPVisionModel }));
+ },
+ [dispatch, id]
+ );
+
const selectedModel = useMemo(
() => (modelConfig && controlAdapterType ? { ...modelConfig, model_type: controlAdapterType } : null),
[controlAdapterType, modelConfig]
@@ -71,17 +88,42 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => {
isLoading,
});
+ const clipVisionOptions = useMemo<ComboboxOption[]>(
+ () => [
+ { label: 'ViT-H', value: 'ViT-H' },
+ { label: 'ViT-G', value: 'ViT-G' },
+ ],
+ []
+ );
+
+ const clipVisionModel = useMemo(
+ () => clipVisionOptions.find((o) => o.value === currentCLIPVisionModel),
+ [clipVisionOptions, currentCLIPVisionModel]
+ );
+
return (
-
-
-
+
+
+
+
+ {modelConfig?.type === 'ip_adapter' && modelConfig.format === 'checkpoint' && (
+
+
+
+ )}
+
);
};
diff --git a/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts
new file mode 100644
index 0000000000..249d2022fe
--- /dev/null
+++ b/invokeai/frontend/web/src/features/controlAdapters/hooks/useControlAdapterCLIPVisionModel.ts
@@ -0,0 +1,24 @@
+import { createMemoizedSelector } from 'app/store/createMemoizedSelector';
+import { useAppSelector } from 'app/store/storeHooks';
+import {
+ selectControlAdapterById,
+ selectControlAdaptersSlice,
+} from 'features/controlAdapters/store/controlAdaptersSlice';
+import { useMemo } from 'react';
+
+export const useControlAdapterCLIPVisionModel = (id: string) => {
+ const selector = useMemo(
+ () =>
+ createMemoizedSelector(selectControlAdaptersSlice, (controlAdapters) => {
+ const cn = selectControlAdapterById(controlAdapters, id);
+ if (cn && cn?.type === 'ip_adapter') {
+ return cn.clipVisionModel;
+ }
+ }),
+ [id]
+ );
+
+ const clipVisionModel = useAppSelector(selector);
+
+ return clipVisionModel;
+};
diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts
index 39dc0dce3d..3fb19b50a7 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts
+++ b/invokeai/frontend/web/src/features/controlAdapters/store/controlAdaptersSlice.ts
@@ -13,6 +13,7 @@ import { v4 as uuidv4 } from 'uuid';
import { controlAdapterImageProcessed } from './actions';
import { CONTROLNET_PROCESSORS } from './constants';
import type {
+ CLIPVisionModel,
ControlAdapterConfig,
ControlAdapterProcessorType,
ControlAdaptersState,
@@ -243,6 +244,13 @@ export const controlAdaptersSlice = createSlice({
}
caAdapter.updateOne(state, { id, changes: { controlMode } });
},
+ controlAdapterCLIPVisionModelChanged: (
+ state,
+ action: PayloadAction<{ id: string; clipVisionModel: CLIPVisionModel }>
+ ) => {
+ const { id, clipVisionModel } = action.payload;
+ caAdapter.updateOne(state, { id, changes: { clipVisionModel } });
+ },
controlAdapterResizeModeChanged: (
state,
action: PayloadAction<{
@@ -380,6 +388,7 @@ export const {
controlAdapterProcessedImageChanged,
controlAdapterIsEnabledChanged,
controlAdapterModelChanged,
+ controlAdapterCLIPVisionModelChanged,
controlAdapterWeightChanged,
controlAdapterBeginStepPctChanged,
controlAdapterEndStepPctChanged,
diff --git a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
index 93d4915cdf..329c318759 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
+++ b/invokeai/frontend/web/src/features/controlAdapters/store/types.ts
@@ -243,12 +243,15 @@ export type T2IAdapterConfig = {
shouldAutoConfig: boolean;
};
+export type CLIPVisionModel = 'ViT-H' | 'ViT-G';
+
export type IPAdapterConfig = {
type: 'ip_adapter';
id: string;
isEnabled: boolean;
controlImage: string | null;
model: ParameterIPAdapterModel | null;
+ clipVisionModel: CLIPVisionModel;
weight: number;
beginStepPct: number;
endStepPct: number;
diff --git a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts
index 94a867cf88..c2cdd9ccd9 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts
+++ b/invokeai/frontend/web/src/features/controlAdapters/util/buildControlAdapter.ts
@@ -45,6 +45,7 @@ export const initialIPAdapter: Omit = {
isEnabled: true,
controlImage: null,
model: null,
+ clipVisionModel: 'ViT-H',
weight: 1,
beginStepPct: 0,
endStepPct: 1,
diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts
index 2298e84d43..ad563de468 100644
--- a/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graph/addIPAdapterToLinearGraph.ts
@@ -48,7 +48,7 @@ export const addIPAdapterToLinearGraph = async (
if (!ipAdapter.model) {
return;
}
- const { id, weight, model, beginStepPct, endStepPct, controlImage } = ipAdapter;
+ const { id, weight, model, clipVisionModel, beginStepPct, endStepPct, controlImage } = ipAdapter;
assert(controlImage, 'IP Adapter image is required');
@@ -58,6 +58,7 @@ export const addIPAdapterToLinearGraph = async (
is_intermediate: true,
weight: weight,
ip_adapter_model: model,
+ clip_vision_model: clipVisionModel,
begin_step_percent: beginStepPct,
end_step_percent: endStepPct,
image: {
@@ -83,7 +84,7 @@ export const addIPAdapterToLinearGraph = async (
};
const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadataField'] => {
- const { controlImage, beginStepPct, endStepPct, model, weight } = ipAdapter;
+ const { controlImage, beginStepPct, endStepPct, model, clipVisionModel, weight } = ipAdapter;
assert(model, 'IP Adapter model is required');
@@ -99,6 +100,7 @@ const buildIPAdapterMetadata = (ipAdapter: IPAdapterConfig): S['IPAdapterMetadat
return {
ip_adapter_model: model,
+ clip_vision_model: clipVisionModel,
weight,
begin_step_percent: beginStepPct,
end_step_percent: endStepPct,
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index 70cd37376a..fcc7e593b4 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -4169,7 +4169,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] 
| components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["CoreMetadataInvocation"];
+ [key: string]: components["schemas"]["SaveImageInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["FloatInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | 
components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["IntegerCollectionInvocation"];
};
/**
* Edges
@@ -4206,7 +4206,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["BooleanOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["CLIPSkipInvocationOutput"];
+ [key: string]: components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["String2Output"] | components["schemas"]["UNetOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerOutput"];
};
/**
* Errors
@@ -4634,6 +4634,13 @@ export type components = {
* @description The IP-Adapter model.
*/
ip_adapter_model: components["schemas"]["ModelIdentifierField"];
+ /**
+ * Clip Vision Model
+ * @description CLIP Vision model to use
+ * @default ViT-H
+ * @enum {string}
+ */
+ clip_vision_model?: "ViT-H" | "ViT-G";
/**
* Weight
* @description The weight given to the IP-Adapter
@@ -4668,6 +4675,12 @@ export type components = {
image: components["schemas"]["ImageField"];
/** @description The IP-Adapter model. */
ip_adapter_model: components["schemas"]["ModelIdentifierField"];
+ /**
+ * Clip Vision Model
+ * @description The CLIP Vision model
+ * @enum {string}
+ */
+ clip_vision_model: "ViT-H" | "ViT-G";
/**
* Weight
* @description The weight given to the IP-Adapter
From 1a93f56d06b6de2b87ab7575c6f6e317cb082a5f Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Wed, 27 Mar 2024 22:05:53 +0530
Subject: [PATCH 08/14] ui: improve the clip vision model picker layout
---
invokeai/backend/model_manager/probe.py | 23 +++++---
.../parameters/ParamControlAdapterModel.tsx | 38 ++++++++-----
.../frontend/web/src/services/api/schema.ts | 56 +------------------
3 files changed, 40 insertions(+), 77 deletions(-)
diff --git a/invokeai/backend/model_manager/probe.py b/invokeai/backend/model_manager/probe.py
index 75f156ce21..7fc8c99e29 100644
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@@ -9,16 +9,23 @@ from picklescan.scanner import scan_file_path
import invokeai.backend.util.logging as logger
from invokeai.app.util.misc import uuid_string
-from invokeai.backend.model_hash.model_hash import (HASHING_ALGORITHMS,
- ModelHash)
+from invokeai.backend.model_hash.model_hash import HASHING_ALGORITHMS, ModelHash
from invokeai.backend.util.util import SilenceWarnings
-from .config import (AnyModelConfig, BaseModelType,
- ControlAdapterDefaultSettings,
- InvalidModelConfigException, MainModelDefaultSettings,
- ModelConfigFactory, ModelFormat, ModelRepoVariant,
- ModelSourceType, ModelType, ModelVariantType,
- SchedulerPredictionType)
+from .config import (
+ AnyModelConfig,
+ BaseModelType,
+ ControlAdapterDefaultSettings,
+ InvalidModelConfigException,
+ MainModelDefaultSettings,
+ ModelConfigFactory,
+ ModelFormat,
+ ModelRepoVariant,
+ ModelSourceType,
+ ModelType,
+ ModelVariantType,
+ SchedulerPredictionType,
+)
from .util.model_util import lora_token_vector_length, read_checkpoint_meta
CkptType = Dict[str | int, Any]
diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
index 380a33185d..91f8822352 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
+++ b/invokeai/frontend/web/src/features/controlAdapters/components/parameters/ParamControlAdapterModel.tsx
@@ -102,9 +102,13 @@ const ParamControlAdapterModel = ({ id }: ParamControlAdapterModelProps) => {
);
return (
-
-
-
+
+
+
{
noOptionsMessage={noOptionsMessage}
/>
- {modelConfig?.type === 'ip_adapter' && modelConfig.format === 'checkpoint' && (
-
-
-
- )}
-
-
+
+ {modelConfig?.type === 'ip_adapter' && modelConfig.format === 'checkpoint' && (
+
+
+
+ )}
+
);
};
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index fcc7e593b4..9b78deccac 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -159,12 +159,6 @@ export type paths = {
/** Get Starter Models */
get: operations["get_starter_models"];
};
- "/api/v2/models/hf_login": {
- /** Get Hf Login Status */
- get: operations["get_hf_login_status"];
- /** Do Hf Login */
- post: operations["do_hf_login"];
- };
"/api/v1/download_queue/": {
/**
* List Downloads
@@ -1028,14 +1022,6 @@ export type components = {
*/
image_names: string[];
};
- /** Body_do_hf_login */
- Body_do_hf_login: {
- /**
- * Token
- * @description Hugging Face token to use for login
- */
- token: string;
- };
/** Body_download */
Body_download: {
/**
@@ -4169,7 +4155,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["SaveImageInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["FloatInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | 
components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["IntegerCollectionInvocation"];
+ [key: string]: components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | 
components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ESRGANInvocation"];
};
/**
* Edges
@@ -4206,7 +4192,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["String2Output"] | components["schemas"]["UNetOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerOutput"];
+ [key: string]: components["schemas"]["BooleanOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ColorOutput"];
};
/**
* Errors
@@ -4252,11 +4238,6 @@ export type components = {
*/
type?: "hf";
};
- /**
- * HFTokenStatus
- * @enum {string}
- */
- HFTokenStatus: "valid" | "invalid" | "unknown";
/** HTTPValidationError */
HTTPValidationError: {
/** Detail */
@@ -11909,39 +11890,6 @@ export type operations = {
};
};
};
- /** Get Hf Login Status */
- get_hf_login_status: {
- responses: {
- /** @description Successful Response */
- 200: {
- content: {
- "application/json": components["schemas"]["HFTokenStatus"];
- };
- };
- };
- };
- /** Do Hf Login */
- do_hf_login: {
- requestBody: {
- content: {
- "application/json": components["schemas"]["Body_do_hf_login"];
- };
- };
- responses: {
- /** @description Successful Response */
- 200: {
- content: {
- "application/json": components["schemas"]["HFTokenStatus"];
- };
- };
- /** @description Validation Error */
- 422: {
- content: {
- "application/json": components["schemas"]["HTTPValidationError"];
- };
- };
- };
- };
/**
* List Downloads
* @description Get a list of active and inactive jobs.
From 0d8b5351312d87087fd052a08229ee0892c46260 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 11:50:18 +0530
Subject: [PATCH 09/14] chore: rename IPAdapterDiffusersConfig to
IPAdapterInvokeAIConfig
---
invokeai/app/invocations/ip_adapter.py | 6 +-
invokeai/backend/model_manager/config.py | 4 +-
.../frontend/web/src/services/api/schema.ts | 252 +++++-------------
.../frontend/web/src/services/api/types.ts | 2 +-
4 files changed, 77 insertions(+), 187 deletions(-)
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 603a85148d..5e24721b2f 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -14,7 +14,7 @@ from invokeai.backend.model_manager.config import (
AnyModelConfig,
BaseModelType,
IPAdapterCheckpointConfig,
- IPAdapterDiffusersConfig,
+ IPAdapterInvokeAIConfig,
ModelType,
)
@@ -92,9 +92,9 @@ class IPAdapterInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> IPAdapterOutput:
# Lookup the CLIP Vision encoder that is intended to be used with the IP-Adapter model.
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
- assert isinstance(ip_adapter_info, (IPAdapterDiffusersConfig, IPAdapterCheckpointConfig))
+ assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))
- if isinstance(ip_adapter_info, IPAdapterDiffusersConfig):
+ if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig):
image_encoder_model_id = ip_adapter_info.image_encoder_model_id
image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
else:
diff --git a/invokeai/backend/model_manager/config.py b/invokeai/backend/model_manager/config.py
index 172045d3fc..82f88c0e81 100644
--- a/invokeai/backend/model_manager/config.py
+++ b/invokeai/backend/model_manager/config.py
@@ -327,7 +327,7 @@ class IPAdapterBaseConfig(ModelConfigBase):
type: Literal[ModelType.IPAdapter] = ModelType.IPAdapter
-class IPAdapterDiffusersConfig(IPAdapterBaseConfig):
+class IPAdapterInvokeAIConfig(IPAdapterBaseConfig):
"""Model config for IP Adapter diffusers format models."""
image_encoder_model_id: str
@@ -403,7 +403,7 @@ AnyModelConfig = Annotated[
Annotated[LoRADiffusersConfig, LoRADiffusersConfig.get_tag()],
Annotated[TextualInversionFileConfig, TextualInversionFileConfig.get_tag()],
Annotated[TextualInversionFolderConfig, TextualInversionFolderConfig.get_tag()],
- Annotated[IPAdapterDiffusersConfig, IPAdapterDiffusersConfig.get_tag()],
+ Annotated[IPAdapterInvokeAIConfig, IPAdapterInvokeAIConfig.get_tag()],
Annotated[IPAdapterCheckpointConfig, IPAdapterCheckpointConfig.get_tag()],
Annotated[T2IAdapterConfig, T2IAdapterConfig.get_tag()],
Annotated[CLIPVisionDiffusersConfig, CLIPVisionDiffusersConfig.get_tag()],
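For context, AnyModelConfig is a Pydantic discriminated union: each member is Annotated with a tag built by its get_tag() classmethod, and validation dispatches to the matching config class. Below is a minimal, self-contained sketch of the same pattern under simplified assumptions — the classes are hypothetical stand-ins (hence the "Sketch" suffix), and they discriminate on `format` alone rather than the combined tag the real configs use.

# Sketch only: stand-in classes, not InvokeAI's actual config definitions.
from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field, TypeAdapter

class IPAdapterInvokeAIConfigSketch(BaseModel):
    type: Literal["ip_adapter"] = "ip_adapter"
    format: Literal["invokeai"] = "invokeai"
    image_encoder_model_id: str

class IPAdapterCheckpointConfigSketch(BaseModel):
    type: Literal["ip_adapter"] = "ip_adapter"
    format: Literal["checkpoint"] = "checkpoint"

# Discriminating on "format" selects the right class during validation,
# analogous to how AnyModelConfig routes on the get_tag() metadata.
AnyIPAdapterConfigSketch = Annotated[
    Union[IPAdapterInvokeAIConfigSketch, IPAdapterCheckpointConfigSketch],
    Field(discriminator="format"),
]

adapter = TypeAdapter(AnyIPAdapterConfigSketch)
config = adapter.validate_python(
    {"format": "invokeai", "image_encoder_model_id": "InvokeAI/ip_adapter_sd_image_encoder"}
)
assert isinstance(config, IPAdapterInvokeAIConfigSketch)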
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index 9b78deccac..6557da78ff 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -1248,39 +1248,6 @@ export type components = {
*/
type: "boolean_output";
};
- /**
- * BRIA AI Background Removal
- * @description Uses the new Bria 1.4 model to remove backgrounds from images.
- */
- BriaRemoveBackgroundInvocation: {
- /** @description Optional metadata to be saved with the image */
- metadata?: components["schemas"]["MetadataField"] | null;
- /**
- * Id
- * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
- */
- id: string;
- /**
- * Is Intermediate
- * @description Whether or not this is an intermediate invocation.
- * @default false
- */
- is_intermediate?: boolean;
- /**
- * Use Cache
- * @description Whether or not to use the cache
- * @default true
- */
- use_cache?: boolean;
- /** @description The image to crop */
- image?: components["schemas"]["ImageField"];
- /**
- * type
- * @default bria_bg_remove
- * @constant
- */
- type: "bria_bg_remove";
- };
/** CLIPField */
CLIPField: {
/** @description Info to load tokenizer submodel */
@@ -4155,7 +4122,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["BriaRemoveBackgroundInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["HandDepthMeshGraphormerProcessor"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | 
components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ESRGANInvocation"];
+ [key: string]: components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | 
components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["DivideInvocation"];
};
/**
* Edges
@@ -4192,7 +4159,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["BooleanOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["String2Output"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["HandDepthOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ColorOutput"];
+ [key: string]: components["schemas"]["StringCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ControlOutput"];
};
/**
* Errors
@@ -4243,83 +4210,6 @@ export type components = {
/** Detail */
detail?: components["schemas"]["ValidationError"][];
};
- /**
- * Hand Depth w/ MeshGraphormer
- * @description Generate hand depth maps to inpaint with using ControlNet
- */
- HandDepthMeshGraphormerProcessor: {
- /** @description Optional metadata to be saved with the image */
- metadata?: components["schemas"]["MetadataField"] | null;
- /**
- * Id
- * @description The id of this instance of an invocation. Must be unique among all instances of invocations.
- */
- id: string;
- /**
- * Is Intermediate
- * @description Whether or not this is an intermediate invocation.
- * @default false
- */
- is_intermediate?: boolean;
- /**
- * Use Cache
- * @description Whether or not to use the cache
- * @default true
- */
- use_cache?: boolean;
- /** @description The image to process */
- image?: components["schemas"]["ImageField"];
- /**
- * Resolution
- * @description Pixel resolution for output image
- * @default 512
- */
- resolution?: number;
- /**
- * Mask Padding
- * @description Amount to pad the hand mask by
- * @default 30
- */
- mask_padding?: number;
- /**
- * Offload
- * @description Offload model after usage
- * @default false
- */
- offload?: boolean;
- /**
- * type
- * @default hand_depth_mesh_graphormer_image_processor
- * @constant
- */
- type: "hand_depth_mesh_graphormer_image_processor";
- };
- /**
- * HandDepthOutput
- * @description Base class for to output Meshgraphormer results
- */
- HandDepthOutput: {
- /** @description Improved hands depth map */
- image: components["schemas"]["ImageField"];
- /** @description Hands area mask */
- mask: components["schemas"]["ImageField"];
- /**
- * Width
- * @description The width of the depth map in pixels
- */
- width: number;
- /**
- * Height
- * @description The height of the depth map in pixels
- */
- height: number;
- /**
- * type
- * @default meshgraphormer_output
- * @constant
- */
- type: "meshgraphormer_output";
- };
/**
* HED (softedge) Processor
* @description Applies HED edge detection to image
@@ -4490,69 +4380,6 @@ export type components = {
*/
format: "checkpoint";
};
- /**
- * IPAdapterDiffusersConfig
- * @description Model config for IP Adapter diffusers format models.
- */
- IPAdapterDiffusersConfig: {
- /**
- * Key
- * @description A unique key for this model.
- */
- key: string;
- /**
- * Hash
- * @description The hash of the model file(s).
- */
- hash: string;
- /**
- * Path
- * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory.
- */
- path: string;
- /**
- * Name
- * @description Name of the model.
- */
- name: string;
- /** @description The base model. */
- base: components["schemas"]["BaseModelType"];
- /**
- * Description
- * @description Model description
- */
- description?: string | null;
- /**
- * Source
- * @description The original source of the model (path, URL or repo_id).
- */
- source: string;
- /** @description The type of source */
- source_type: components["schemas"]["ModelSourceType"];
- /**
- * Source Api Response
- * @description The original API response from the source, as stringified JSON.
- */
- source_api_response?: string | null;
- /**
- * Cover Image
- * @description Url for image to preview model
- */
- cover_image?: string | null;
- /**
- * Type
- * @default ip_adapter
- * @constant
- */
- type: "ip_adapter";
- /** Image Encoder Model Id */
- image_encoder_model_id: string;
- /**
- * Format
- * @constant
- */
- format: "invokeai";
- };
/** IPAdapterField */
IPAdapterField: {
/**
@@ -4647,6 +4474,69 @@ export type components = {
*/
type: "ip_adapter";
};
+ /**
+ * IPAdapterInvokeAIConfig
+ * @description Model config for IP Adapter InvokeAI format models.
+ */
+ IPAdapterInvokeAIConfig: {
+ /**
+ * Key
+ * @description A unique key for this model.
+ */
+ key: string;
+ /**
+ * Hash
+ * @description The hash of the model file(s).
+ */
+ hash: string;
+ /**
+ * Path
+ * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory.
+ */
+ path: string;
+ /**
+ * Name
+ * @description Name of the model.
+ */
+ name: string;
+ /** @description The base model. */
+ base: components["schemas"]["BaseModelType"];
+ /**
+ * Description
+ * @description Model description
+ */
+ description?: string | null;
+ /**
+ * Source
+ * @description The original source of the model (path, URL or repo_id).
+ */
+ source: string;
+ /** @description The type of source */
+ source_type: components["schemas"]["ModelSourceType"];
+ /**
+ * Source Api Response
+ * @description The original API response from the source, as stringified JSON.
+ */
+ source_api_response?: string | null;
+ /**
+ * Cover Image
+ * @description Url for image to preview model
+ */
+ cover_image?: string | null;
+ /**
+ * Type
+ * @default ip_adapter
+ * @constant
+ */
+ type: "ip_adapter";
+ /** Image Encoder Model Id */
+ image_encoder_model_id: string;
+ /**
+ * Format
+ * @constant
+ */
+ format: "invokeai";
+ };
/**
* IPAdapterMetadataField
* @description IP Adapter Field, minus the CLIP Vision Encoder model
@@ -7665,7 +7555,7 @@ export type components = {
* Config Out
* @description After successful installation, this will hold the configuration object.
*/
- config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null;
+ config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"]) | null;
/**
* Inplace
* @description Leave model in its current location; otherwise install under models directory
@@ -7820,7 +7710,7 @@ export type components = {
*/
ModelsList: {
/** Models */
- models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[];
+ models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"])[];
};
/**
* Multiply Integers
@@ -11349,7 +11239,7 @@ export type operations = {
/** @description Successful Response */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Validation Error */
@@ -11375,7 +11265,7 @@ export type operations = {
/** @description The model configuration was retrieved successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
@@ -11457,7 +11347,7 @@ export type operations = {
/** @description The model was updated successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
@@ -11856,7 +11746,7 @@ export type operations = {
/** @description Model converted successfully */
200: {
content: {
- "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterDiffusersConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
+ "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"];
};
};
/** @description Bad request */
diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts
index d24d4bddd5..bdbbc93963 100644
--- a/invokeai/frontend/web/src/services/api/types.ts
+++ b/invokeai/frontend/web/src/services/api/types.ts
@@ -46,7 +46,7 @@ export type LoRAModelConfig = S['LoRADiffusersConfig'] | S['LoRALyCORISConfig'];
// TODO(MM2): Can we rename this from Vae -> VAE
export type VAEModelConfig = S['VAECheckpointConfig'] | S['VAEDiffusersConfig'];
export type ControlNetModelConfig = S['ControlNetDiffusersConfig'] | S['ControlNetCheckpointConfig'];
-export type IPAdapterModelConfig = S['IPAdapterDiffusersConfig'] | S['IPAdapterCheckpointConfig'];
+export type IPAdapterModelConfig = S['IPAdapterInvokeAIConfig'] | S['IPAdapterCheckpointConfig'];
export type T2IAdapterModelConfig = S['T2IAdapterConfig'];
type TextualInversionModelConfig = S['TextualInversionFileConfig'] | S['TextualInversionFolderConfig'];
type DiffusersModelConfig = S['MainDiffusersConfig'];
From cd078b1865681e0a45c4cd1768963ed94dd58652 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 11:58:10 +0530
Subject: [PATCH 10/14] fix: Raise a better error when an incorrect CLIP
 Vision model is used
---
invokeai/backend/ip_adapter/ip_adapter.py | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 5444c76c8c..1155e571ae 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -146,9 +146,12 @@ class IPAdapter(RawModel):
def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection):
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image_embeds = image_encoder(clip_image.to(self.device, dtype=self.dtype)).image_embeds
- image_prompt_embeds = self._image_proj_model(clip_image_embeds)
- uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds))
- return image_prompt_embeds, uncond_image_prompt_embeds
+ try:
+ image_prompt_embeds = self._image_proj_model(clip_image_embeds)
+ uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds))
+ return image_prompt_embeds, uncond_image_prompt_embeds
+ except RuntimeError:
+ raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter")
class IPAdapterPlus(IPAdapter):
@@ -169,12 +172,15 @@ class IPAdapterPlus(IPAdapter):
clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
clip_image = clip_image.to(self.device, dtype=self.dtype)
clip_image_embeds = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
- image_prompt_embeds = self._image_proj_model(clip_image_embeds)
uncond_clip_image_embeds = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[
-2
]
- uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds)
- return image_prompt_embeds, uncond_image_prompt_embeds
+ try:
+ image_prompt_embeds = self._image_proj_model(clip_image_embeds)
+ uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds)
+ return image_prompt_embeds, uncond_image_prompt_embeds
+ except RuntimeError:
+ raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter")
class IPAdapterFull(IPAdapterPlus):
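One note on the wrap-and-re-raise pattern above: the original RuntimeError is not captured, so the friendlier message relies on Python's implicit exception chaining (`__context__`). A minimal sketch of the same pattern with explicit chaining follows; the projection module here is a stand-in for `self._image_proj_model`, not the real class.

# Sketch only: explicit "raise ... from e" variant of the pattern above.
import torch

def project_image_embeds(proj: torch.nn.Module, clip_image_embeds: torch.Tensor) -> torch.Tensor:
    """Stand-in for IPAdapter._image_proj_model(...) with explicit chaining."""
    try:
        return proj(clip_image_embeds)
    except RuntimeError as e:
        # A RuntimeError here is typically a shape mismatch, e.g. ViT-H
        # embeddings fed to a projection head trained for ViT-G. "from e"
        # attaches the original traceback to the friendlier error instead
        # of relying on implicit chaining.
        raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter") from e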
From 56ed697c237d54ce808cda937c1871e15657a590 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 12:12:16 +0530
Subject: [PATCH 11/14] fix: clip vision model auto param
Setting the CLIP Vision model to 'auto' works only for InvokeAI-format configs, where the matching SD image encoder is detected automatically; an explicit user selection always overrides the auto detection. If 'auto' is used with a checkpoint model, we raise an error, since checkpoint models must always be set to a non-auto value.
---
invokeai/app/invocations/ip_adapter.py | 17 ++++++++++++-----
.../frontend/web/src/services/api/schema.ts | 10 +++++-----
2 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 5e24721b2f..40a667c9d0 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -65,8 +65,10 @@ class IPAdapterInvocation(BaseInvocation):
ui_order=-1,
ui_type=UIType.IPAdapterModel,
)
- clip_vision_model: Literal["ViT-H", "ViT-G"] = InputField(
- description="CLIP Vision model to use", default="ViT-H", ui_order=2
+ clip_vision_model: Literal["auto", "ViT-H", "ViT-G"] = InputField(
+ description="CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.",
+ default="auto",
+ ui_order=2,
)
weight: Union[float, List[float]] = InputField(
default=1, description="The weight given to the IP-Adapter", title="Weight"
@@ -94,9 +96,14 @@ class IPAdapterInvocation(BaseInvocation):
ip_adapter_info = context.models.get_config(self.ip_adapter_model.key)
assert isinstance(ip_adapter_info, (IPAdapterInvokeAIConfig, IPAdapterCheckpointConfig))
- if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig):
- image_encoder_model_id = ip_adapter_info.image_encoder_model_id
- image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
+ if self.clip_vision_model == "auto":
+ if isinstance(ip_adapter_info, IPAdapterInvokeAIConfig):
+ image_encoder_model_id = ip_adapter_info.image_encoder_model_id
+ image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
+ else:
+ raise RuntimeError(
+ "You need to set the appropriate CLIP Vision model for checkpoint IP Adapter models."
+ )
else:
image_encoder_model_name = CLIP_VISION_MODEL_MAP[self.clip_vision_model]
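Pulled out of the invocation for clarity, the resolution order above amounts to the following sketch. CLIP_VISION_MODEL_MAP is defined elsewhere in ip_adapter.py and its contents are not shown in this patch, so the map entries below are hypothetical placeholders.

# Sketch only: standalone version of the "auto" resolution logic above.
from typing import Literal, Optional

# Hypothetical placeholder contents; the real CLIP_VISION_MODEL_MAP lives
# in invokeai/app/invocations/ip_adapter.py.
CLIP_VISION_MODEL_MAP = {
    "ViT-H": "ip_adapter_sd_image_encoder",
    "ViT-G": "ip_adapter_sdxl_image_encoder",
}

def resolve_image_encoder_name(
    clip_vision_model: Literal["auto", "ViT-H", "ViT-G"],
    image_encoder_model_id: Optional[str],  # set only for InvokeAI-format configs
) -> str:
    if clip_vision_model == "auto":
        if image_encoder_model_id is None:
            # Checkpoint-format models carry no encoder id, so "auto" cannot work.
            raise RuntimeError("You need to set the appropriate CLIP Vision model for checkpoint IP Adapter models.")
        # InvokeAI-format configs record a repo id such as
        # "InvokeAI/ip_adapter_sd_image_encoder"; keep only the model name.
        return image_encoder_model_id.split("/")[-1].strip()
    # An explicit user selection always wins over auto detection.
    return CLIP_VISION_MODEL_MAP[clip_vision_model]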
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index 6557da78ff..b75463a8c3 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -4122,7 +4122,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | 
components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["DivideInvocation"];
+ [key: string]: components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | 
components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"];
};
/**
* Edges
@@ -4159,7 +4159,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["StringCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ControlOutput"];
+ [key: string]: components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ControlOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["GradientMaskOutput"];
};
/**
* Errors
@@ -4444,11 +4444,11 @@ export type components = {
ip_adapter_model: components["schemas"]["ModelIdentifierField"];
/**
* Clip Vision Model
- * @description CLIP Vision model to use
- * @default ViT-H
+ * @description CLIP Vision model to use. Overrides model settings. Mandatory for checkpoint models.
+ * @default auto
* @enum {string}
*/
- clip_vision_model?: "ViT-H" | "ViT-G";
+ clip_vision_model?: "auto" | "ViT-H" | "ViT-G";
/**
* Weight
* @description The weight given to the IP-Adapter
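The schema change above widens clip_vision_model with an "auto" option that defers to whatever the model configuration records. Because checkpoint-format IP Adapters carry no encoder metadata, "auto" cannot be resolved for them and an explicit choice is required, as the new field description notes. A minimal sketch of how such a resolution step could look on the backend follows; the function name, mapping, and repo ids are illustrative assumptions, not the actual InvokeAI implementation:

    from typing import Literal, Optional

    ClipVisionModel = Literal["auto", "ViT-H", "ViT-G"]

    # Hypothetical mapping from the enum values to image-encoder repos;
    # the ids are placeholders for illustration only.
    _EXPLICIT_ENCODERS = {
        "ViT-H": "InvokeAI/ip_adapter_sd_image_encoder",
        "ViT-G": "InvokeAI/ip_adapter_sdxl_image_encoder",
    }

    def resolve_image_encoder(choice: ClipVisionModel, recorded_encoder: Optional[str]) -> str:
        # "auto" defers to the encoder recorded in the model config,
        # which only diffusers-format IP Adapters provide.
        if choice == "auto":
            if recorded_encoder is None:
                raise ValueError("clip_vision_model is mandatory for checkpoint IP Adapter models")
            return recorded_encoder
        return _EXPLICIT_ENCODERS[choice]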
From 6e4c2d3685c9aa765c88228baae8607994452cde Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 12:34:56 +0530
Subject: [PATCH 12/14] fix: Fail when unexpected keys are found in IP Adapter
models
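
Previously, any key outside the expected "image_proj." / "ip_adapter." prefixes was silently dropped while partitioning a safetensors state dict; the loader now raises instead, so malformed or mislabelled files fail loudly at load time. A condensed sketch of the loop after this patch (partition_ip_adapter_keys is an illustrative name, not a function in the codebase):

    from typing import Dict

    import torch

    def partition_ip_adapter_keys(tensors: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
        # Split a flat safetensors state dict into the two sub-dicts the
        # IP Adapter expects, failing fast on anything unrecognised.
        state_dict: Dict[str, Dict[str, torch.Tensor]] = {"ip_adapter": {}, "image_proj": {}}
        for key, tensor in tensors.items():
            if key.startswith("image_proj."):
                state_dict["image_proj"][key.replace("image_proj.", "")] = tensor
            elif key.startswith("ip_adapter."):
                state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = tensor
            else:
                raise RuntimeError(f"Encountered unexpected IP Adapter state dict key: '{key}'.")
        return state_dict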
---
invokeai/backend/ip_adapter/ip_adapter.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 1155e571ae..02788c0ba6 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -214,6 +214,8 @@ def load_ip_adapter_tensors(ip_adapter_ckpt_path: str, device: str) -> IPAdapter
state_dict["image_proj"][key.replace("image_proj.", "")] = model.get_tensor(key)
elif key.startswith("ip_adapter."):
state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model.get_tensor(key)
+ else:
+ raise RuntimeError(f"Encountered unexpected IP Adapter state dict key: '{key}'.")
else:
ip_adapter_diffusers_checkpoint_path = ip_adapter_ckpt_path + "/ip_adapter.bin"
state_dict = torch.load(ip_adapter_diffusers_checkpoint_path, map_location="cpu")
From 298cae5bb9389d63ca5cd72bf26f2ea965879a11 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 12:41:10 +0530
Subject: [PATCH 13/14] Update schema.ts
---
.../frontend/web/src/services/api/schema.ts | 39 +------------------
1 file changed, 2 insertions(+), 37 deletions(-)
diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts
index b75463a8c3..497a55fc08 100644
--- a/invokeai/frontend/web/src/services/api/schema.ts
+++ b/invokeai/frontend/web/src/services/api/schema.ts
@@ -136,16 +136,6 @@ export type paths = {
*/
delete: operations["cancel_model_install_job"];
};
- "/api/v2/models/sync": {
- /**
- * Sync Models To Config
- * @description Traverse the models and autoimport directories.
- *
- * Model files without a corresponding
- * record in the database are added. Orphan records without a models file are deleted.
- */
- patch: operations["sync_models_to_config"];
- };
"/api/v2/models/convert/{key}": {
/**
* Convert Model
@@ -4122,7 +4112,7 @@ export type components = {
* @description The nodes in this graph
*/
nodes: {
- [key: string]: components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | 
components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"];
+ [key: string]: components["schemas"]["FloatInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | 
components["schemas"]["FaceMaskInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["DepthAnythingImageProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["DWOpenposeImageProcessorInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["CLIPSkipInvocation"];
};
/**
* Edges
@@ -4159,7 +4149,7 @@ export type components = {
* @description The results of node executions
*/
results: {
- [key: string]: components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ControlOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["GradientMaskOutput"];
+ [key: string]: components["schemas"]["ConditioningOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FloatCollectionOutput"];
};
/**
* Errors
@@ -11704,31 +11694,6 @@ export type operations = {
};
};
};
- /**
- * Sync Models To Config
- * @description Traverse the models and autoimport directories.
- *
- * Model files without a corresponding
- * record in the database are added. Orphan records without a models file are deleted.
- */
- sync_models_to_config: {
- responses: {
- /** @description Successful Response */
- 200: {
- content: {
- "application/json": unknown;
- };
- };
- /** @description Model config record database resynced with files on disk */
- 204: {
- content: never;
- };
- /** @description Bad request */
- 400: {
- content: never;
- };
- };
- };
/**
* Convert Model
* @description Permanently convert a model into diffusers format, replacing the safetensors version.
From 23390f151698ef56663ce329f0d093c3445c1226 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Mon, 1 Apr 2024 06:37:38 +0530
Subject: [PATCH 14/14] cleanup: use safetensors' load_file directly for
 loading IP Adapters
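
safe_open returns a lazy handle whose tensors must be fetched one at a
time with get_tensor(); load_file eagerly reads the whole file into an
ordinary dict of tensors, so entries can be read with plain indexing.
Since the loader consumes every tensor anyway, the eager form is simpler
at no real cost. A minimal before/after sketch (the path is a
placeholder):

    import safetensors
    import safetensors.torch

    path = "ip_adapter.safetensors"  # placeholder path

    # Before: lazy handle, tensors pulled one at a time.
    with safetensors.safe_open(path, framework="pt", device="cpu") as f:
        lazy = {key: f.get_tensor(key) for key in f.keys()}

    # After: eager load of the full state dict in one call.
    eager = safetensors.torch.load_file(path, device="cpu")

    assert set(lazy.keys()) == set(eager.keys())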
---
invokeai/backend/ip_adapter/ip_adapter.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 02788c0ba6..920cb3780a 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -3,12 +3,14 @@
from typing import List, Optional, TypedDict, Union
+import safetensors
+import safetensors.torch
import torch
from PIL import Image
-from safetensors import safe_open
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionWeights
+from invokeai.backend.util.devices import choose_torch_device
from ..raw_model import RawModel
from .resampler import Resampler
@@ -208,12 +210,12 @@ def load_ip_adapter_tensors(ip_adapter_ckpt_path: str, device: str) -> IPAdapter
state_dict: IPAdapterStateDict = {"ip_adapter": {}, "image_proj": {}}
if ip_adapter_ckpt_path.endswith("safetensors"):
- model = safe_open(ip_adapter_ckpt_path, device=device, framework="pt")
+ model = safetensors.torch.load_file(ip_adapter_ckpt_path, device=device)
for key in model.keys():
if key.startswith("image_proj."):
- state_dict["image_proj"][key.replace("image_proj.", "")] = model.get_tensor(key)
+ state_dict["image_proj"][key.replace("image_proj.", "")] = model[key]
elif key.startswith("ip_adapter."):
- state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model.get_tensor(key)
+ state_dict["ip_adapter"][key.replace("ip_adapter.", "")] = model[key]
else:
raise RuntimeError(f"Encountered unexpected IP Adapter state dict key: '{key}'.")
else: