Look up the IP-Adapter's linked image encoder from disk instead of storing it in the model config metadata.

Ryan Dick 2023-09-14 23:06:57 -04:00
parent 18095ecc44
commit 990ce9a1da
5 changed files with 53 additions and 39 deletions
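In short: each IP-Adapter model directory ships a small `image_encoder.txt` sidecar whose first line is the Hugging Face repo ID of the paired CLIP Vision encoder (see the docs excerpt and the `get_ip_adapter_image_encoder_model_id` helper in the diffs below). A minimal sketch of the lookup this commit switches to; the directory path and function name here are illustrative, not the commit's exact code:

```python
import os

def lookup_image_encoder_id(ip_adapter_dir: str) -> str:
    # The first line of `image_encoder.txt` holds the encoder's HF repo ID,
    # e.g. "InvokeAI/ip_adapter_sd_image_encoder".
    with open(os.path.join(ip_adapter_dir, "image_encoder.txt"), "r") as f:
        return f.readline().strip()

# Hypothetical model directory; only the final path component of the repo ID
# is used as the local model name.
encoder_id = lookup_image_encoder_id("/models/sd-1/ip_adapter/ip_adapter_sd15")
encoder_name = encoder_id.split("/")[-1]
```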

View File

@@ -1,3 +1,5 @@
+import os
+
 from pydantic import BaseModel, Field
 from invokeai.app.invocations.baseinvocation import (
@@ -14,6 +16,9 @@ from invokeai.app.invocations.baseinvocation import (
 )
 from invokeai.app.invocations.primitives import ImageField
 from invokeai.backend.model_management.models.base import BaseModelType, ModelType
+from invokeai.backend.model_management.models.ip_adapter import (
+    get_ip_adapter_image_encoder_model_id,
+)
 
 
 class IPAdapterModelField(BaseModel):
@@ -57,7 +62,15 @@ class IPAdapterInvocation(BaseInvocation):
         ip_adapter_info = context.services.model_manager.model_info(
             self.ip_adapter_model.model_name, self.ip_adapter_model.base_model, ModelType.IPAdapter
         )
-        image_encoder_model_name = ip_adapter_info["image_encoder_model"].split("/")[-1].strip()
+        # HACK(ryand): This is bad for a couple of reasons: 1) we are bypassing the model manager to read the model
+        # directly, and 2) we are reading from disk every time this invocation is called without caching the result.
+        # A better solution would be to store the image encoder model reference in the IP-Adapter model info, but this
+        # is currently messy due to differences between how the model info is generated when installing a model from
+        # disk vs. downloading the model.
+        image_encoder_model_id = get_ip_adapter_image_encoder_model_id(
+            os.path.join(context.services.configuration.get_config().models_path, ip_adapter_info["path"])
+        )
+        image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
         image_encoder_model = CLIPVisionModelField(
             model_name=image_encoder_model_name,
             base_model=BaseModelType.Any,
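The HACK note above flags that the encoder ID is re-read from disk on every invocation. One way to blunt that cost, shown purely as a sketch (this is not what the commit does, and it leaves the model-manager-bypass concern untouched), is to memoize the lookup per resolved path:

```python
import os
from functools import lru_cache

@lru_cache(maxsize=None)
def cached_image_encoder_model_id(ip_adapter_path: str) -> str:
    # Memoized per path: the file is read once, then served from the cache on
    # subsequent invocations. The cache would go stale if image_encoder.txt
    # were edited while the app is running.
    with open(os.path.join(ip_adapter_path, "image_encoder.txt"), "r") as f:
        return f.readline().strip()
```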

View File

@@ -31,6 +31,15 @@ ip_adapter_sd15/
 
 The weights in `ip_adapter.bin` are stored in a nested dict, which is not supported by `safetensors`. This could be solved by splitting `ip_adapter.bin` into multiple files, but for now we have decided to maintain consistency with the checkpoint structure used in the official [h94/IP-Adapter](https://huggingface.co/h94/IP-Adapter) repo.
 
-## InvokeAI-Hosted IP-Adapters
+## InvokeAI Hosted IP-Adapters
 
-TODO(ryand): Add list
+Image Encoders:
+- [InvokeAI/ip_adapter_sd_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sd_image_encoder)
+- [InvokeAI/ip_adapter_sdxl_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sdxl_image_encoder)
+
+IP-Adapters:
+- [InvokeAI/ip_adapter_sd15](https://huggingface.co/InvokeAI/ip_adapter_sd15)
+- [InvokeAI/ip_adapter_plus_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_sd15)
+- [InvokeAI/ip_adapter_plus_face_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15)
+- [InvokeAI/ip_adapter_sdxl](https://huggingface.co/InvokeAI/ip_adapter_sdxl)
+- [InvokeAI/ip_adapter_sdxl_vit_h](https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h)
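The doc paragraph above notes that `ip_adapter.bin` holds a nested state dict, which `safetensors` cannot store: `safetensors.torch.save_file` accepts only a flat `str -> Tensor` mapping. A sketch of the constraint and one possible key-flattening workaround (illustrative only; per the docs, the project instead keeps the upstream `.bin` layout):

```python
import torch
from safetensors.torch import save_file

nested = {
    "image_proj": {"proj.weight": torch.zeros(2, 2)},
    "ip_adapter": {"1.to_k_ip.weight": torch.zeros(2, 2)},
}

# save_file(nested, "ip_adapter.safetensors")  # rejected: values must be tensors, not dicts

# Flattening the two levels into dotted keys yields a shape safetensors accepts:
flat = {f"{group}.{key}": t for group, sub in nested.items() for key, t in sub.items()}
save_file(flat, "ip_adapter.safetensors")
```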

View File

@@ -511,9 +511,7 @@ class ControlNetFolderProbe(FolderProbeBase):
                 else (
                     BaseModelType.StableDiffusion2
                     if dimension == 1024
-                    else BaseModelType.StableDiffusionXL
-                    if dimension == 2048
-                    else None
+                    else BaseModelType.StableDiffusionXL if dimension == 2048 else None
                 )
             )
         if not base_model:
@@ -543,7 +541,7 @@ class IPAdapterFolderProbe(FolderProbeBase):
         if not model_file.exists():
             raise InvalidModelException("Unknown IP-Adapter model format.")
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, map_location="cpu")
         cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
         if cross_attention_dim == 768:
             return BaseModelType.StableDiffusion1
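Two details of this probe are worth spelling out: `map_location="cpu"` keeps `torch.load` from restoring tensors onto the GPU device they were saved from (probing only needs tensor shapes, and the call must also work on CPU-only hosts), and the cross-attention width identifies the base model, mirroring the dimension mapping visible in `ControlNetFolderProbe` above (1024 is SD 2, 2048 is SDXL; 768 is SD 1 by the same convention). A condensed sketch, returning plain strings rather than `BaseModelType` members:

```python
import torch

def probe_ip_adapter_base(model_file: str) -> str:
    # Load onto CPU regardless of where the checkpoint was saved; we only
    # inspect tensor shapes, so no GPU memory should be touched.
    state_dict = torch.load(model_file, map_location="cpu")
    dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
    # Same dimension -> base-model mapping used for ControlNets above.
    return {768: "sd-1", 1024: "sd-2", 2048: "sdxl"}.get(dim, "unknown")
```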

View File

@@ -29,7 +29,6 @@ class IPAdapterModelFormat(str, Enum):
 class IPAdapterModel(ModelBase):
     class InvokeAIConfig(ModelConfigBase):
         model_format: Literal[IPAdapterModelFormat.InvokeAI]
-        image_encoder_model: str
 
     def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
         assert model_type == ModelType.IPAdapter
@@ -50,19 +49,6 @@ class IPAdapterModel(ModelBase):
             raise InvalidModelException(f"Unexpected IP-Adapter model format: {path}")
 
-    @classmethod
-    def probe_config(cls, path: str, **kwargs) -> ModelConfigBase:
-        image_encoder_config_file = os.path.join(path, "image_encoder.txt")
-        with open(image_encoder_config_file, "r") as f:
-            image_encoder_model = f.readline().strip()
-
-        return cls.create_config(
-            path=path,
-            model_format=cls.detect_format(path),
-            image_encoder_model=image_encoder_model,
-        )
-
     @classproperty
     def save_to_config(cls) -> bool:
         return True
@@ -98,3 +84,13 @@ class IPAdapterModel(ModelBase):
             return model_path
         else:
             raise ValueError(f"Unsupported format: '{format}'.")
+
+
+def get_ip_adapter_image_encoder_model_id(model_path: str):
+    """Read the ID of the image encoder associated with the IP-Adapter at `model_path`."""
+    image_encoder_config_file = os.path.join(model_path, "image_encoder.txt")
+
+    with open(image_encoder_config_file, "r") as f:
+        image_encoder_model = f.readline().strip()
+
+    return image_encoder_model
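For reference, this is how the invocation in the first file composes the argument to the new module-level helper; the concrete paths here are hypothetical stand-ins for the app config's models directory and the model's relative path:

```python
import os

models_path = "/data/invokeai/models"              # from the app config (hypothetical)
relative_path = "sd-1/ip_adapter/ip_adapter_sd15"  # ip_adapter_info["path"] (hypothetical)

encoder_id = get_ip_adapter_image_encoder_model_id(os.path.join(models_path, relative_path))
# e.g. "InvokeAI/ip_adapter_sd_image_encoder" -> local name "ip_adapter_sd_image_encoder"
encoder_name = encoder_id.split("/")[-1].strip()
```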

View File

@@ -2549,8 +2549,6 @@ export type components = {
       */
      model_format: "invokeai";
      error?: components["schemas"]["ModelError"];
-      /** Image Encoder Model */
-      image_encoder_model: string;
    };
    /**
     * IPAdapterOutput
@@ -7262,17 +7260,29 @@ export type components = {
      ui_order?: number;
    };
    /**
-     * StableDiffusion2ModelFormat
+     * ControlNetModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
+    ControlNetModelFormat: "checkpoint" | "diffusers";
+    /**
+     * StableDiffusionXLModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
    /**
     * StableDiffusionOnnxModelFormat
     * @description An enumeration.
     * @enum {string}
     */
    StableDiffusionOnnxModelFormat: "olive" | "onnx";
+    /**
+     * IPAdapterModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    IPAdapterModelFormat: "invokeai";
    /**
     * StableDiffusion1ModelFormat
     * @description An enumeration.
@@ -7286,23 +7296,11 @@ export type components = {
      */
    CLIPVisionModelFormat: "diffusers";
    /**
-     * StableDiffusionXLModelFormat
+     * StableDiffusion2ModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
-    /**
-     * IPAdapterModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    IPAdapterModelFormat: "invokeai";
-    /**
-     * ControlNetModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    ControlNetModelFormat: "checkpoint" | "diffusers";
+    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
  };
  responses: never;
  parameters: never;