diff --git a/invokeai/app/invocations/ip_adapter.py b/invokeai/app/invocations/ip_adapter.py
index 84183d8a06..9857054b47 100644
--- a/invokeai/app/invocations/ip_adapter.py
+++ b/invokeai/app/invocations/ip_adapter.py
@@ -1,3 +1,5 @@
+import os
+
 from pydantic import BaseModel, Field
 
 from invokeai.app.invocations.baseinvocation import (
@@ -14,6 +16,9 @@ from invokeai.app.invocations.baseinvocation import (
 )
 from invokeai.app.invocations.primitives import ImageField
 from invokeai.backend.model_management.models.base import BaseModelType, ModelType
+from invokeai.backend.model_management.models.ip_adapter import (
+    get_ip_adapter_image_encoder_model_id,
+)
 
 
 class IPAdapterModelField(BaseModel):
@@ -57,7 +62,15 @@ class IPAdapterInvocation(BaseInvocation):
         ip_adapter_info = context.services.model_manager.model_info(
             self.ip_adapter_model.model_name, self.ip_adapter_model.base_model, ModelType.IPAdapter
         )
-        image_encoder_model_name = ip_adapter_info["image_encoder_model"].split("/")[-1].strip()
+        # HACK(ryand): This is bad for a couple of reasons: 1) we are bypassing the model manager to read the model
+        # directly, and 2) we are reading from disk every time this invocation is called without caching the result.
+        # A better solution would be to store the image encoder model reference in the IP-Adapter model info, but this
+        # is currently messy due to differences between how the model info is generated when installing a model from
+        # disk vs. downloading the model.
+        image_encoder_model_id = get_ip_adapter_image_encoder_model_id(
+            os.path.join(context.services.configuration.get_config().models_path, ip_adapter_info["path"])
+        )
+        image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
         image_encoder_model = CLIPVisionModelField(
             model_name=image_encoder_model_name,
             base_model=BaseModelType.Any,
diff --git a/invokeai/backend/ip_adapter/README.md b/invokeai/backend/ip_adapter/README.md
index 6ba18e7ade..c85acae498 100644
--- a/invokeai/backend/ip_adapter/README.md
+++ b/invokeai/backend/ip_adapter/README.md
@@ -31,6 +31,15 @@ ip_adapter_sd15/
 
 The weights in `ip_adapter.bin` are stored in a nested dict, which is not supported by `safetensors`. This could be solved by splitting `ip_adapter.bin` into multiple files, but for now we have decided to maintain consistency with the checkpoint structure used in the official [h94/IP-Adapter](https://huggingface.co/h94/IP-Adapter) repo.
 
-## InvokeAI-Hosted IP-Adapters
+## InvokeAI Hosted IP-Adapters
 
-TODO(ryand): Add list
+Image Encoders:
+- [InvokeAI/ip_adapter_sd_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sd_image_encoder)
+- [InvokeAI/ip_adapter_sdxl_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sdxl_image_encoder)
+
+IP-Adapters:
+- [InvokeAI/ip_adapter_sd15](https://huggingface.co/InvokeAI/ip_adapter_sd15)
+- [InvokeAI/ip_adapter_plus_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_sd15)
+- [InvokeAI/ip_adapter_plus_face_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15)
+- [InvokeAI/ip_adapter_sdxl](https://huggingface.co/InvokeAI/ip_adapter_sdxl)
+- [InvokeAI/ip_adapter_sdxl_vit_h](https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h)
\ No newline at end of file
diff --git a/invokeai/backend/model_management/model_probe.py b/invokeai/backend/model_management/model_probe.py
index 046f596c1e..354ec0c675 100644
--- a/invokeai/backend/model_management/model_probe.py
+++ b/invokeai/backend/model_management/model_probe.py
@@ -511,9 +511,7 @@ class ControlNetFolderProbe(FolderProbeBase):
             else (
                 BaseModelType.StableDiffusion2
                 if dimension == 1024
-                else BaseModelType.StableDiffusionXL
-                if dimension == 2048
-                else None
+                else BaseModelType.StableDiffusionXL if dimension == 2048 else None
             )
         )
         if not base_model:
@@ -543,7 +541,7 @@ class IPAdapterFolderProbe(FolderProbeBase):
         if not model_file.exists():
             raise InvalidModelException("Unknown IP-Adapter model format.")
 
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, map_location="cpu")
         cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
         if cross_attention_dim == 768:
             return BaseModelType.StableDiffusion1
diff --git a/invokeai/backend/model_management/models/ip_adapter.py b/invokeai/backend/model_management/models/ip_adapter.py
index 70f42ec2a9..208d5e7c4d 100644
--- a/invokeai/backend/model_management/models/ip_adapter.py
+++ b/invokeai/backend/model_management/models/ip_adapter.py
@@ -29,7 +29,6 @@ class IPAdapterModelFormat(str, Enum):
 class IPAdapterModel(ModelBase):
     class InvokeAIConfig(ModelConfigBase):
         model_format: Literal[IPAdapterModelFormat.InvokeAI]
-        image_encoder_model: str
 
     def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
         assert model_type == ModelType.IPAdapter
@@ -50,19 +49,6 @@ class IPAdapterModel(ModelBase):
 
         raise InvalidModelException(f"Unexpected IP-Adapter model format: {path}")
 
-    @classmethod
-    def probe_config(cls, path: str, **kwargs) -> ModelConfigBase:
-        image_encoder_config_file = os.path.join(path, "image_encoder.txt")
-
-        with open(image_encoder_config_file, "r") as f:
-            image_encoder_model = f.readline().strip()
-
-        return cls.create_config(
-            path=path,
-            model_format=cls.detect_format(path),
-            image_encoder_model=image_encoder_model,
-        )
-
     @classproperty
     def save_to_config(cls) -> bool:
         return True
@@ -98,3 +84,13 @@ class IPAdapterModel(ModelBase):
             return model_path
         else:
             raise ValueError(f"Unsupported format: '{format}'.")
+
+
+def get_ip_adapter_image_encoder_model_id(model_path: str):
+    """Read the ID of the image encoder associated with the IP-Adapter at `model_path`."""
+    image_encoder_config_file = os.path.join(model_path, "image_encoder.txt")
+
+    with open(image_encoder_config_file, "r") as f:
+        image_encoder_model = f.readline().strip()
+
+    return image_encoder_model
diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts
index 915c0c50ca..4252927b95 100644
--- a/invokeai/frontend/web/src/services/api/schema.d.ts
+++ b/invokeai/frontend/web/src/services/api/schema.d.ts
@@ -2549,8 +2549,6 @@ export type components = {
        */
       model_format: "invokeai";
       error?: components["schemas"]["ModelError"];
-      /** Image Encoder Model */
-      image_encoder_model: string;
     };
     /**
      * IPAdapterOutput
@@ -7262,17 +7260,29 @@ export type components = {
       ui_order?: number;
     };
     /**
-     * StableDiffusion2ModelFormat
+     * ControlNetModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
+    ControlNetModelFormat: "checkpoint" | "diffusers";
+    /**
+     * StableDiffusionXLModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
     /**
      * StableDiffusionOnnxModelFormat
      * @description An enumeration.
      * @enum {string}
      */
     StableDiffusionOnnxModelFormat: "olive" | "onnx";
+    /**
+     * IPAdapterModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    IPAdapterModelFormat: "invokeai";
     /**
      * StableDiffusion1ModelFormat
      * @description An enumeration.
@@ -7286,23 +7296,11 @@ export type components = {
      * @enum {string}
      */
-     * StableDiffusionXLModelFormat
+     * StableDiffusion2ModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
-    /**
-     * IPAdapterModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    IPAdapterModelFormat: "invokeai";
-    /**
-     * ControlNetModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    ControlNetModelFormat: "checkpoint" | "diffusers";
+    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
   };
   responses: never;
   parameters: never;
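For reference, the invocation-side change above boils down to two steps: read the image encoder ID from the `image_encoder.txt` file stored alongside the IP-Adapter weights, then keep only the final path segment as the CLIP Vision model name. The sketch below mirrors that logic without importing InvokeAI so it can be run standalone; the example ID is one of the hosted encoders listed in the README, and the local `read_image_encoder_model_id` helper is a stand-in for `get_ip_adapter_image_encoder_model_id`.

```python
import os
import tempfile


def read_image_encoder_model_id(model_path: str) -> str:
    """Stand-in for get_ip_adapter_image_encoder_model_id(): read the first line of image_encoder.txt."""
    with open(os.path.join(model_path, "image_encoder.txt"), "r") as f:
        return f.readline().strip()


# Simulate an installed IP-Adapter model directory containing an image_encoder.txt file.
with tempfile.TemporaryDirectory() as model_dir:
    with open(os.path.join(model_dir, "image_encoder.txt"), "w") as f:
        f.write("InvokeAI/ip_adapter_sd_image_encoder\n")

    image_encoder_model_id = read_image_encoder_model_id(model_dir)
    # The invocation keeps only the repo name, matching image_encoder_model_id.split("/")[-1].strip().
    image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()

    print(image_encoder_model_id)    # InvokeAI/ip_adapter_sd_image_encoder
    print(image_encoder_model_name)  # ip_adapter_sd_image_encoder
```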