Look up the IP-Adapter's linked image encoder from disk instead of storing it in the model config metadata.

Ryan Dick 2023-09-14 23:06:57 -04:00
parent 18095ecc44
commit 990ce9a1da
5 changed files with 53 additions and 39 deletions
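In short: each IP-Adapter model directory ships a small `image_encoder.txt` sidecar whose first line is the Hugging Face repo ID of the paired CLIP Vision encoder (see the docs excerpt and the `get_ip_adapter_image_encoder_model_id` helper in the diffs below). A minimal sketch of the lookup this commit switches to; the directory path and function name here are illustrative, not the commit's exact code:

```python
import os

def lookup_image_encoder_id(ip_adapter_dir: str) -> str:
    # The first line of `image_encoder.txt` holds the encoder's HF repo ID,
    # e.g. "InvokeAI/ip_adapter_sd_image_encoder".
    with open(os.path.join(ip_adapter_dir, "image_encoder.txt"), "r") as f:
        return f.readline().strip()

# Hypothetical model directory; only the final path component of the repo ID
# is used as the local model name.
encoder_id = lookup_image_encoder_id("/models/sd-1/ip_adapter/ip_adapter_sd15")
encoder_name = encoder_id.split("/")[-1]
```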

View File

@@ -1,3 +1,5 @@
+import os
+
 from pydantic import BaseModel, Field
 from invokeai.app.invocations.baseinvocation import (
@@ -14,6 +16,9 @@ from invokeai.app.invocations.baseinvocation import (
 )
 from invokeai.app.invocations.primitives import ImageField
 from invokeai.backend.model_management.models.base import BaseModelType, ModelType
+from invokeai.backend.model_management.models.ip_adapter import (
+    get_ip_adapter_image_encoder_model_id,
+)
 
 
 class IPAdapterModelField(BaseModel):
@@ -57,7 +62,15 @@ class IPAdapterInvocation(BaseInvocation):
         ip_adapter_info = context.services.model_manager.model_info(
             self.ip_adapter_model.model_name, self.ip_adapter_model.base_model, ModelType.IPAdapter
         )
-        image_encoder_model_name = ip_adapter_info["image_encoder_model"].split("/")[-1].strip()
+        # HACK(ryand): This is bad for a couple of reasons: 1) we are bypassing the model manager to read the model
+        # directly, and 2) we are reading from disk every time this invocation is called without caching the result.
+        # A better solution would be to store the image encoder model reference in the IP-Adapter model info, but this
+        # is currently messy due to differences between how the model info is generated when installing a model from
+        # disk vs. downloading the model.
+        image_encoder_model_id = get_ip_adapter_image_encoder_model_id(
+            os.path.join(context.services.configuration.get_config().models_path, ip_adapter_info["path"])
+        )
+        image_encoder_model_name = image_encoder_model_id.split("/")[-1].strip()
         image_encoder_model = CLIPVisionModelField(
             model_name=image_encoder_model_name,
             base_model=BaseModelType.Any,
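The HACK note above flags that the encoder ID is re-read from disk on every invocation. One way to blunt that cost, shown purely as a sketch (this is not what the commit does, and it leaves the model-manager-bypass concern untouched), is to memoize the lookup per resolved path:

```python
import os
from functools import lru_cache

@lru_cache(maxsize=None)
def cached_image_encoder_model_id(ip_adapter_path: str) -> str:
    # Memoized per path: the file is read once, then served from the cache on
    # subsequent invocations. The cache would go stale if image_encoder.txt
    # were edited while the app is running.
    with open(os.path.join(ip_adapter_path, "image_encoder.txt"), "r") as f:
        return f.readline().strip()
```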

View File

@@ -31,6 +31,15 @@ ip_adapter_sd15/
 
 The weights in `ip_adapter.bin` are stored in a nested dict, which is not supported by `safetensors`. This could be solved by splitting `ip_adapter.bin` into multiple files, but for now we have decided to maintain consistency with the checkpoint structure used in the official [h94/IP-Adapter](https://huggingface.co/h94/IP-Adapter) repo.
 
-## InvokeAI-Hosted IP-Adapters
+## InvokeAI Hosted IP-Adapters
 
-TODO(ryand): Add list
+Image Encoders:
+- [InvokeAI/ip_adapter_sd_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sd_image_encoder)
+- [InvokeAI/ip_adapter_sdxl_image_encoder](https://huggingface.co/InvokeAI/ip_adapter_sdxl_image_encoder)
+
+IP-Adapters:
+- [InvokeAI/ip_adapter_sd15](https://huggingface.co/InvokeAI/ip_adapter_sd15)
+- [InvokeAI/ip_adapter_plus_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_sd15)
+- [InvokeAI/ip_adapter_plus_face_sd15](https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15)
+- [InvokeAI/ip_adapter_sdxl](https://huggingface.co/InvokeAI/ip_adapter_sdxl)
+- [InvokeAI/ip_adapter_sdxl_vit_h](https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h)
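The doc paragraph above notes that `ip_adapter.bin` holds a nested state dict, which `safetensors` cannot store: `safetensors.torch.save_file` accepts only a flat `str -> Tensor` mapping. A sketch of the constraint and one possible key-flattening workaround (illustrative only; per the docs, the project instead keeps the upstream `.bin` layout):

```python
import torch
from safetensors.torch import save_file

nested = {
    "image_proj": {"proj.weight": torch.zeros(2, 2)},
    "ip_adapter": {"1.to_k_ip.weight": torch.zeros(2, 2)},
}

# save_file(nested, "ip_adapter.safetensors")  # rejected: values must be tensors, not dicts

# Flattening the two levels into dotted keys yields a shape safetensors accepts:
flat = {f"{group}.{key}": t for group, sub in nested.items() for key, t in sub.items()}
save_file(flat, "ip_adapter.safetensors")
```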

View File

@@ -511,9 +511,7 @@ class ControlNetFolderProbe(FolderProbeBase):
                 else (
                     BaseModelType.StableDiffusion2
                     if dimension == 1024
-                    else BaseModelType.StableDiffusionXL
-                    if dimension == 2048
-                    else None
+                    else BaseModelType.StableDiffusionXL if dimension == 2048 else None
                 )
             )
         if not base_model:
@@ -543,7 +541,7 @@ class IPAdapterFolderProbe(FolderProbeBase):
         if not model_file.exists():
             raise InvalidModelException("Unknown IP-Adapter model format.")
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, map_location="cpu")
         cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
         if cross_attention_dim == 768:
             return BaseModelType.StableDiffusion1
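Two details of this probe are worth spelling out: `map_location="cpu"` keeps `torch.load` from restoring tensors onto the GPU device they were saved from (probing only needs tensor shapes, and the call must also work on CPU-only hosts), and the cross-attention width identifies the base model, mirroring the dimension mapping visible in `ControlNetFolderProbe` above (1024 is SD 2, 2048 is SDXL; 768 is SD 1 by the same convention). A condensed sketch, returning plain strings rather than `BaseModelType` members:

```python
import torch

def probe_ip_adapter_base(model_file: str) -> str:
    # Load onto CPU regardless of where the checkpoint was saved; we only
    # inspect tensor shapes, so no GPU memory should be touched.
    state_dict = torch.load(model_file, map_location="cpu")
    dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1]
    # Same dimension -> base-model mapping used for ControlNets above.
    return {768: "sd-1", 1024: "sd-2", 2048: "sdxl"}.get(dim, "unknown")
```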

View File

@@ -29,7 +29,6 @@ class IPAdapterModelFormat(str, Enum):
 class IPAdapterModel(ModelBase):
     class InvokeAIConfig(ModelConfigBase):
         model_format: Literal[IPAdapterModelFormat.InvokeAI]
-        image_encoder_model: str
 
     def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
         assert model_type == ModelType.IPAdapter
@@ -50,19 +49,6 @@ class IPAdapterModel(ModelBase):
             raise InvalidModelException(f"Unexpected IP-Adapter model format: {path}")
 
-    @classmethod
-    def probe_config(cls, path: str, **kwargs) -> ModelConfigBase:
-        image_encoder_config_file = os.path.join(path, "image_encoder.txt")
-        with open(image_encoder_config_file, "r") as f:
-            image_encoder_model = f.readline().strip()
-
-        return cls.create_config(
-            path=path,
-            model_format=cls.detect_format(path),
-            image_encoder_model=image_encoder_model,
-        )
-
     @classproperty
     def save_to_config(cls) -> bool:
         return True
@@ -98,3 +84,13 @@ class IPAdapterModel(ModelBase):
             return model_path
         else:
             raise ValueError(f"Unsupported format: '{format}'.")
+
+
+def get_ip_adapter_image_encoder_model_id(model_path: str):
+    """Read the ID of the image encoder associated with the IP-Adapter at `model_path`."""
+    image_encoder_config_file = os.path.join(model_path, "image_encoder.txt")
+
+    with open(image_encoder_config_file, "r") as f:
+        image_encoder_model = f.readline().strip()
+
+    return image_encoder_model
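For reference, this is how the invocation in the first file composes the argument to the new module-level helper; the concrete paths here are hypothetical stand-ins for the app config's models directory and the model's relative path:

```python
import os

models_path = "/data/invokeai/models"              # from the app config (hypothetical)
relative_path = "sd-1/ip_adapter/ip_adapter_sd15"  # ip_adapter_info["path"] (hypothetical)

encoder_id = get_ip_adapter_image_encoder_model_id(os.path.join(models_path, relative_path))
# e.g. "InvokeAI/ip_adapter_sd_image_encoder" -> local name "ip_adapter_sd_image_encoder"
encoder_name = encoder_id.split("/")[-1].strip()
```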

View File

@@ -2549,8 +2549,6 @@ export type components = {
       */
      model_format: "invokeai";
      error?: components["schemas"]["ModelError"];
-      /** Image Encoder Model */
-      image_encoder_model: string;
    };
    /**
     * IPAdapterOutput
@@ -7262,17 +7260,29 @@ export type components = {
      ui_order?: number;
    };
    /**
-     * StableDiffusion2ModelFormat
+     * ControlNetModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
+    ControlNetModelFormat: "checkpoint" | "diffusers";
+    /**
+     * StableDiffusionXLModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
    /**
     * StableDiffusionOnnxModelFormat
     * @description An enumeration.
     * @enum {string}
     */
    StableDiffusionOnnxModelFormat: "olive" | "onnx";
+    /**
+     * IPAdapterModelFormat
+     * @description An enumeration.
+     * @enum {string}
+     */
+    IPAdapterModelFormat: "invokeai";
    /**
     * StableDiffusion1ModelFormat
     * @description An enumeration.
@@ -7286,23 +7296,11 @@ export type components = {
      */
    CLIPVisionModelFormat: "diffusers";
    /**
-     * StableDiffusionXLModelFormat
+     * StableDiffusion2ModelFormat
      * @description An enumeration.
      * @enum {string}
      */
-    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
-    /**
-     * IPAdapterModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    IPAdapterModelFormat: "invokeai";
-    /**
-     * ControlNetModelFormat
-     * @description An enumeration.
-     * @enum {string}
-     */
-    ControlNetModelFormat: "checkpoint" | "diffusers";
+    StableDiffusion2ModelFormat: "checkpoint" | "diffusers";
  };
  responses: never;
  parameters: never;