Merge branch 'main' into refactor/model_manager_instantiate

2024-08-30 20:32:17 +00:00 · 2023-08-01 11:09:43 -07:00
parent bacdf985f1 403a6e88f2
commit 5998509888
63 changed files with 2665 additions and 595 deletions
--- a/invokeai/backend/install/model_install_backend.py
+++ b/invokeai/backend/install/model_install_backend.py
@ -7,11 +7,12 @@ import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import List, Dict, Callable, Union, Set, Optional
+from typing import Optional, List, Dict, Callable, Union, Set

 import requests
 from diffusers import DiffusionPipeline
 from diffusers import logging as dlogging
+import onnx
 from huggingface_hub import hf_hub_url, HfFolder, HfApi
 from omegaconf import OmegaConf
 from tqdm import tqdm
@ -86,8 +87,8 @@ class ModelLoadInfo:
    name: str
    model_type: ModelType
    base_type: BaseModelType
-    path: Path = None
-    repo_id: str = None
+    path: Optional[Path] = None
+    repo_id: Optional[str] = None
    description: str = ""
    installed: bool = False
    recommended: bool = False
@ -302,8 +303,10 @@ class ModelInstall(object):

        with TemporaryDirectory(dir=self.config.models_path) as staging:
            staging = Path(staging)
-            if "model_index.json" in files:
+            if "model_index.json" in files and "unet/model.onnx" not in files:
                location = self._download_hf_pipeline(repo_id, staging)  # pipeline
+            elif "unet/model.onnx" in files:
+                location = self._download_hf_model(repo_id, files, staging)
            else:
                for suffix in ["safetensors", "bin"]:
                    if f"pytorch_lora_weights.{suffix}" in files:
@ -368,7 +371,7 @@ class ModelInstall(object):
            model_format=info.format,
        )
        legacy_conf = None
-        if info.model_type == ModelType.Main:
+        if info.model_type == ModelType.Main or info.model_type == ModelType.ONNX:
            attributes.update(
                dict(
                    variant=info.variant_type,
@ -433,8 +436,13 @@ class ModelInstall(object):
        location = staging / name
        paths = list()
        for filename in files:
+            filePath = Path(filename)
            p = hf_download_with_resume(
-                repo_id, model_dir=location, model_name=filename, access_token=self.access_token
+                repo_id,
+                model_dir=location / filePath.parent,
+                model_name=filePath.name,
+                access_token=self.access_token,
+                subfolder=filePath.parent,
            )
            if p:
                paths.append(p)
@ -482,11 +490,12 @@ def hf_download_with_resume(
    model_name: str,
    model_dest: Path = None,
    access_token: str = None,
+    subfolder: str = None,
 ) -> Path:
    model_dest = model_dest or Path(os.path.join(model_dir, model_name))
    os.makedirs(model_dir, exist_ok=True)

-    url = hf_hub_url(repo_id, model_name)
+    url = hf_hub_url(repo_id, model_name, subfolder=subfolder)

    header = {"Authorization": f"Bearer {access_token}"} if access_token else {}
    open_mode = "wb"
--- a/invokeai/backend/model_management/init.py
+++ b/invokeai/backend/model_management/init.py
@ -3,6 +3,7 @@ Initialization file for invokeai.backend.model_management
 """
 from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType
 from .model_cache import ModelCache
+from .lora import ModelPatcher, ONNXModelPatcher
 from .models import (
    BaseModelType,
    ModelType,
--- a/invokeai/backend/model_management/lora.py
+++ b/invokeai/backend/model_management/lora.py
@ -6,11 +6,22 @@ from typing import Optional, Dict, Tuple, Any, Union, List
 from pathlib import Path

 import torch
+from safetensors.torch import load_file
+from torch.utils.hooks import RemovableHandle
+
+from diffusers.models import UNet2DConditionModel
+from transformers import CLIPTextModel
+from onnx import numpy_helper
+from onnxruntime import OrtValue
+import numpy as np
+
 from compel.embeddings_provider import BaseTextualInversionManager
 from diffusers.models import UNet2DConditionModel
 from safetensors.torch import load_file
 from transformers import CLIPTextModel, CLIPTokenizer

+# TODO: rename and split this file
+

 class LoRALayerBase:
    # rank: Optional[int]
@ -698,3 +709,186 @@ class TextualInversionManager(BaseTextualInversionManager):
                new_token_ids.extend(self.pad_tokens[token_id])

        return new_token_ids
+
+
+class ONNXModelPatcher:
+    from .models.base import IAIOnnxRuntimeModel, OnnxRuntimeModel
+
+    @classmethod
+    @contextmanager
+    def apply_lora_unet(
+        cls,
+        unet: OnnxRuntimeModel,
+        loras: List[Tuple[LoRAModel, float]],
+    ):
+        with cls.apply_lora(unet, loras, "lora_unet_"):
+            yield
+
+    @classmethod
+    @contextmanager
+    def apply_lora_text_encoder(
+        cls,
+        text_encoder: OnnxRuntimeModel,
+        loras: List[Tuple[LoRAModel, float]],
+    ):
+        with cls.apply_lora(text_encoder, loras, "lora_te_"):
+            yield
+
+    # based on
+    # https://github.com/ssube/onnx-web/blob/ca2e436f0623e18b4cfe8a0363fcfcf10508acf7/api/onnx_web/convert/diffusion/lora.py#L323
+    @classmethod
+    @contextmanager
+    def apply_lora(
+        cls,
+        model: IAIOnnxRuntimeModel,
+        loras: List[Tuple[LoraModel, float]],
+        prefix: str,
+    ):
+        from .models.base import IAIOnnxRuntimeModel
+
+        if not isinstance(model, IAIOnnxRuntimeModel):
+            raise Exception("Only IAIOnnxRuntimeModel models supported")
+
+        orig_weights = dict()
+
+        try:
+            blended_loras = dict()
+
+            for lora, lora_weight in loras:
+                for layer_key, layer in lora.layers.items():
+                    if not layer_key.startswith(prefix):
+                        continue
+
+                    layer.to(dtype=torch.float32)
+                    layer_key = layer_key.replace(prefix, "")
+                    layer_weight = layer.get_weight().detach().cpu().numpy() * lora_weight
+                    if layer_key is blended_loras:
+                        blended_loras[layer_key] += layer_weight
+                    else:
+                        blended_loras[layer_key] = layer_weight
+
+            node_names = dict()
+            for node in model.nodes.values():
+                node_names[node.name.replace("/", "_").replace(".", "_").lstrip("_")] = node.name
+
+            for layer_key, lora_weight in blended_loras.items():
+                conv_key = layer_key + "_Conv"
+                gemm_key = layer_key + "_Gemm"
+                matmul_key = layer_key + "_MatMul"
+
+                if conv_key in node_names or gemm_key in node_names:
+                    if conv_key in node_names:
+                        conv_node = model.nodes[node_names[conv_key]]
+                    else:
+                        conv_node = model.nodes[node_names[gemm_key]]
+
+                    weight_name = [n for n in conv_node.input if ".weight" in n][0]
+                    orig_weight = model.tensors[weight_name]
+
+                    if orig_weight.shape[-2:] == (1, 1):
+                        if lora_weight.shape[-2:] == (1, 1):
+                            new_weight = orig_weight.squeeze((3, 2)) + lora_weight.squeeze((3, 2))
+                        else:
+                            new_weight = orig_weight.squeeze((3, 2)) + lora_weight
+
+                        new_weight = np.expand_dims(new_weight, (2, 3))
+                    else:
+                        if orig_weight.shape != lora_weight.shape:
+                            new_weight = orig_weight + lora_weight.reshape(orig_weight.shape)
+                        else:
+                            new_weight = orig_weight + lora_weight
+
+                    orig_weights[weight_name] = orig_weight
+                    model.tensors[weight_name] = new_weight.astype(orig_weight.dtype)
+
+                elif matmul_key in node_names:
+                    weight_node = model.nodes[node_names[matmul_key]]
+                    matmul_name = [n for n in weight_node.input if "MatMul" in n][0]
+
+                    orig_weight = model.tensors[matmul_name]
+                    new_weight = orig_weight + lora_weight.transpose()
+
+                    orig_weights[matmul_name] = orig_weight
+                    model.tensors[matmul_name] = new_weight.astype(orig_weight.dtype)
+
+                else:
+                    # warn? err?
+                    pass
+
+            yield
+
+        finally:
+            # restore original weights
+            for name, orig_weight in orig_weights.items():
+                model.tensors[name] = orig_weight
+
+    @classmethod
+    @contextmanager
+    def apply_ti(
+        cls,
+        tokenizer: CLIPTokenizer,
+        text_encoder: IAIOnnxRuntimeModel,
+        ti_list: List[Any],
+    ) -> Tuple[CLIPTokenizer, TextualInversionManager]:
+        from .models.base import IAIOnnxRuntimeModel
+
+        if not isinstance(text_encoder, IAIOnnxRuntimeModel):
+            raise Exception("Only IAIOnnxRuntimeModel models supported")
+
+        orig_embeddings = None
+
+        try:
+            ti_tokenizer = copy.deepcopy(tokenizer)
+            ti_manager = TextualInversionManager(ti_tokenizer)
+
+            def _get_trigger(ti, index):
+                trigger = ti.name
+                if index > 0:
+                    trigger += f"-!pad-{i}"
+                return f"<{trigger}>"
+
+            # modify tokenizer
+            new_tokens_added = 0
+            for ti in ti_list:
+                for i in range(ti.embedding.shape[0]):
+                    new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti, i))
+
+            # modify text_encoder
+            orig_embeddings = text_encoder.tensors["text_model.embeddings.token_embedding.weight"]
+
+            embeddings = np.concatenate(
+                (np.copy(orig_embeddings), np.zeros((new_tokens_added, orig_embeddings.shape[1]))),
+                axis=0,
+            )
+
+            for ti in ti_list:
+                ti_tokens = []
+                for i in range(ti.embedding.shape[0]):
+                    embedding = ti.embedding[i].detach().numpy()
+                    trigger = _get_trigger(ti, i)
+
+                    token_id = ti_tokenizer.convert_tokens_to_ids(trigger)
+                    if token_id == ti_tokenizer.unk_token_id:
+                        raise RuntimeError(f"Unable to find token id for token '{trigger}'")
+
+                    if embeddings[token_id].shape != embedding.shape:
+                        raise ValueError(
+                            f"Cannot load embedding for {trigger}. It was trained on a model with token dimension {embedding.shape[0]}, but the current model has token dimension {embeddings[token_id].shape[0]}."
+                        )
+
+                    embeddings[token_id] = embedding
+                    ti_tokens.append(token_id)
+
+                if len(ti_tokens) > 1:
+                    ti_manager.pad_tokens[ti_tokens[0]] = ti_tokens[1:]
+
+            text_encoder.tensors["text_model.embeddings.token_embedding.weight"] = embeddings.astype(
+                orig_embeddings.dtype
+            )
+
+            yield ti_tokenizer, ti_manager
+
+        finally:
+            # restore
+            if orig_embeddings is not None:
+                text_encoder.tensors["text_model.embeddings.token_embedding.weight"] = orig_embeddings
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@ -360,7 +360,8 @@ class ModelCache(object):
            # 2 refs:
            # 1 from cache_entry
            # 1 from getrefcount function
-            if not cache_entry.locked and refs <= 2:
+            # 1 from onnx runtime object
+            if not cache_entry.locked and refs <= 3 if "onnx" in model_key else 2:
                self.logger.debug(
                    f"Unloading model {model_key} to free {(model_size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
                )
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@ -277,7 +277,7 @@ class ModelInfo:
    hash: str
    location: Union[Path, str]
    precision: torch.dtype
-    _cache: ModelCache = None
+    _cache: Optional[ModelCache] = None

    def __enter__(self):
        return self.context.__enter__()
--- a/invokeai/backend/model_management/model_probe.py
+++ b/invokeai/backend/model_management/model_probe.py
@ -27,7 +27,7 @@ class ModelProbeInfo(object):
    variant_type: ModelVariantType
    prediction_type: SchedulerPredictionType
    upcast_attention: bool
-    format: Literal["diffusers", "checkpoint", "lycoris"]
+    format: Literal["diffusers", "checkpoint", "lycoris", "olive", "onnx"]
    image_size: int


@ -41,6 +41,7 @@ class ModelProbe(object):
    PROBES = {
        "diffusers": {},
        "checkpoint": {},
+        "onnx": {},
    }

    CLASS2TYPE = {
@ -53,7 +54,9 @@ class ModelProbe(object):
    }

    @classmethod
-    def register_probe(cls, format: Literal["diffusers", "checkpoint"], model_type: ModelType, probe_class: ProbeBase):
+    def register_probe(
+        cls, format: Literal["diffusers", "checkpoint", "onnx"], model_type: ModelType, probe_class: ProbeBase
+    ):
        cls.PROBES[format][model_type] = probe_class

    @classmethod
@ -95,6 +98,7 @@ class ModelProbe(object):
                if format_type == "diffusers"
                else cls.get_model_type_from_checkpoint(model_path, model)
            )
+            format_type = "onnx" if model_type == ModelType.ONNX else format_type
            probe_class = cls.PROBES[format_type].get(model_type)
            if not probe_class:
                return None
@ -168,6 +172,8 @@ class ModelProbe(object):
        if model:
            class_name = model.__class__.__name__
        else:
+            if (folder_path / "unet/model.onnx").exists():
+                return ModelType.ONNX
            if (folder_path / "learned_embeds.bin").exists():
                return ModelType.TextualInversion

@ -460,6 +466,17 @@ class TextualInversionFolderProbe(FolderProbeBase):
        return TextualInversionCheckpointProbe(None, checkpoint=checkpoint).get_base_type()


+class ONNXFolderProbe(FolderProbeBase):
+    def get_format(self) -> str:
+        return "onnx"
+
+    def get_base_type(self) -> BaseModelType:
+        return BaseModelType.StableDiffusion1
+
+    def get_variant_type(self) -> ModelVariantType:
+        return ModelVariantType.Normal
+
+
 class ControlNetFolderProbe(FolderProbeBase):
    def get_base_type(self) -> BaseModelType:
        config_file = self.folder_path / "config.json"
@ -497,3 +514,4 @@ ModelProbe.register_probe("checkpoint", ModelType.Vae, VaeCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.Lora, LoRACheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.TextualInversion, TextualInversionCheckpointProbe)
 ModelProbe.register_probe("checkpoint", ModelType.ControlNet, ControlNetCheckpointProbe)
+ModelProbe.register_probe("onnx", ModelType.ONNX, ONNXFolderProbe)
--- a/invokeai/backend/model_management/models/init.py
+++ b/invokeai/backend/model_management/models/init.py
@ -23,8 +23,11 @@ from .lora import LoRAModel
 from .controlnet import ControlNetModel  # TODO:
 from .textual_inversion import TextualInversionModel

+from .stable_diffusion_onnx import ONNXStableDiffusion1Model, ONNXStableDiffusion2Model
+
 MODEL_CLASSES = {
    BaseModelType.StableDiffusion1: {
+        ModelType.ONNX: ONNXStableDiffusion1Model,
        ModelType.Main: StableDiffusion1Model,
        ModelType.Vae: VaeModel,
        ModelType.Lora: LoRAModel,
@ -32,6 +35,7 @@ MODEL_CLASSES = {
        ModelType.TextualInversion: TextualInversionModel,
    },
    BaseModelType.StableDiffusion2: {
+        ModelType.ONNX: ONNXStableDiffusion2Model,
        ModelType.Main: StableDiffusion2Model,
        ModelType.Vae: VaeModel,
        ModelType.Lora: LoRAModel,
@ -45,6 +49,7 @@ MODEL_CLASSES = {
        ModelType.Lora: LoRAModel,
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
+        ModelType.ONNX: ONNXStableDiffusion2Model,
    },
    BaseModelType.StableDiffusionXLRefiner: {
        ModelType.Main: StableDiffusionXLModel,
@ -53,6 +58,7 @@ MODEL_CLASSES = {
        ModelType.Lora: LoRAModel,
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
+        ModelType.ONNX: ONNXStableDiffusion2Model,
    },
    # BaseModelType.Kandinsky2_1: {
    #    ModelType.Main: Kandinsky2_1Model,
--- a/invokeai/backend/model_management/models/base.py
+++ b/invokeai/backend/model_management/models/base.py
@ -8,13 +8,23 @@ from abc import ABCMeta, abstractmethod
 from pathlib import Path
 from picklescan.scanner import scan_file_path
 import torch
+import numpy as np
 import safetensors.torch
-from diffusers import DiffusionPipeline, ConfigMixin
+from pathlib import Path
+from diffusers import DiffusionPipeline, ConfigMixin, OnnxRuntimeModel

 from contextlib import suppress
 from pydantic import BaseModel, Field
 from typing import List, Dict, Optional, Type, Literal, TypeVar, Generic, Callable, Any, Union

+import onnx
+from onnx import numpy_helper
+from onnxruntime import (
+    InferenceSession,
+    SessionOptions,
+    get_available_providers,
+)
+

 class DuplicateModelException(Exception):
    pass
@ -37,6 +47,7 @@ class BaseModelType(str, Enum):


 class ModelType(str, Enum):
+    ONNX = "onnx"
    Main = "main"
    Vae = "vae"
    Lora = "lora"
@ -51,6 +62,8 @@ class SubModelType(str, Enum):
    Tokenizer = "tokenizer"
    Tokenizer2 = "tokenizer_2"
    Vae = "vae"
+    VaeDecoder = "vae_decoder"
+    VaeEncoder = "vae_encoder"
    Scheduler = "scheduler"
    SafetyChecker = "safety_checker"
    # MoVQ = "movq"
@ -362,6 +375,8 @@ def calc_model_size_by_data(model) -> int:
        return _calc_pipeline_by_data(model)
    elif isinstance(model, torch.nn.Module):
        return _calc_model_by_data(model)
+    elif isinstance(model, IAIOnnxRuntimeModel):
+        return _calc_onnx_model_by_data(model)
    else:
        return 0

@ -382,6 +397,12 @@ def _calc_model_by_data(model) -> int:
    return mem


+def _calc_onnx_model_by_data(model) -> int:
+    tensor_size = model.tensors.size() * 2  # The session doubles this
+    mem = tensor_size  # in bytes
+    return mem
+
+
 def _fast_safetensors_reader(path: str):
    checkpoint = dict()
    device = torch.device("meta")
@ -449,3 +470,208 @@ class SilenceWarnings(object):
        transformers_logging.set_verbosity(self.transformers_verbosity)
        diffusers_logging.set_verbosity(self.diffusers_verbosity)
        warnings.simplefilter("default")
+
+
+ONNX_WEIGHTS_NAME = "model.onnx"
+
+
+class IAIOnnxRuntimeModel:
+    class _tensor_access:
+        def __init__(self, model):
+            self.model = model
+            self.indexes = dict()
+            for idx, obj in enumerate(self.model.proto.graph.initializer):
+                self.indexes[obj.name] = idx
+
+        def __getitem__(self, key: str):
+            value = self.model.proto.graph.initializer[self.indexes[key]]
+            return numpy_helper.to_array(value)
+
+        def __setitem__(self, key: str, value: np.ndarray):
+            new_node = numpy_helper.from_array(value)
+            # set_external_data(new_node, location="in-memory-location")
+            new_node.name = key
+            # new_node.ClearField("raw_data")
+            del self.model.proto.graph.initializer[self.indexes[key]]
+            self.model.proto.graph.initializer.insert(self.indexes[key], new_node)
+            # self.model.data[key] = OrtValue.ortvalue_from_numpy(value)
+
+        # __delitem__
+
+        def __contains__(self, key: str):
+            return self.indexes[key] in self.model.proto.graph.initializer
+
+        def items(self):
+            raise NotImplementedError("tensor.items")
+            # return [(obj.name, obj) for obj in self.raw_proto]
+
+        def keys(self):
+            return self.indexes.keys()
+
+        def values(self):
+            raise NotImplementedError("tensor.values")
+            # return [obj for obj in self.raw_proto]
+
+        def size(self):
+            bytesSum = 0
+            for node in self.model.proto.graph.initializer:
+                bytesSum += sys.getsizeof(node.raw_data)
+            return bytesSum
+
+    class _access_helper:
+        def __init__(self, raw_proto):
+            self.indexes = dict()
+            self.raw_proto = raw_proto
+            for idx, obj in enumerate(raw_proto):
+                self.indexes[obj.name] = idx
+
+        def __getitem__(self, key: str):
+            return self.raw_proto[self.indexes[key]]
+
+        def __setitem__(self, key: str, value):
+            index = self.indexes[key]
+            del self.raw_proto[index]
+            self.raw_proto.insert(index, value)
+
+        # __delitem__
+
+        def __contains__(self, key: str):
+            return key in self.indexes
+
+        def items(self):
+            return [(obj.name, obj) for obj in self.raw_proto]
+
+        def keys(self):
+            return self.indexes.keys()
+
+        def values(self):
+            return [obj for obj in self.raw_proto]
+
+    def __init__(self, model_path: str, provider: Optional[str]):
+        self.path = model_path
+        self.session = None
+        self.provider = provider
+        """
+        self.data_path = self.path + "_data"
+        if not os.path.exists(self.data_path):
+            print(f"Moving model tensors to separate file: {self.data_path}")
+            tmp_proto = onnx.load(model_path, load_external_data=True)
+            onnx.save_model(tmp_proto, self.path, save_as_external_data=True, all_tensors_to_one_file=True, location=os.path.basename(self.data_path), size_threshold=1024, convert_attribute=False)
+            del tmp_proto
+            gc.collect()
+
+        self.proto = onnx.load(model_path, load_external_data=False)
+        """
+
+        self.proto = onnx.load(model_path, load_external_data=True)
+        # self.data = dict()
+        # for tensor in self.proto.graph.initializer:
+        #     name = tensor.name
+
+        #     if tensor.HasField("raw_data"):
+        #         npt = numpy_helper.to_array(tensor)
+        #         orv = OrtValue.ortvalue_from_numpy(npt)
+        #         # self.data[name] = orv
+        #         # set_external_data(tensor, location="in-memory-location")
+        #         tensor.name = name
+        #         # tensor.ClearField("raw_data")
+
+        self.nodes = self._access_helper(self.proto.graph.node)
+        # self.initializers = self._access_helper(self.proto.graph.initializer)
+        # print(self.proto.graph.input)
+        # print(self.proto.graph.initializer)
+
+        self.tensors = self._tensor_access(self)
+
+    # TODO: integrate with model manager/cache
+    def create_session(self, height=None, width=None):
+        if self.session is None or self.session_width != width or self.session_height != height:
+            # onnx.save(self.proto, "tmp.onnx")
+            # onnx.save_model(self.proto, "tmp.onnx", save_as_external_data=True, all_tensors_to_one_file=True, location="tmp.onnx_data", size_threshold=1024, convert_attribute=False)
+            # TODO: something to be able to get weight when they already moved outside of model proto
+            # (trimmed_model, external_data) = buffer_external_data_tensors(self.proto)
+            sess = SessionOptions()
+            # self._external_data.update(**external_data)
+            # sess.add_external_initializers(list(self.data.keys()), list(self.data.values()))
+            # sess.enable_profiling = True
+
+            # sess.intra_op_num_threads = 1
+            # sess.inter_op_num_threads = 1
+            # sess.execution_mode = ExecutionMode.ORT_SEQUENTIAL
+            # sess.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+            # sess.enable_cpu_mem_arena = True
+            # sess.enable_mem_pattern = True
+            # sess.add_session_config_entry("session.intra_op.use_xnnpack_threadpool", "1") ########### It's the key code
+            self.session_height = height
+            self.session_width = width
+            if height and width:
+                sess.add_free_dimension_override_by_name("unet_sample_batch", 2)
+                sess.add_free_dimension_override_by_name("unet_sample_channels", 4)
+                sess.add_free_dimension_override_by_name("unet_hidden_batch", 2)
+                sess.add_free_dimension_override_by_name("unet_hidden_sequence", 77)
+                sess.add_free_dimension_override_by_name("unet_sample_height", self.session_height)
+                sess.add_free_dimension_override_by_name("unet_sample_width", self.session_width)
+                sess.add_free_dimension_override_by_name("unet_time_batch", 1)
+            providers = []
+            if self.provider:
+                providers.append(self.provider)
+            else:
+                providers = get_available_providers()
+            if "TensorrtExecutionProvider" in providers:
+                providers.remove("TensorrtExecutionProvider")
+            try:
+                self.session = InferenceSession(self.proto.SerializeToString(), providers=providers, sess_options=sess)
+            except Exception as e:
+                raise e
+            # self.session = InferenceSession("tmp.onnx", providers=[self.provider], sess_options=self.sess_options)
+            # self.io_binding = self.session.io_binding()
+
+    def release_session(self):
+        self.session = None
+        import gc
+
+        gc.collect()
+        return
+
+    def __call__(self, **kwargs):
+        if self.session is None:
+            raise Exception("You should call create_session before running model")
+
+        inputs = {k: np.array(v) for k, v in kwargs.items()}
+        output_names = self.session.get_outputs()
+        # for k in inputs:
+        #     self.io_binding.bind_cpu_input(k, inputs[k])
+        # for name in output_names:
+        #     self.io_binding.bind_output(name.name)
+        # self.session.run_with_iobinding(self.io_binding, None)
+        # return self.io_binding.copy_outputs_to_cpu()
+        return self.session.run(None, inputs)
+
+    # compatability with diffusers load code
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_id: Union[str, Path],
+        subfolder: Union[str, Path] = None,
+        file_name: Optional[str] = None,
+        provider: Optional[str] = None,
+        sess_options: Optional["SessionOptions"] = None,
+        **kwargs,
+    ):
+        file_name = file_name or ONNX_WEIGHTS_NAME
+
+        if os.path.isdir(model_id):
+            model_path = model_id
+            if subfolder is not None:
+                model_path = os.path.join(model_path, subfolder)
+            model_path = os.path.join(model_path, file_name)
+
+        else:
+            model_path = model_id
+
+        # load model from local directory
+        if not os.path.isfile(model_path):
+            raise Exception(f"Model not found: {model_path}")
+
+        # TODO: session options
+        return cls(model_path, provider=provider)
--- a/invokeai/backend/model_management/models/stable_diffusion_onnx.py
+++ b/invokeai/backend/model_management/models/stable_diffusion_onnx.py
@ -0,0 +1,157 @@
+import os
+import json
+from enum import Enum
+from pydantic import Field
+from pathlib import Path
+from typing import Literal, Optional, Union
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    ModelVariantType,
+    DiffusersModel,
+    SchedulerPredictionType,
+    SilenceWarnings,
+    read_checkpoint_meta,
+    classproperty,
+    OnnxRuntimeModel,
+    IAIOnnxRuntimeModel,
+)
+from invokeai.app.services.config import InvokeAIAppConfig
+
+
+class StableDiffusionOnnxModelFormat(str, Enum):
+    """On-disk formats recognised for ONNX Stable Diffusion models."""
+
+    Olive = "olive"
+    Onnx = "onnx"
+
+
+class ONNXStableDiffusion1Model(DiffusersModel):
+    class Config(ModelConfigBase):
+        model_format: Literal[StableDiffusionOnnxModelFormat.Onnx]
+        variant: ModelVariantType
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert base_model == BaseModelType.StableDiffusion1
+        assert model_type == ModelType.ONNX
+        super().__init__(
+            model_path=model_path,
+            base_model=BaseModelType.StableDiffusion1,
+            model_type=ModelType.ONNX,
+        )
+
+        for child_name, child_type in self.child_types.items():
+            if child_type is OnnxRuntimeModel:
+                self.child_types[child_name] = IAIOnnxRuntimeModel
+
+            # TODO: check that no optimum models provided
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs):
+        model_format = cls.detect_format(path)
+        in_channels = 4  # TODO:
+
+        if in_channels == 9:
+            variant = ModelVariantType.Inpaint
+        elif in_channels == 4:
+            variant = ModelVariantType.Normal
+        else:
+            raise Exception("Unkown stable diffusion 1.* model format")
+
+        return cls.create_config(
+            path=path,
+            model_format=model_format,
+            variant=variant,
+        )
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return True
+
+    @classmethod
+    def detect_format(cls, model_path: str):
+        # TODO: Detect onnx vs olive
+        return StableDiffusionOnnxModelFormat.Onnx
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        return model_path
+
+
+class ONNXStableDiffusion2Model(DiffusersModel):
+    # TODO: check that configs overwriten properly
+    class Config(ModelConfigBase):
+        model_format: Literal[StableDiffusionOnnxModelFormat.Onnx]
+        variant: ModelVariantType
+        prediction_type: SchedulerPredictionType
+        upcast_attention: bool
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert base_model == BaseModelType.StableDiffusion2
+        assert model_type == ModelType.ONNX
+        super().__init__(
+            model_path=model_path,
+            base_model=BaseModelType.StableDiffusion2,
+            model_type=ModelType.ONNX,
+        )
+
+        for child_name, child_type in self.child_types.items():
+            if child_type is OnnxRuntimeModel:
+                self.child_types[child_name] = IAIOnnxRuntimeModel
+            # TODO: check that no optimum models provided
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs):
+        model_format = cls.detect_format(path)
+        in_channels = 4  # TODO:
+
+        if in_channels == 9:
+            variant = ModelVariantType.Inpaint
+        elif in_channels == 5:
+            variant = ModelVariantType.Depth
+        elif in_channels == 4:
+            variant = ModelVariantType.Normal
+        else:
+            raise Exception("Unkown stable diffusion 2.* model format")
+
+        if variant == ModelVariantType.Normal:
+            prediction_type = SchedulerPredictionType.VPrediction
+            upcast_attention = True
+
+        else:
+            prediction_type = SchedulerPredictionType.Epsilon
+            upcast_attention = False
+
+        return cls.create_config(
+            path=path,
+            model_format=model_format,
+            variant=variant,
+            prediction_type=prediction_type,
+            upcast_attention=upcast_attention,
+        )
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return True
+
+    @classmethod
+    def detect_format(cls, model_path: str):
+        # TODO: Detect onnx vs olive
+        return StableDiffusionOnnxModelFormat.Onnx
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        return model_path