Model Manager rewrite (#3335)

2025-07-25 12:55:55 +00:00 · 2023-06-14 08:44:04 -07:00
parent 0497bea264 82c2498043
commit 5c740452f6
56 changed files with 5132 additions and 2143 deletions
--- a/README.md
+++ b/README.md
@ -43,6 +43,23 @@ _Note: InvokeAI is rapidly evolving. Please use the
 [Issues](https://github.com/invoke-ai/InvokeAI/issues) tab to report bugs and make feature
 requests. Be sure to use the provided templates. They will help us diagnose issues faster._

+## FOR DEVELOPERS - MIGRATING TO THE 3.0.0 MODELS FORMAT
+
+The models directory and models.yaml have changed. To migrate to the
+new layout, please follow this recipe:
+
+1. Run `python scripts/migrate_models_to_3.0.py <path_to_root_directory>`
+
+2. This will create a new models directory named `models-3.0` and a
+   new config file named `models.yaml-3.0`, both in the current
+   working directory. If you prefer to name them something else, pass
+   the `--dest-directory` and/or `--dest-yaml` arguments.
+
+3. Check that the new models directory and yaml file look ok.
+
+4. Replace the existing directory and file, keeping backup copies just in
+case.
+
 <div align="center">

 ![canvas preview](https://github.com/invoke-ai/InvokeAI/raw/main/docs/assets/canvas_preview.png)
--- a/invokeai/app/api/dependencies.py
+++ b/invokeai/app/api/dependencies.py
@ -11,7 +11,6 @@ from invokeai.backend.util.logging import InvokeAILogger

 from ..services.default_graphs import create_system_graphs
 from ..services.latent_storage import DiskLatentsStorage, ForwardCacheLatentsStorage
-from ..services.model_manager_initializer import get_model_manager
 from ..services.restoration_services import RestorationServices
 from ..services.graph import GraphExecutionState, LibraryGraph
 from ..services.image_file_storage import DiskImageFileStorage
@ -20,6 +19,7 @@ from ..services.invocation_services import InvocationServices
 from ..services.invoker import Invoker
 from ..services.processor import DefaultInvocationProcessor
 from ..services.sqlite import SqliteItemStorage
+from ..services.model_manager_service import ModelManagerService
 from .events import FastAPIEventService


@ -83,7 +83,7 @@ class ApiDependencies:
        )

        services = InvocationServices(
-            model_manager=get_model_manager(config, logger),
+            model_manager=ModelManagerService(config,logger),
            events=events,
            latents=latents,
            images=images,
--- a/invokeai/app/api/routers/models.py
+++ b/invokeai/app/api/routers/models.py
@ -1,13 +1,14 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654) and 2023 Kent Keirsey (https://github.com/hipsterusername)

-import shutil
-import asyncio
-from typing import Annotated, Any, List, Literal, Optional, Union
+from typing import Annotated, Literal, Optional, Union, Dict

+from fastapi import Query
 from fastapi.routing import APIRouter, HTTPException
 from pydantic import BaseModel, Field, parse_obj_as
-from pathlib import Path
 from ..dependencies import ApiDependencies
+from invokeai.backend import BaseModelType, ModelType
+from invokeai.backend.model_management.models import get_all_model_configs
+MODEL_CONFIGS = Union[tuple(get_all_model_configs())]

 models_router = APIRouter(prefix="/v1/models", tags=["models"])

@ -19,6 +20,15 @@ class VaeRepo(BaseModel):

 class ModelInfo(BaseModel):
    description: Optional[str] = Field(description="A description of the model")
+    model_name: str = Field(description="The name of the model")
+    model_type: str = Field(description="The type of the model")
+    
+class DiffusersModelInfo(ModelInfo):
+    format: Literal['folder'] = 'folder'
+
+    vae: Optional[VaeRepo] = Field(description="The VAE repo to use for this model")
+    repo_id: Optional[str] = Field(description="The repo ID to use for this model")
+    path: Optional[str] = Field(description="The path to the model")
    
 class CkptModelInfo(ModelInfo):
    format: Literal['ckpt'] = 'ckpt'
@ -29,12 +39,8 @@ class CkptModelInfo(ModelInfo):
    width: Optional[int] = Field(description="The width of the model")
    height: Optional[int] = Field(description="The height of the model")

-class DiffusersModelInfo(ModelInfo):
-    format: Literal['diffusers'] = 'diffusers'
-
-    vae: Optional[VaeRepo] = Field(description="The VAE repo to use for this model")
-    repo_id: Optional[str] = Field(description="The repo ID to use for this model")
-    path: Optional[str] = Field(description="The path to the model")
+class SafetensorsModelInfo(CkptModelInfo):
+    format: Literal['safetensors'] = 'safetensors'

 class CreateModelRequest(BaseModel):
    name: str = Field(description="The name of the model")
@ -56,7 +62,8 @@ class ConvertedModelResponse(BaseModel):
    info: DiffusersModelInfo = Field(description="The converted model info")

 class ModelsList(BaseModel):
-    models: dict[str, Annotated[Union[(CkptModelInfo,DiffusersModelInfo)], Field(discriminator="format")]]
+    models: Dict[BaseModelType, Dict[ModelType, Dict[str, MODEL_CONFIGS]]] # TODO: debug/discuss with frontend
+    #models: dict[SDModelType, dict[str, Annotated[Union[(DiffusersModelInfo,CkptModelInfo,SafetensorsModelInfo)], Field(discriminator="format")]]]


@models_router.get(
@ -64,9 +71,16 @@ class ModelsList(BaseModel):
    operation_id="list_models",
    responses={200: {"model": ModelsList }},
 )
-async def list_models() -> ModelsList:
+async def list_models(
+    base_model: BaseModelType = Query(
+        default=None, description="Base model"
+    ),
+    model_type: ModelType = Query(
+        default=None, description="The type of model to get"
+    ),
+) -> ModelsList:
    """Gets a list of models"""
-    models_raw = ApiDependencies.invoker.services.model_manager.list_models()
+    models_raw = ApiDependencies.invoker.services.model_manager.list_models(model_type)
    models = parse_obj_as(ModelsList, { "models": models_raw })
    return models

@ -121,7 +135,7 @@ async def delete_model(model_name: str) -> None:
        raise HTTPException(status_code=204, detail=f"Model '{model_name}' deleted successfully")
    
    else:
-        logger.error(f"Model not found")
+        logger.error("Model not found")
        raise HTTPException(status_code=404, detail=f"Model '{model_name}' not found")
    

--- a/invokeai/app/cli_app.py
+++ b/invokeai/app/cli_app.py
@ -6,10 +6,7 @@ import re
 import shlex
 import sys
 import time
-from typing import (
-    Union,
-    get_type_hints,
-)
+from typing import Union, get_type_hints

 from pydantic import BaseModel, ValidationError
 from pydantic.fields import Field
@ -26,23 +23,25 @@ from invokeai.app.services.images import ImageService
 from invokeai.app.services.metadata import CoreMetadataService
 from invokeai.app.services.resource_name import SimpleNameService
 from invokeai.app.services.urls import LocalUrlService
-
-from .services.default_graphs import create_system_graphs
+from .services.default_graphs import (default_text_to_image_graph_id,
+                                      create_system_graphs)
 from .services.latent_storage import DiskLatentsStorage, ForwardCacheLatentsStorage

-from .cli.commands import BaseCommand, CliContext, ExitCli, add_graph_parsers, add_parsers, SortedHelpFormatter
+from .cli.commands import (BaseCommand, CliContext, ExitCli,
+                           SortedHelpFormatter, add_graph_parsers, add_parsers)
 from .cli.completer import set_autocompleter
 from .invocations.baseinvocation import BaseInvocation
 from .services.events import EventServiceBase
-from .services.model_manager_initializer import get_model_manager
-from .services.restoration_services import RestorationServices
-from .services.graph import Edge, EdgeConnection, GraphExecutionState, GraphInvocation, LibraryGraph, are_connection_types_compatible
-from .services.default_graphs import default_text_to_image_graph_id
+from .services.graph import (Edge, EdgeConnection, GraphExecutionState,
+                             GraphInvocation, LibraryGraph,
+                             are_connection_types_compatible)
 from .services.image_file_storage import DiskImageFileStorage
 from .services.invocation_queue import MemoryInvocationQueue
 from .services.invocation_services import InvocationServices
 from .services.invoker import Invoker
+from .services.model_manager_service import ModelManagerService
 from .services.processor import DefaultInvocationProcessor
+from .services.restoration_services import RestorationServices
 from .services.sqlite import SqliteItemStorage


@ -197,7 +196,6 @@ def invoke_all(context: CliContext):
        raise SessionError()

 def invoke_cli():
-    
    # get the optional list of invocations to execute on the command line
    parser = config.get_parser()
    parser.add_argument('commands',nargs='*')
@ -208,8 +206,8 @@ def invoke_cli():
    if infile := config.from_file:
        sys.stdin = open(infile,"r")
    
-    model_manager = get_model_manager(config,logger=logger)
-    
+    model_manager = ModelManagerService(config,logger)
+
    events = EventServiceBase()
    output_folder = config.output_path

@ -257,9 +255,11 @@ def invoke_cli():
        logger=logger,
        configuration=config,
    )
+    

    system_graphs = create_system_graphs(services.graph_library)
    system_graph_names = set([g.name for g in system_graphs])
+    set_autocompleter(services)

    invoker = Invoker(services)
    session: GraphExecutionState = invoker.create_execution_state()
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@ -1,13 +1,15 @@
 from typing import Literal, Optional, Union
 from pydantic import BaseModel, Field
+from contextlib import ExitStack
+import re

-from invokeai.app.invocations.util.choose_model import choose_model
 from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
-from ...backend.prompting.conditioning import try_parse_legacy_blend
+from .model import ClipField

-from ...backend.util.devices import choose_torch_device, torch_dtype
+from ...backend.util.devices import torch_dtype
 from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent
-from ...backend.stable_diffusion.textual_inversion_manager import TextualInversionManager
+from ...backend.model_management import BaseModelType, ModelType, SubModelType
+from ...backend.model_management.lora import ModelPatcher

 from compel import Compel
 from compel.prompt_parser import (
@ -40,7 +42,7 @@ class CompelInvocation(BaseInvocation):
    type: Literal["compel"] = "compel"

    prompt: str = Field(default="", description="Prompt")
-    model: str = Field(default="", description="Model to use")
+    clip: ClipField = Field(None, description="Clip to use")

    # Schema customisation
    class Config(InvocationConfig):
@ -56,73 +58,74 @@ class CompelInvocation(BaseInvocation):

    def invoke(self, context: InvocationContext) -> CompelOutput:

-        # TODO: load without model
-        model = choose_model(context.services.model_manager, self.model)
-        pipeline = model["model"]
-        tokenizer = pipeline.tokenizer
-        text_encoder = pipeline.text_encoder
-
-        # TODO: global? input?
-        #use_full_precision = precision == "float32" or precision == "autocast"
-        #use_full_precision = False
-
-        # TODO: redo TI when separate model loding implemented
-        #textual_inversion_manager = TextualInversionManager(
-        #    tokenizer=tokenizer,
-        #    text_encoder=text_encoder,
-        #    full_precision=use_full_precision,
-        #)
-
-        def load_huggingface_concepts(concepts: list[str]):
-            pipeline.textual_inversion_manager.load_huggingface_concepts(concepts)
-
-        # apply the concepts library to the prompt
-        prompt_str = pipeline.textual_inversion_manager.hf_concepts_library.replace_concepts_with_triggers(
-            self.prompt,
-            lambda concepts: load_huggingface_concepts(concepts),
-            pipeline.textual_inversion_manager.get_all_trigger_strings(),
+        tokenizer_info = context.services.model_manager.get_model(
+            **self.clip.tokenizer.dict(),
        )
-
-        # lazy-load any deferred textual inversions.
-        # this might take a couple of seconds the first time a textual inversion is used.
-        pipeline.textual_inversion_manager.create_deferred_token_ids_for_any_trigger_terms(
-            prompt_str
+        text_encoder_info = context.services.model_manager.get_model(
+            **self.clip.text_encoder.dict(),
        )
+        with tokenizer_info as orig_tokenizer,\
+             text_encoder_info as text_encoder,\
+             ExitStack() as stack:

-        compel = Compel(
-            tokenizer=tokenizer,
-            text_encoder=text_encoder,
-            textual_inversion_manager=pipeline.textual_inversion_manager,
-            dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=False,
-        )
+            loras = [(stack.enter_context(context.services.model_manager.get_model(**lora.dict(exclude={"weight"}))), lora.weight) for lora in self.clip.loras]

-        legacy_blend = try_parse_legacy_blend(prompt_str, skip_normalize=False)
-        if legacy_blend is not None:
-            conjunction = legacy_blend
-        else:
-            conjunction = Compel.parse_prompt_string(prompt_str)
+            ti_list = []
+            for trigger in re.findall(r"<[a-zA-Z0-9., _-]+>", self.prompt):
+                name = trigger[1:-1]
+                try:
+                    ti_list.append(
+                        stack.enter_context(
+                            context.services.model_manager.get_model(
+                                model_name=name,
+                                base_model=self.clip.text_encoder.base_model,
+                                model_type=ModelType.TextualInversion,
+                            )
+                        )
+                    )
+                except Exception:
+                    #print(e)
+                    #import traceback
+                    #print(traceback.format_exc())
+                    print(f"Warn: trigger: \"{trigger}\" not found")

-        if context.services.configuration.log_tokenization:
-            log_tokenization_for_conjunction(conjunction, tokenizer)
+            with ModelPatcher.apply_lora_text_encoder(text_encoder, loras),\
+                 ModelPatcher.apply_ti(orig_tokenizer, text_encoder, ti_list) as (tokenizer, ti_manager):

-        c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
+                compel = Compel(
+                    tokenizer=tokenizer,
+                    text_encoder=text_encoder,
+                    textual_inversion_manager=ti_manager,
+                    dtype_for_device_getter=torch_dtype,
+                    truncate_long_prompts=True, # TODO:
+                )
+                
+                conjunction = Compel.parse_prompt_string(self.prompt)
+                prompt: Union[FlattenedPrompt, Blend] = conjunction.prompts[0]

-        ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
-            tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
-            cross_attention_control_args=options.get("cross_attention_control", None),
-        )
+                if context.services.configuration.log_tokenization:
+                    log_tokenization_for_prompt_object(prompt, tokenizer)

-        conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"
+                c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)
+                
+                # TODO: long prompt support
+                #if not self.truncate_long_prompts:
+                #    [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
+                ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
+                    tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
+                    cross_attention_control_args=options.get("cross_attention_control", None),
+                )
+                
+            conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"

-        # TODO: hacky but works ;D maybe rename latents somehow?
-        context.services.latents.save(conditioning_name, (c, ec))
+            # TODO: hacky but works ;D maybe rename latents somehow?
+            context.services.latents.save(conditioning_name, (c, ec))

-        return CompelOutput(
-            conditioning=ConditioningField(
-                conditioning_name=conditioning_name,
-            ),
-        )
+            return CompelOutput(
+                conditioning=ConditioningField(
+                    conditioning_name=conditioning_name,
+                ),
+            )


 def get_max_token_count(
--- a/invokeai/app/invocations/generate.py
+++ b/invokeai/app/invocations/generate.py
@ -3,23 +3,20 @@
 from functools import partial
 from typing import Literal, Optional, Union, get_args

-import numpy as np
-from diffusers import ControlNetModel
-from torch import Tensor
 import torch
-
+from diffusers import ControlNetModel
 from pydantic import BaseModel, Field

-from invokeai.app.models.image import ColorField, ImageField, ResourceOrigin
-from invokeai.app.invocations.util.choose_model import choose_model
-from invokeai.app.models.image import ImageCategory, ResourceOrigin
+from invokeai.app.models.image import (ColorField, ImageCategory, ImageField,
+                                       ResourceOrigin)
 from invokeai.app.util.misc import SEED_MAX, get_random_seed
 from invokeai.backend.generator.inpaint import infill_methods
-from .baseinvocation import BaseInvocation, InvocationContext, InvocationConfig
-from .image import ImageOutput
-from ...backend.generator import Txt2Img, Img2Img, Inpaint, InvokeAIGenerator
+
+from ...backend.generator import Img2Img, Inpaint, InvokeAIGenerator, Txt2Img
 from ...backend.stable_diffusion import PipelineIntermediateState
 from ..util.step_callback import stable_diffusion_step_callback
+from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext
+from .image import ImageOutput

 SAMPLER_NAME_VALUES = Literal[tuple(InvokeAIGenerator.schedulers())]
 INFILL_METHODS = Literal[tuple(infill_methods())]
@ -81,7 +78,7 @@ class TextToImageInvocation(BaseInvocation, SDImageInvocation):

    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Handle invalid model parameter
-        model = choose_model(context.services.model_manager, self.model)
+        model = context.services.model_manager.get_model(self.model,node=self,context=context)

        # loading controlnet image (currently requires pre-processed image)
        control_image = (
@ -171,7 +168,7 @@ class ImageToImageInvocation(TextToImageInvocation):
            image = image.resize((self.width, self.height))

        # Handle invalid model parameter
-        model = choose_model(context.services.model_manager, self.model)
+        model = context.services.model_manager.get_model(self.model,node=self,context=context)

        # Get the source node id (we are invoking the prepared node)
        graph_execution_state = context.services.graph_execution_manager.get(
@ -281,7 +278,7 @@ class InpaintInvocation(ImageToImageInvocation):
        )

        # Handle invalid model parameter
-        model = choose_model(context.services.model_manager, self.model)
+        model = context.services.model_manager.get_model(self.model,node=self,context=context)

        # Get the source node id (we are invoking the prepared node)
        graph_execution_state = context.services.graph_execution_manager.get(
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@ -1,43 +1,36 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)

-import random
-import einops
-from typing import Literal, Optional, Union, List
+from contextlib import ExitStack
+from typing import List, Literal, Optional, Union

-from compel import Compel
-from diffusers.pipelines.controlnet import MultiControlNetModel
+import einops

 from pydantic import BaseModel, Field, validator
 import torch
-
-from invokeai.app.invocations.util.choose_model import choose_model
-from invokeai.app.models.image import ImageCategory
-from invokeai.app.util.misc import SEED_MAX, get_random_seed
-
-from invokeai.app.util.step_callback import stable_diffusion_step_callback
-from .controlnet_image_processors import ControlField
-
-from ...backend.model_management.model_manager import ModelManager
-from ...backend.util.devices import choose_torch_device, torch_dtype
-from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings
-from ...backend.image_util.seamless import configure_model_padding
-from ...backend.prompting.conditioning import get_uc_and_c_and_ec
-
-from ...backend.stable_diffusion.diffusers_pipeline import ConditioningData, StableDiffusionGeneratorPipeline, image_resized_to_grid_as_tensor
-from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
-from ...backend.stable_diffusion.diffusers_pipeline import ControlNetData
-
-from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
-import numpy as np
-from ..services.image_file_storage import ResourceOrigin
-from .baseinvocation import BaseInvocation, InvocationContext
-from .image import ImageField, ImageOutput
-from .compel import ConditioningField
-from ...backend.stable_diffusion import PipelineIntermediateState
+from diffusers import ControlNetModel
+from diffusers.image_processor import VaeImageProcessor
 from diffusers.schedulers import SchedulerMixin as Scheduler
-import diffusers
-from diffusers import DiffusionPipeline, ControlNetModel

+from invokeai.app.util.misc import SEED_MAX, get_random_seed
+from invokeai.app.util.step_callback import stable_diffusion_step_callback
+
+from ..models.image import ImageCategory, ImageField, ResourceOrigin
+from ...backend.image_util.seamless import configure_model_padding
+from ...backend.stable_diffusion import PipelineIntermediateState
+from ...backend.stable_diffusion.diffusers_pipeline import (
+    ConditioningData, ControlNetData, StableDiffusionGeneratorPipeline,
+    image_resized_to_grid_as_tensor)
+from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import \
+    PostprocessingSettings
+from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
+from ...backend.util.devices import choose_torch_device, torch_dtype
+from ...backend.model_management.lora import ModelPatcher
+from .baseinvocation import (BaseInvocation, BaseInvocationOutput,
+                             InvocationConfig, InvocationContext)
+from .compel import ConditioningField
+from .controlnet_image_processors import ControlField
+from .image import ImageOutput
+from .model import ModelInfo, UNetField, VaeField

 class LatentsField(BaseModel):
    """A latents field used for passing latents between invocations"""
@ -90,15 +83,22 @@ SAMPLER_NAME_VALUES = Literal[
 ]


-def get_scheduler(scheduler_name:str, model: StableDiffusionGeneratorPipeline)->Scheduler:
-    scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP['ddim'])

-    scheduler_config = model.scheduler.config
+def get_scheduler(
+    context: InvocationContext,
+    scheduler_info: ModelInfo,
+    scheduler_name: str,
+) -> Scheduler:
+    scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP['ddim'])
+    orig_scheduler_info = context.services.model_manager.get_model(**scheduler_info.dict())
+    with orig_scheduler_info as orig_scheduler:
+        scheduler_config = orig_scheduler.config
+        
    if "_backup" in scheduler_config:
        scheduler_config = scheduler_config["_backup"]
    scheduler_config = {**scheduler_config, **scheduler_extra_config, "_backup": scheduler_config}
    scheduler = scheduler_class.from_config(scheduler_config)
-
+    
    # hack copied over from generate.py
    if not hasattr(scheduler, 'uses_inpainting_model'):
        scheduler.uses_inpainting_model = lambda: False
@ -128,7 +128,6 @@ def get_noise(width:int, height:int, device:torch.device, seed:int = 0, latent_c
    #     x = (1 - self.perlin) * x + self.perlin * perlin_noise
    return x

-
 class NoiseInvocation(BaseInvocation):
    """Generates latent noise."""

@ -176,10 +175,10 @@ class TextToLatentsInvocation(BaseInvocation):
    steps:       int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
    cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
    scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
-    model:       str = Field(default="", description="The model to use (currently ignored)")
-    control: Union[ControlField, List[ControlField]] = Field(default=None, description="The control to use")
-    # seamless:   bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
-    # seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
+    unet: UNetField = Field(default=None, description="UNet submodel")
+    control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
+    #seamless:   bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
+    #seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
    # fmt: on

    @validator("cfg_scale")
@ -219,44 +218,10 @@ class TextToLatentsInvocation(BaseInvocation):
            source_node_id=source_node_id,
        )

-    def get_model(self, model_manager: ModelManager) -> StableDiffusionGeneratorPipeline:
-        model_info = choose_model(model_manager, self.model)
-        model_name = model_info['model_name']
-        model_hash = model_info['hash']
-        model: StableDiffusionGeneratorPipeline = model_info['model']
-        model.scheduler = get_scheduler(
-            model=model,
-            scheduler_name=self.scheduler
-        )
-
-        # if isinstance(model, DiffusionPipeline):
-        #     for component in [model.unet, model.vae]:
-        #         configure_model_padding(component,
-        #                                 self.seamless,
-        #                                 self.seamless_axes
-        #                                 )
-        # else:
-        #     configure_model_padding(model,
-        #                             self.seamless,
-        #                             self.seamless_axes
-        #                             )
-
-        return model
-
-
-    def get_conditioning_data(self, context: InvocationContext, model: StableDiffusionGeneratorPipeline) -> ConditioningData:
+    def get_conditioning_data(self, context: InvocationContext, scheduler) -> ConditioningData:
        c, extra_conditioning_info = context.services.latents.get(self.positive_conditioning.conditioning_name)
        uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name)

-        compel = Compel(
-            tokenizer=model.tokenizer,
-            text_encoder=model.text_encoder,
-            textual_inversion_manager=model.textual_inversion_manager,
-            dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=False,
-        )
-        [c, uc] = compel.pad_conditioning_tensors_to_same_length([c, uc])
-
        conditioning_data = ConditioningData(
            unconditioned_embeddings=uc,
            text_embeddings=c,
@ -268,16 +233,46 @@ class TextToLatentsInvocation(BaseInvocation):
                h_symmetry_time_pct=None,#h_symmetry_time_pct,
                v_symmetry_time_pct=None#v_symmetry_time_pct,
            ),
-        ).add_scheduler_args_if_applicable(model.scheduler, eta=0.0)#ddim_eta)
+        ).add_scheduler_args_if_applicable(scheduler, eta=0.0)#ddim_eta)
        return conditioning_data

-    def prep_control_data(self,
-                          context: InvocationContext,
-                          model: StableDiffusionGeneratorPipeline, # really only need model for dtype and device
-                          control_input: List[ControlField],
-                          latents_shape: List[int],
-                          do_classifier_free_guidance: bool = True,
-                          ) -> List[ControlNetData]:
+    def create_pipeline(self, unet, scheduler) -> StableDiffusionGeneratorPipeline:
+        # TODO:
+        #configure_model_padding(
+        #    unet,
+        #    self.seamless,
+        #    self.seamless_axes,
+        #)
+
+        class FakeVae:
+            class FakeVaeConfig:
+                def __init__(self):
+                    self.block_out_channels = [0]
+            
+            def __init__(self):
+                self.config = FakeVae.FakeVaeConfig()
+
+        return StableDiffusionGeneratorPipeline(
+            vae=FakeVae(), # TODO: oh...
+            text_encoder=None,
+            tokenizer=None,
+            unet=unet,
+            scheduler=scheduler,
+            safety_checker=None,
+            feature_extractor=None,
+            requires_safety_checker=False,
+            precision="float16" if unet.dtype == torch.float16 else "float32",
+        )
+    
+    def prep_control_data(
+        self,
+        context: InvocationContext,
+        model: StableDiffusionGeneratorPipeline, # really only need model for dtype and device
+        control_input: List[ControlField],
+        latents_shape: List[int],
+        do_classifier_free_guidance: bool = True,
+    ) -> List[ControlNetData]:
+
        # assuming fixed dimensional scaling of 8:1 for image:latents
        control_height_resize = latents_shape[2] * 8
        control_width_resize = latents_shape[3] * 8
@ -356,23 +351,38 @@ class TextToLatentsInvocation(BaseInvocation):
        def step_callback(state: PipelineIntermediateState):
            self.dispatch_progress(context, source_node_id, state)

-        model = self.get_model(context.services.model_manager)
-        conditioning_data = self.get_conditioning_data(context, model)
+        unet_info = context.services.model_manager.get_model(**self.unet.unet.dict())
+        with unet_info as unet,\
+             ExitStack() as stack:

-        control_data = self.prep_control_data(model=model, context=context, control_input=self.control,
-                                              latents_shape=noise.shape,
-                                              # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                                              do_classifier_free_guidance=True,)
+            scheduler = get_scheduler(
+                context=context,
+                scheduler_info=self.unet.scheduler,
+                scheduler_name=self.scheduler,
+            )
+            
+            pipeline = self.create_pipeline(unet, scheduler)
+            conditioning_data = self.get_conditioning_data(context, scheduler)

-        # TODO: Verify the noise is the right size
-        result_latents, result_attention_map_saver = model.latents_from_embeddings(
-            latents=torch.zeros_like(noise, dtype=torch_dtype(model.device)),
-            noise=noise,
-            num_inference_steps=self.steps,
-            conditioning_data=conditioning_data,
-            control_data=control_data,  # list[ControlNetData]
-            callback=step_callback,
-        )
+            loras = [(stack.enter_context(context.services.model_manager.get_model(**lora.dict(exclude={"weight"}))), lora.weight) for lora in self.unet.loras]
+
+            control_data = self.prep_control_data(
+                model=pipeline, context=context, control_input=self.control,
+                latents_shape=noise.shape,
+                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                do_classifier_free_guidance=True,
+            )
+
+            with ModelPatcher.apply_lora_unet(pipeline.unet, loras):
+                # TODO: Verify the noise is the right size
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data, # list[ControlNetData]
+                    callback=step_callback,
+                )

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        torch.cuda.empty_cache()
@ -381,7 +391,6 @@ class TextToLatentsInvocation(BaseInvocation):
        context.services.latents.save(name, result_latents)
        return build_latents_output(latents_name=name, latents=result_latents)

-
 class LatentsToLatentsInvocation(TextToLatentsInvocation):
    """Generates latents using latents as base image."""

@ -415,32 +424,52 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
        def step_callback(state: PipelineIntermediateState):
            self.dispatch_progress(context, source_node_id, state)

-        model = self.get_model(context.services.model_manager)
-        conditioning_data = self.get_conditioning_data(context, model)
-
-        control_data = self.prep_control_data(model=model, context=context, control_input=self.control,
-                                              latents_shape=noise.shape,
-                                              # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                                              do_classifier_free_guidance=True,
-                                              )
-
-        # TODO: Verify the noise is the right size
-
-        initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
-            latent, device=model.device, dtype=latent.dtype
+        unet_info = context.services.model_manager.get_model(
+            **self.unet.unet.dict(),
        )

-        timesteps, _ = model.get_img2img_timesteps(self.steps, self.strength)
+        with unet_info as unet,\
+             ExitStack() as stack:

-        result_latents, result_attention_map_saver = model.latents_from_embeddings(
-            latents=initial_latents,
-            timesteps=timesteps,
-            noise=noise,
-            num_inference_steps=self.steps,
-            conditioning_data=conditioning_data,
-            control_data=control_data,  # list[ControlNetData]
-            callback=step_callback
-        )
+            scheduler = get_scheduler(
+                context=context,
+                scheduler_info=self.unet.scheduler,
+                scheduler_name=self.scheduler,
+            )
+
+            pipeline = self.create_pipeline(unet, scheduler)
+            conditioning_data = self.get_conditioning_data(context, scheduler)
+            
+            control_data = self.prep_control_data(
+                model=pipeline, context=context, control_input=self.control,
+                latents_shape=noise.shape,
+                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                do_classifier_free_guidance=True,
+            )
+
+            # TODO: Verify the noise is the right size
+            initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
+                latent, device=unet.device, dtype=latent.dtype
+            )
+
+            timesteps, _ = pipeline.get_img2img_timesteps(
+                self.steps,
+                self.strength,
+                device=unet.device,
+            )
+
+            loras = [(stack.enter_context(context.services.model_manager.get_model(**lora.dict(exclude={"weight"}))), lora.weight) for lora in self.unet.loras]
+
+            with ModelPatcher.apply_lora_unet(pipeline.unet, loras):
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=initial_latents,
+                    timesteps=timesteps,
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data,  # list[ControlNetData]
+                    callback=step_callback
+                )

        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
        torch.cuda.empty_cache()
@ -458,16 +487,14 @@ class LatentsToImageInvocation(BaseInvocation):

    # Inputs
    latents: Optional[LatentsField] = Field(description="The latents to generate an image from")
-    model: str = Field(default="", description="The model to use")
+    vae: VaeField = Field(default=None, description="Vae submodel")
+    tiled: bool = Field(default=False, description="Decode latents by overlaping tiles(less memory consumption)")

    # Schema customisation
    class Config(InvocationConfig):
        schema_extra = {
            "ui": {
                "tags": ["latents", "image"],
-                "type_hints": {
-                  "model": "model"
-                }
            },
        }

@ -475,37 +502,44 @@ class LatentsToImageInvocation(BaseInvocation):
    def invoke(self, context: InvocationContext) -> ImageOutput:
        latents = context.services.latents.get(self.latents.latents_name)

-        # TODO: this only really needs the vae
-        model_info = choose_model(context.services.model_manager, self.model)
-        model: StableDiffusionGeneratorPipeline = model_info['model']
+        vae_info = context.services.model_manager.get_model(
+            **self.vae.vae.dict(),
+        )

-        with torch.inference_mode():
-            np_image = model.decode_latents(latents)
-            image = model.numpy_to_pil(np_image)[0]
-
-            # what happened to metadata?
-            # metadata = context.services.metadata.build_metadata(
-            #     session_id=context.graph_execution_state_id, node=self
+        with vae_info as vae:
+            if self.tiled or context.services.configuration.tiled_decode:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()

+            # clear memory as vae decode can request a lot
            torch.cuda.empty_cache()

-            # new (post Image service refactor) way of using services to save image
-            #     and gnenerate unique image_name
-            image_dto = context.services.images.create(
-                image=image,
-                image_origin=ResourceOrigin.INTERNAL,
-                image_category=ImageCategory.GENERAL,
-                session_id=context.graph_execution_state_id,
-                node_id=self.id,
-                is_intermediate=self.is_intermediate
-            )
+            with torch.inference_mode():
+                # copied from diffusers pipeline
+                latents = latents / vae.config.scaling_factor
+                image = vae.decode(latents, return_dict=False)[0]
+                image = (image / 2 + 0.5).clamp(0, 1) # denormalize
+                # we always cast to float32 as this does not cause significant overhead and is compatible with bfloat16
+                np_image = image.cpu().permute(0, 2, 3, 1).float().numpy()

-            return ImageOutput(
-                image=ImageField(image_name=image_dto.image_name),
-                width=image_dto.width,
-                height=image_dto.height,
-            )
+                image = VaeImageProcessor.numpy_to_pil(np_image)[0]

+        torch.cuda.empty_cache()
+
+        # Persist the decoded image. `is_intermediate` is forwarded exactly as the
+        # pre-rewrite code did, so intermediate results keep being flagged as such.
+        image_dto = context.services.images.create(
+            image=image,
+            image_origin=ResourceOrigin.INTERNAL,
+            image_category=ImageCategory.GENERAL,
+            node_id=self.id,
+            session_id=context.graph_execution_state_id,
+            is_intermediate=self.is_intermediate,
+        )
+
+        return ImageOutput(
+            image=ImageField(image_name=image_dto.image_name),
+            width=image_dto.width,
+            height=image_dto.height,
+        )

 LATENTS_INTERPOLATION_MODE = Literal[
    "nearest", "linear", "bilinear", "bicubic", "trilinear", "area", "nearest-exact"
@ -581,14 +615,14 @@ class ImageToLatentsInvocation(BaseInvocation):

    # Inputs
    image: Union[ImageField, None] = Field(description="The image to encode")
-    model: str = Field(default="", description="The model to use")
+    vae: VaeField = Field(default=None, description="Vae submodel")
+    tiled: bool = Field(default=False, description="Encode latents by overlaping tiles(less memory consumption)")

    # Schema customisation
    class Config(InvocationConfig):
        schema_extra = {
            "ui": {
                "tags": ["latents", "image"],
-                "type_hints": {"model": "model"},
            },
        }

@ -599,20 +633,30 @@ class ImageToLatentsInvocation(BaseInvocation):
        # )
        image = context.services.images.get_pil_image(self.image.image_name)

-        # TODO: this only really needs the vae
-        model_info = choose_model(context.services.model_manager, self.model)
-        model: StableDiffusionGeneratorPipeline = model_info["model"]
+        #vae_info = context.services.model_manager.get_model(**self.vae.vae.dict())
+        vae_info = context.services.model_manager.get_model(
+            **self.vae.vae.dict(),
+        )

        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
-
        if image_tensor.dim() == 3:
            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")

-        latents = model.non_noised_latents_from_image(
-            image_tensor,
-            device=model._model_group.device_for(model.unet),
-            dtype=model.unet.dtype,
-        )
+        with vae_info as vae:
+            if self.tiled:
+                vae.enable_tiling()
+            else:
+                vae.disable_tiling()
+
+            # inlined replacement for the old pipeline helper non_noised_latents_from_image
+            image_tensor = image_tensor.to(device=vae.device, dtype=vae.dtype)
+            with torch.inference_mode():
+                image_tensor_dist = vae.encode(image_tensor).latent_dist
+                latents = image_tensor_dist.sample().to(
+                    dtype=vae.dtype
+                )  # FIXME: uses torch.randn. make reproducible!
+            # scale by the VAE's own factor (not a hard-coded 0.18215) so encode mirrors the division done on decode
+            latents = vae.config.scaling_factor * latents

        name = f"{context.graph_execution_state_id}__{self.id}"
        # context.services.latents.set(name, latents)
--- a/invokeai/app/invocations/model.py
+++ b/invokeai/app/invocations/model.py
@ -0,0 +1,311 @@
+from typing import Literal, Optional, Union, List
+from pydantic import BaseModel, Field
+import copy
+
+from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
+
+from ...backend.util.devices import choose_torch_device, torch_dtype
+from ...backend.model_management import BaseModelType, ModelType, SubModelType
+
+class ModelInfo(BaseModel):
+    # Reference to one model (or one submodel of a pipeline). The invocations
+    # expand it into keyword arguments: `model_manager.get_model(**info.dict())`.
+    model_name: str = Field(description="Name of the model to load")
+    base_model: BaseModelType = Field(description="Base model")
+    model_type: ModelType = Field(description="Type of the model to load")
+    # None when the whole model is meant rather than a submodel (e.g. a lora).
+    submodel: Optional[SubModelType] = Field(description="Info to load submodel")
+
+class LoraInfo(ModelInfo):
+    # A ModelInfo plus the strength the lora is applied with. Consumers strip
+    # `weight` (`lora.dict(exclude={"weight"})`) before calling the model manager.
+    weight: float = Field(description="Lora's weight which to use when apply to model")
+
+class UNetField(BaseModel):
+    # Bundle handed to the denoising nodes: references to the UNet and scheduler
+    # submodels plus the loras that get patched into the UNet before sampling.
+    unet: ModelInfo = Field(description="Info to load unet submodel")
+    scheduler: ModelInfo = Field(description="Info to load scheduler submodel")
+    loras: List[LoraInfo] = Field(description="Loras to apply on model loading")
+
+class ClipField(BaseModel):
+    # References to the tokenizer and text encoder submodels, plus the loras
+    # recorded for them (appended by LoraLoaderInvocation).
+    tokenizer: ModelInfo = Field(description="Info to load tokenizer submodel")
+    text_encoder: ModelInfo = Field(description="Info to load text_encoder submodel")
+    loras: List[LoraInfo] = Field(description="Loras to apply on model loading")
+
+class VaeField(BaseModel):
+    # Wrapper around a single VAE reference; read as `self.vae.vae` by the
+    # latents<->image invocations.
+    # TODO: better naming?
+    vae: ModelInfo = Field(description="Info to load vae submodel")
+
+
+class ModelLoaderOutput(BaseInvocationOutput):
+    """Model loader output"""
+
+    #fmt: off
+    type: Literal["model_loader_output"] = "model_loader_output"
+
+    # The SD1/SD2 loader invocations fill in all three fields; each one only
+    # carries ModelInfo references, not model objects.
+    unet: UNetField = Field(default=None, description="UNet submodel")
+    clip: ClipField = Field(default=None, description="Tokenizer and text_encoder submodels")
+    vae: VaeField = Field(default=None, description="Vae submodel")
+    #fmt: on
+
+
+class SD1ModelLoaderInvocation(BaseInvocation):
+    """Loading submodels of selected model."""
+
+    type: Literal["sd1_model_loader"] = "sd1_model_loader"
+
+    model_name: str = Field(default="", description="Model to load")
+    # TODO: precision?
+
+    # Schema customisation
+    class Config(InvocationConfig):
+        schema_extra = {
+            "ui": {
+                "tags": ["model", "loader"],
+                "type_hints": {
+                  "model_name": "model" # TODO: rename to model_name?
+                }
+            },
+        }
+
+    def invoke(self, context: InvocationContext) -> ModelLoaderOutput:
+        """Verify the pipeline is known and return references to its submodels.
+
+        This method only calls `model_exists` and builds `ModelInfo` references
+        for the unet, scheduler, tokenizer, text encoder and vae submodels of
+        the pipeline named `model_name`.
+
+        Raises:
+            Exception: if the model manager reports no Stable Diffusion 1
+                pipeline named `model_name`.
+        """
+
+        base_model = BaseModelType.StableDiffusion1 # TODO:
+
+        # TODO: not found exceptions
+        if not context.services.model_manager.model_exists(
+            model_name=self.model_name,
+            base_model=base_model,
+            model_type=ModelType.Pipeline,
+        ):
+            raise Exception(f"Unknown model name: {self.model_name}!")
+
+        # TODO: also verify that the individual submodels exist (corrupted model check)
+
+        def submodel_info(submodel: SubModelType) -> ModelInfo:
+            # Every reference points into the same pipeline; only `submodel` varies.
+            return ModelInfo(
+                model_name=self.model_name,
+                base_model=base_model,
+                model_type=ModelType.Pipeline,
+                submodel=submodel,
+            )
+
+        return ModelLoaderOutput(
+            unet=UNetField(
+                unet=submodel_info(SubModelType.UNet),
+                scheduler=submodel_info(SubModelType.Scheduler),
+                loras=[],
+            ),
+            clip=ClipField(
+                tokenizer=submodel_info(SubModelType.Tokenizer),
+                text_encoder=submodel_info(SubModelType.TextEncoder),
+                loras=[],
+            ),
+            vae=VaeField(
+                vae=submodel_info(SubModelType.Vae),
+            )
+        )
+
+# TODO: optimize(less code copy)
+class SD2ModelLoaderInvocation(BaseInvocation):
+    """Loading submodels of selected model."""
+
+    type: Literal["sd2_model_loader"] = "sd2_model_loader"
+
+    model_name: str = Field(default="", description="Model to load")
+    # TODO: precision?
+
+    # Schema customisation
+    class Config(InvocationConfig):
+        schema_extra = {
+            "ui": {
+                "tags": ["model", "loader"],
+                "type_hints": {
+                  "model_name": "model" # TODO: rename to model_name?
+                }
+            },
+        }
+
+    def invoke(self, context: InvocationContext) -> ModelLoaderOutput:
+        """Verify the pipeline is known and return references to its submodels.
+
+        This method only calls `model_exists` and builds `ModelInfo` references
+        for the unet, scheduler, tokenizer, text encoder and vae submodels of
+        the pipeline named `model_name`.
+
+        Raises:
+            Exception: if the model manager reports no Stable Diffusion 2
+                pipeline named `model_name`.
+        """
+
+        base_model = BaseModelType.StableDiffusion2 # TODO:
+
+        # TODO: not found exceptions
+        if not context.services.model_manager.model_exists(
+            model_name=self.model_name,
+            base_model=base_model,
+            model_type=ModelType.Pipeline,
+        ):
+            raise Exception(f"Unknown model name: {self.model_name}!")
+
+        # TODO: also verify that the individual submodels exist (corrupted model check)
+
+        def submodel_info(submodel: SubModelType) -> ModelInfo:
+            # Every reference points into the same pipeline; only `submodel` varies.
+            return ModelInfo(
+                model_name=self.model_name,
+                base_model=base_model,
+                model_type=ModelType.Pipeline,
+                submodel=submodel,
+            )
+
+        return ModelLoaderOutput(
+            unet=UNetField(
+                unet=submodel_info(SubModelType.UNet),
+                scheduler=submodel_info(SubModelType.Scheduler),
+                loras=[],
+            ),
+            clip=ClipField(
+                tokenizer=submodel_info(SubModelType.Tokenizer),
+                text_encoder=submodel_info(SubModelType.TextEncoder),
+                loras=[],
+            ),
+            vae=VaeField(
+                vae=submodel_info(SubModelType.Vae),
+            )
+        )
+
+class LoraLoaderOutput(BaseInvocationOutput):
+    """Lora loader output"""
+
+    #fmt: off
+    type: Literal["lora_loader_output"] = "lora_loader_output"
+
+    # Copies of the loader's unet/clip inputs with the lora appended; a field
+    # stays None when the corresponding input was not provided.
+    unet: Optional[UNetField] = Field(default=None, description="UNet submodel")
+    clip: Optional[ClipField] = Field(default=None, description="Tokenizer and text_encoder submodels")
+    #fmt: on
+
+class LoraLoaderInvocation(BaseInvocation):
+    """Apply selected lora to unet and text_encoder."""
+
+    type: Literal["lora_loader"] = "lora_loader"
+
+    lora_name: str = Field(description="Lora model name")
+    weight: float = Field(default=0.75, description="With what weight to apply lora")
+
+    unet: Optional[UNetField] = Field(description="UNet model for applying lora")
+    clip: Optional[ClipField] = Field(description="Clip model for applying lora")
+
+    def invoke(self, context: InvocationContext) -> LoraLoaderOutput:
+        """Return copies of the unet/clip inputs with this lora appended.
+
+        The inputs themselves are not modified: each provided field is
+        deep-copied and a `LoraInfo` carrying `lora_name` and `weight` is
+        appended to the copy's `loras` list. Inputs that are None stay None
+        in the output.
+
+        Raises:
+            Exception: if the model manager does not know the lora, or if a
+                lora with the same name is already listed on the unet or clip.
+        """
+
+        # The lora is looked up under the base model of whatever it is applied to.
+        # TODO: the SD1 fallback (nothing connected) mirrors SD1ModelLoaderInvocation.
+        if self.unet is not None:
+            base_model = self.unet.unet.base_model
+        elif self.clip is not None:
+            base_model = self.clip.text_encoder.base_model
+        else:
+            base_model = BaseModelType.StableDiffusion1
+
+        if not context.services.model_manager.model_exists(
+            model_name=self.lora_name,
+            base_model=base_model,
+            model_type=ModelType.Lora,
+        ):
+            raise Exception(f"Unknown lora name: {self.lora_name}!")
+
+        if self.unet is not None and any(lora.model_name == self.lora_name for lora in self.unet.loras):
+            raise Exception(f"Lora \"{self.lora_name}\" already applied to unet")
+
+        if self.clip is not None and any(lora.model_name == self.lora_name for lora in self.clip.loras):
+            raise Exception(f"Lora \"{self.lora_name}\" already applied to clip")
+
+        def new_lora_info() -> LoraInfo:
+            # A fresh object per target so the unet and clip lists never share state.
+            return LoraInfo(
+                model_name=self.lora_name,
+                base_model=base_model,
+                model_type=ModelType.Lora,
+                submodel=None,
+                weight=self.weight,
+            )
+
+        output = LoraLoaderOutput()
+
+        if self.unet is not None:
+            output.unet = copy.deepcopy(self.unet)
+            output.unet.loras.append(new_lora_info())
+
+        if self.clip is not None:
+            output.clip = copy.deepcopy(self.clip)
+            output.clip.loras.append(new_lora_info())
+
+        return output
+
--- a/invokeai/app/invocations/util/choose_model.py
+++ b/invokeai/app/invocations/util/choose_model.py
@ -1,14 +0,0 @@
-from invokeai.backend.model_management.model_manager import ModelManager
-
-
-def choose_model(model_manager: ModelManager, model_name: str):
-    """Returns the default model if the `model_name` not a valid model, else returns the selected model."""
-    logger = model_manager.logger
-    if model_name and not model_manager.valid_model(model_name):
-        default_model_name = model_manager.default_model()
-        logger.warning(f"\'{model_name}\' is not a valid model name. Using default model \'{default_model_name}\' instead.")
-        model = model_manager.get_model()
-    else:
-        model = model_manager.get_model(model_name)
-
-    return model
--- a/invokeai/app/services/config.py
+++ b/invokeai/app/services/config.py
@ -15,10 +15,7 @@ InvokeAI:
    conf_path: configs/models.yaml
    legacy_conf_dir: configs/stable-diffusion
    outdir: outputs
-    embedding_dir: embeddings
-    lora_dir: loras
    autoconvert_dir: null
-    gfpgan_model_dir: models/gfpgan/GFPGANv1.4.pth
  Models:
    model: stable-diffusion-1.5
    embeddings: true
@ -171,7 +168,7 @@ from argparse import ArgumentParser
 from omegaconf import OmegaConf, DictConfig
 from pathlib import Path
 from pydantic import BaseSettings, Field, parse_obj_as
-from typing import ClassVar, Dict, List, Literal, Type, Union, get_origin, get_type_hints, get_args
+from typing import ClassVar, Dict, List, Literal, Union, get_origin, get_type_hints, get_args

 INIT_FILE = Path('invokeai.yaml')
 DB_FILE   = Path('invokeai.db')
@ -374,24 +371,20 @@ setting environment variables INVOKEAI_<setting>.
    precision           : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='float16',description='Floating point precision', category='Memory/Performance')
    sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
    xformers_enabled    : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')
-
+    tiled_decode        : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category='Memory/Performance')

    root                : Path = Field(default=_find_root(), description='InvokeAI runtime root directory', category='Paths')
    autoconvert_dir     : Path = Field(default=None, description='Path to a directory of ckpt files to be converted into diffusers and imported on startup.', category='Paths')
    conf_path           : Path = Field(default='configs/models.yaml', description='Path to models definition file', category='Paths')
-    embedding_dir       : Path = Field(default='embeddings', description='Path to InvokeAI textual inversion aembeddings directory', category='Paths')
-    gfpgan_model_dir    : Path = Field(default="./models/gfpgan/GFPGANv1.4.pth", description='Path to GFPGAN models directory.', category='Paths')
-    controlnet_dir      : Path = Field(default="controlnets", description='Path to directory of ControlNet models.', category='Paths')
+    models_dir          : Path = Field(default='./models', description='Path to the models directory', category='Paths')
    legacy_conf_dir     : Path = Field(default='configs/stable-diffusion', description='Path to directory of legacy checkpoint config files', category='Paths')
-    lora_dir            : Path = Field(default='loras', description='Path to InvokeAI LoRA model directory', category='Paths')
    db_dir              : Path = Field(default='databases', description='Path to InvokeAI databases directory', category='Paths')
    outdir              : Path = Field(default='outputs', description='Default folder for output images', category='Paths')
    from_file           : Path = Field(default=None, description='Take command input from the indicated file (command-line client only)', category='Paths')
    use_memory_db       : bool = Field(default=False, description='Use in-memory database for storing image metadata', category='Paths')
-
+    
    model               : str = Field(default='stable-diffusion-1.5', description='Initial model name', category='Models')
-    embeddings          : bool = Field(default=True, description='Load contents of embeddings directory', category='Models')
-
+    
    log_handlers        : List[str] = Field(default=["console"], description='Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>"', category="Logging")
    # note - would be better to read the log_format values from logging.py, but this creates circular dependencies issues
    log_format          : Literal[tuple(['plain','color','syslog','legacy'])] = Field(default="color", description='Log format. Use "plain" for text-only, "color" for colorized output, "legacy" for 2.3-style logging and "syslog" for syslog-style', category="Logging")
@ -492,39 +485,11 @@ setting environment variables INVOKEAI_<setting>.
        return self._resolve(self.legacy_conf_dir)

    @property
-    def cache_dir(self)->Path:
-        '''
-        Path to the global cache directory for HuggingFace hub-managed models
-        '''
-        return self.models_dir / "hub"
-
-    @property
-    def models_dir(self)->Path:
+    def models_path(self)->Path:
        '''
        Path to the models directory
        '''
-        return self._resolve("models")
-
-    @property
-    def embedding_path(self)->Path:
-        '''
-        Path to the textual inversion embeddings directory.
-        '''
-        return self._resolve(self.embedding_dir) if self.embedding_dir else None
-
-    @property
-    def lora_path(self)->Path:
-        '''
-        Path to the LoRA models directory.
-        '''
-        return self._resolve(self.lora_dir) if self.lora_dir else None
-
-    @property
-    def controlnet_path(self)->Path:
-        '''
-        Path to the controlnet models directory.
-        '''
-        return self._resolve(self.controlnet_dir) if self.controlnet_dir else None
+        return self._resolve(self.models_dir)

    @property
    def autoconvert_path(self)->Path:
@ -533,13 +498,6 @@ setting environment variables INVOKEAI_<setting>.
        '''
        return self._resolve(self.autoconvert_dir) if self.autoconvert_dir else None

-    @property
-    def gfpgan_model_path(self)->Path:
-        '''
-        Path to the GFPGAN model.
-        '''
-        return self._resolve(self.gfpgan_model_dir) if self.gfpgan_model_dir else None
-
    # the following methods support legacy calls leftover from the Globals era
    @property
    def full_precision(self)->bool:
--- a/invokeai/app/services/events.py
+++ b/invokeai/app/services/events.py
@ -3,7 +3,8 @@
 from typing import Any
 from invokeai.app.models.image import ProgressImage
 from invokeai.app.util.misc import get_timestamp
-
+from invokeai.app.services.model_manager_service import BaseModelType, ModelType, SubModelType, ModelInfo
+from invokeai.app.models.exceptions import CanceledException

 class EventServiceBase:
    session_event: str = "session_event"
@ -101,3 +102,53 @@ class EventServiceBase:
                graph_execution_state_id=graph_execution_state_id,
            ),
        )
+
+    def emit_model_load_started(
+        self,
+        graph_execution_state_id: str,
+        node: dict,
+        source_node_id: str,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: SubModelType,
+    ) -> None:
+        """Emit a `model_load_started` session event describing the requested model."""
+        # Key order matches the keyword order of the arguments.
+        event_payload = {
+            "graph_execution_state_id": graph_execution_state_id,
+            "node": node,
+            "source_node_id": source_node_id,
+            "model_name": model_name,
+            "base_model": base_model,
+            "model_type": model_type,
+            "submodel": submodel,
+        }
+        self.__emit_session_event(
+            event_name="model_load_started",
+            payload=event_payload,
+        )
+
+    def emit_model_load_completed(
+        self,
+        graph_execution_state_id: str,
+        node: dict,
+        source_node_id: str,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: SubModelType,
+        model_info: ModelInfo,
+    ) -> None:
+        """Emit a `model_load_completed` session event, including the loaded model's info."""
+        # Same fields as the "started" event, plus `model_info` for the loaded model.
+        event_payload = {
+            "graph_execution_state_id": graph_execution_state_id,
+            "node": node,
+            "source_node_id": source_node_id,
+            "model_name": model_name,
+            "base_model": base_model,
+            "model_type": model_type,
+            "submodel": submodel,
+            "model_info": model_info,
+        }
+        self.__emit_session_event(
+            event_name="model_load_completed",
+            payload=event_payload,
+        )
--- a/invokeai/app/services/model_manager_initializer.py
+++ b/invokeai/app/services/model_manager_initializer.py
@ -1,104 +0,0 @@
-import os
-import sys
-import torch
-from argparse import Namespace
-from omegaconf import OmegaConf
-from pathlib import Path
-from typing import types
-
-import invokeai.version
-from .config import InvokeAISettings
-from ...backend import ModelManager
-from ...backend.util import choose_precision, choose_torch_device
-
-# TODO: Replace with an abstract class base ModelManagerBase
-def get_model_manager(config: InvokeAISettings, logger: types.ModuleType) -> ModelManager:
-    model_config = config.model_conf_path
-    if not model_config.exists():
-        report_model_error(
-            config, FileNotFoundError(f"The file {model_config} could not be found."), logger
-        )
-
-    logger.info(f"{invokeai.version.__app_name__}, version {invokeai.version.__version__}")
-    logger.info(f'InvokeAI runtime directory is "{config.root}"')
-
-    # these two lines prevent a horrible warning message from appearing
-    # when the frozen CLIP tokenizer is imported
-    import transformers  # type: ignore
-
-    transformers.logging.set_verbosity_error()
-    import diffusers
-
-    diffusers.logging.set_verbosity_error()
-    embedding_path = config.embedding_path
-
-    # migrate legacy models
-    ModelManager.migrate_models()
-
-    # creating the model manager
-    try:
-        device = torch.device(choose_torch_device())
-        precision = 'float16' if config.precision=='float16' \
-        else 'float32' if config.precision=='float32' \
-        else choose_precision(device)
-        
-        model_manager = ModelManager(
-            OmegaConf.load(config.model_conf_path),
-            precision=precision,
-            device_type=device,
-            max_loaded_models=config.max_loaded_models,
-            embedding_path = embedding_path,
-            logger = logger,
-        )
-    except (FileNotFoundError, TypeError, AssertionError) as e:
-        report_model_error(config, e, logger)
-    except (IOError, KeyError) as e:
-        logger.error(f"{e}. Aborting.")
-        sys.exit(-1)
-
-    # try to autoconvert new models
-    # autoimport new .ckpt files
-    if config.autoconvert_path:
-        model_manager.heuristic_import(
-            config.autoconvert_path,
-        )
-    return model_manager
-
-def report_model_error(opt: Namespace, e: Exception, logger: types.ModuleType):
-    logger.error(f'An error occurred while attempting to initialize the model: "{str(e)}"')
-    logger.error(
-        "This can be caused by a missing or corrupted models file, and can sometimes be fixed by (re)installing the models."
-    )
-    yes_to_all = os.environ.get("INVOKE_MODEL_RECONFIGURE")
-    if yes_to_all:
-        logger.warning(
-            "Reconfiguration is being forced by environment variable INVOKE_MODEL_RECONFIGURE"
-        )
-    else:
-        response = input(
-            "Do you want to run invokeai-configure script to select and/or reinstall models? [y] "
-        )
-        if response.startswith(("n", "N")):
-            return
-
-    logger.info("invokeai-configure is launching....\n")
-
-    # Match arguments that were set on the CLI
-    # only the arguments accepted by the configuration script are parsed
-    root_dir = ["--root", opt.root_dir] if opt.root_dir is not None else []
-    config = ["--config", opt.conf] if opt.conf is not None else []
-    sys.argv = ["invokeai-configure"]
-    sys.argv.extend(root_dir)
-    sys.argv.extend(config.to_dict())
-    if yes_to_all is not None:
-        for arg in yes_to_all.split():
-            sys.argv.append(arg)
-
-    from invokeai.frontend.install import invokeai_configure
-
-    invokeai_configure()
-    # TODO: Figure out how to restart
-    # print('** InvokeAI will now restart')
-    # sys.argv = previous_args
-    # main() # would rather do a os.exec(), but doesn't exist?
-    # sys.exit(0)
--- a/invokeai/app/services/model_manager_service.py
+++ b/invokeai/app/services/model_manager_service.py
@ -0,0 +1,398 @@
+# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
+
+from __future__ import annotations
+
+import torch
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Union, Callable, List, Optional, Tuple, types, TYPE_CHECKING
+from dataclasses import dataclass
+
+from invokeai.backend.model_management.model_manager import (
+    ModelManager,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    ModelInfo,
+)
+from invokeai.app.models.exceptions import CanceledException
+from .config import InvokeAIAppConfig
+from ...backend.util import choose_precision, choose_torch_device
+
+if TYPE_CHECKING:
+    from ..invocations.baseinvocation import BaseInvocation, InvocationContext
+
+
+class ModelManagerServiceBase(ABC):
+    """Abstract interface for a service that manages models on disk and in memory"""
+
+    @abstractmethod
+    def __init__(
+        self,
+        config: InvokeAIAppConfig,
+        logger: types.ModuleType,
+    ):
+        """
+        Initialize the service from the application configuration and a
+        logger. How the models.yaml path, torch device, precision and
+        cache size are derived from `config` is left to the concrete
+        implementation.
+        """
+        pass
+    
+    @abstractmethod
+    def get_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: Optional[SubModelType] = None,
+        node: Optional[BaseInvocation] = None,
+        context: Optional[InvocationContext] = None,
+    ) -> ModelInfo:
+        """Retrieve the indicated model with name and type.
+        submodel can be used to get a part (such as the vae)
+        of a diffusers pipeline. node and context are optional
+        and identify the invocation making the request."""
+        pass
+
+    @property
+    @abstractmethod
+    def logger(self):
+        """Return the logger object associated with this service."""
+        pass
+
+    @abstractmethod
+    def model_exists(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ) -> bool:
+        """
+        Return True if the given name, base model and model type
+        identify a known model.
+        """
+        pass
+
+    @abstractmethod
+    def default_model(self) -> Optional[Tuple[str, BaseModelType, ModelType]]:
+        """
+        Returns the name, base model and type of the default model,
+        or None if none is defined.
+        """
+        pass
+
+    @abstractmethod
+    def set_default_model(self, model_name: str, base_model: BaseModelType, model_type: ModelType):
+        """Sets the default model to the indicated name."""
+        pass
+
+    @abstractmethod
+    def model_info(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> dict:
+        """
+        Given a model name returns a dict-like (OmegaConf) object describing it.
+        """
+        pass
+
+    @abstractmethod
+    def model_names(self) -> List[Tuple[str, BaseModelType, ModelType]]:
+        """
+        Returns a list of (name, base model, model type) tuples, one
+        for each known model.
+        """
+        pass
+
+    @abstractmethod
+    def list_models(self, base_model: Optional[BaseModelType] = None, model_type: Optional[ModelType] = None) -> dict:
+        """
+        Return a dict of models, optionally restricted to the given
+        base model and/or model type, in the format:
+        { model_type1:
+          { model_name1: {'status': 'active'|'cached'|'not loaded',
+                         'model_name' : name,
+                         'model_type' : SDModelType,
+                         'description': description,
+                         'format': 'folder'|'safetensors'|'ckpt'
+                         },
+            model_name2: { etc }
+          },
+          model_type2:
+          { model_name_n: etc }
+        }
+        """
+        pass
+
+
+    @abstractmethod
+    def add_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        model_attributes: dict,
+        clobber: bool = False
+    ) -> None:
+        """
+        Update the named model with a dictionary of attributes. Will fail with an
+        assertion error if the name already exists. Pass clobber=True to overwrite.
+        On a successful update, the config will be changed in memory. Will fail
+        with an assertion error if provided attributes are incorrect or
+        the model name is missing. Call commit() to write changes to disk.
+        """
+        pass
+
+    @abstractmethod
+    def del_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ):
+        """
+        Delete the named model from configuration. Call commit() to
+        write the change to disk.
+
+        NOTE(review): earlier wording mentioned a `delete_files` option,
+        but this signature has no such parameter; whether the underlying
+        weight file or diffusers directory is removed is up to the
+        implementation -- confirm against the backend ModelManager.
+        """
+        pass
+
+    @abstractmethod
+    def commit(self, conf_file: Optional[Path] = None) -> None:
+        """
+        Write current configuration out to the indicated file.
+        If no conf_file is provided, then replaces the
+        original file/database used to initialize the object.
+        """
+        pass
+
+# simple implementation
+class ModelManagerService(ModelManagerServiceBase):
+    """Responsible for managing models on disk and in memory"""
+    def __init__(
+        self,
+        config: InvokeAIAppConfig,
+        logger: types.ModuleType,
+    ):
+        """
+        Initialize with the path to the models.yaml config file. 
+        Optional parameters are the torch device type, precision, max_models,
+        and sequential_offload boolean. Note that the default device
+        type and precision are set up for a CUDA system running at half precision.
+        """
+        if config.model_conf_path and config.model_conf_path.exists():
+            config_file = config.model_conf_path
+        else:
+            config_file = config.root_dir / "configs/models.yaml"
+        if not config_file.exists():
+            raise IOError(f"The file {config_file} could not be found.")
+
+        logger.debug(f'config file={config_file}')
+
+        device = torch.device(choose_torch_device())
+        precision = config.precision
+        if precision == "auto":
+            precision = choose_precision(device)
+        dtype = torch.float32 if precision == 'float32' else torch.float16
+
+        # this is transitional backward compatibility
+        # support for the deprecated `max_loaded_models`
+        # configuration value. If present, then the
+        # cache size is set to 2.5 GB times
+        # the number of max_loaded_models. Otherwise
+        # use new `max_cache_size` config setting
+        max_cache_size = config.max_cache_size \
+            if hasattr(config,'max_cache_size') \
+               else config.max_loaded_models * 2.5
+
+        sequential_offload = config.sequential_guidance
+
+        self.mgr = ModelManager(
+            config=config_file,
+            device_type=device,
+            precision=dtype,
+            max_cache_size=max_cache_size,
+            sequential_offload=sequential_offload,
+            logger=logger,
+        )
+        logger.info('Model manager service initialized')
+
+    def get_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: Optional[SubModelType] = None,
+        node: Optional[BaseInvocation] = None,
+        context: Optional[InvocationContext] = None,
+    ) -> ModelInfo:
+        """
+        Retrieve the indicated model. submodel can be used to get a
+        part (such as the vae) of a diffusers mode.
+        """
+
+        # if we are called from within a node, then we get to emit
+        # load start and complete events
+        if node and context:
+            self._emit_load_event(
+                node=node,
+                context=context,
+                model_name=model_name,
+                base_model=base_model,
+                model_type=model_type,
+                submodel=submodel,
+            )
+
+        model_info = self.mgr.get_model(
+            model_name,
+            base_model,
+            model_type,
+            submodel,
+        )
+
+        if node and context:
+            self._emit_load_event(
+                node=node,
+                context=context,
+                model_name=model_name,
+                base_model=base_model,
+                model_type=model_type,
+                submodel=submodel,
+                model_info=model_info
+            )
+            
+        return model_info
+
+    def model_exists(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ) -> bool:
+        """
+        Given a model name, returns True if it is a valid
+        identifier.
+        """
+        return self.mgr.model_exists(
+            model_name,
+            base_model,
+            model_type,
+        )
+
+    def default_model(self) -> Optional[Tuple[str, BaseModelType, ModelType]]:
+        """
+        Returns the name of the default model, or None
+        if none is defined.
+        """
+        return self.mgr.default_model()
+
+    def set_default_model(self, model_name: str, base_model: BaseModelType, model_type: ModelType):
+        """Sets the default model to the indicated name."""
+        self.mgr.set_default_model(model_name, base_model, model_type)
+
+    def model_info(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> dict:
+        """
+        Given a model name returns a dict-like (OmegaConf) object describing it.
+        """
+        return self.mgr.model_info(model_name, base_model, model_type)
+
+    def model_names(self) -> List[Tuple[str, BaseModelType, ModelType]]:
+        """
+        Returns a list of all the model names known.
+        """
+        return self.mgr.model_names()
+
+    def list_models(
+        self,
+        base_model: Optional[BaseModelType] = None,
+        model_type: Optional[ModelType] = None
+    ) -> dict:
+        """
+        Return a dict of models in the format:
+        { model_type1:
+          { model_name1: {'status': 'active'|'cached'|'not loaded',
+                         'model_name' : name,
+                         'model_type' : SDModelType,
+                         'description': description,
+                         'format': 'folder'|'safetensors'|'ckpt'
+                         },
+            model_name2: { etc }
+          },
+          model_type2:
+            { model_name_n: etc
+        }
+        """
+        return self.mgr.list_models(base_model, model_type)
+
+    def add_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        model_attributes: dict,
+        clobber: bool = False,
+    )->None:
+        """
+        Update the named model with a dictionary of attributes. Will fail with an
+        assertion error if the name already exists. Pass clobber=True to overwrite.
+        On a successful update, the config will be changed in memory. Will fail 
+        with an assertion error if provided attributes are incorrect or 
+        the model name is missing. Call commit() to write changes to disk.
+        """
+        return self.mgr.add_model(model_name, base_model, model_type, model_attributes, clobber)
+
+
+    def del_model(
+        self,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ):
+        """
+        Delete the named model from configuration. If delete_files is true, 
+        then the underlying weight file or diffusers directory will be deleted 
+        as well. Call commit() to write to disk.
+        """
+        self.mgr.del_model(model_name, base_model, model_type)
+
+
+    def commit(self, conf_file: Optional[Path]=None):
+        """
+        Write current configuration out to the indicated file.
+        If no conf_file is provided, then replaces the
+        original file/database used to initialize the object.
+        """
+        return self.mgr.commit(conf_file)
+
+    def _emit_load_event(
+        self,
+        node,
+        context,
+        model_name: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: SubModelType,
+        model_info: Optional[ModelInfo] = None,
+    ):
+        if context.services.queue.is_canceled(context.graph_execution_state_id):
+            raise CanceledException()
+        graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
+        source_node_id = graph_execution_state.prepared_source_mapping[node.id]
+        if model_info:
+            context.services.events.emit_model_load_completed(
+                graph_execution_state_id=context.graph_execution_state_id,
+                node=node.dict(),
+                source_node_id=source_node_id,
+                model_name=model_name,
+                base_model=base_model,
+                model_type=model_type,
+                submodel=submodel,
+                model_info=model_info
+            )
+        else:
+            context.services.events.emit_model_load_started(
+                graph_execution_state_id=context.graph_execution_state_id,
+                node=node.dict(),
+                source_node_id=source_node_id,
+                model_name=model_name,
+                base_model=base_model,
+                model_type=model_type,
+                submodel=submodel,
+            )
+
+
+    @property
+    def logger(self):
+        return self.mgr.logger
+        
--- a/invokeai/app/services/restoration_services.py
+++ b/invokeai/app/services/restoration_services.py
@ -16,13 +16,14 @@ class RestorationServices:
            gfpgan, codeformer, esrgan = None, None, None
            if args.restore or args.esrgan:
                restoration = Restoration()
-                if args.restore:
+                # TODO: redo for new model structure
+                if False and args.restore:
                    gfpgan, codeformer = restoration.load_face_restore_models(
                        args.gfpgan_model_path
                    )
                else:
                    logger.info("Face restoration disabled")
-                    if args.esrgan:
+                    if False and args.esrgan:
                        esrgan = restoration.load_esrgan(args.esrgan_bg_tile)
                    else:
                        logger.info("Upscaling disabled")
--- a/invokeai/backend/init.py
+++ b/invokeai/backend/init.py
@ -9,5 +9,8 @@ from .generator import (
    Img2Img,
    Inpaint
 )
-from .model_management import ModelManager, SDModelComponent
+from .model_management import (
+    ModelManager, ModelCache, BaseModelType,
+    ModelType, SubModelType, ModelInfo
+    )
 from .safety_checker import SafetyChecker
--- a/invokeai/backend/generator/base.py
+++ b/invokeai/backend/generator/base.py
@ -113,54 +113,51 @@ class InvokeAIGenerator(metaclass=ABCMeta):
        generator_args.update(keyword_args)

        model_info = self.model_info
-        model_name = model_info['model_name']
-        model:StableDiffusionGeneratorPipeline = model_info['model']
-        model_hash = model_info['hash']
-        scheduler: Scheduler = self.get_scheduler(
-            model=model,
-            scheduler_name=generator_args.get('scheduler')
-        )
+        model_name = model_info.name
+        model_hash = model_info.hash
+        with model_info.context as model:
+            scheduler: Scheduler = self.get_scheduler(
+                model=model,
+                scheduler_name=generator_args.get('scheduler')
+            )
+            uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt,model=model)
+            gen_class = self._generator_class()
+            generator = gen_class(model, self.params.precision, **self.kwargs)
+            if self.params.variation_amount > 0:
+                generator.set_variation(generator_args.get('seed'),
+                                        generator_args.get('variation_amount'),
+                                        generator_args.get('with_variations')
+                                        )

-        # get conditioning from prompt via Compel package
-        uc, c, extra_conditioning_info = get_uc_and_c_and_ec(prompt, model=model)
-
-        gen_class = self._generator_class()
-        generator = gen_class(model, self.params.precision, **self.kwargs)
-        if self.params.variation_amount > 0:
-            generator.set_variation(generator_args.get('seed'),
-                                    generator_args.get('variation_amount'),
-                                    generator_args.get('with_variations')
-                                    )
-
-        if isinstance(model, DiffusionPipeline):
-            for component in [model.unet, model.vae]:
-                configure_model_padding(component,
+            if isinstance(model, DiffusionPipeline):
+                for component in [model.unet, model.vae]:
+                    configure_model_padding(component,
+                                            generator_args.get('seamless',False),
+                                            generator_args.get('seamless_axes')
+                                            )
+            else:
+                configure_model_padding(model,
                                        generator_args.get('seamless',False),
                                        generator_args.get('seamless_axes')
                                        )
-        else:
-            configure_model_padding(model,
-                                    generator_args.get('seamless',False),
-                                    generator_args.get('seamless_axes')
-                                    )

-        iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1)
-        for i in iteration_count:
-            results = generator.generate(prompt,
-                                         conditioning=(uc, c, extra_conditioning_info),
-                                         step_callback=step_callback,
-                                         sampler=scheduler,
-                                         **generator_args,
-                                         )
-            output = InvokeAIGeneratorOutput(
-                image=results[0][0],
-                seed=results[0][1],
-                attention_maps_images=results[0][2],
-                model_hash = model_hash,
-                params=Namespace(model_name=model_name,**generator_args),
-            )
-            if callback:
-                callback(output)
+            iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1)
+            for i in iteration_count:
+                results = generator.generate(prompt,
+                                             conditioning=(uc, c, extra_conditioning_info),
+                                             step_callback=step_callback,
+                                             sampler=scheduler,
+                                             **generator_args,
+                                             )
+                output = InvokeAIGeneratorOutput(
+                    image=results[0][0],
+                    seed=results[0][1],
+                    attention_maps_images=results[0][2],
+                    model_hash = model_hash,
+                    params=Namespace(model_name=model_name,**generator_args),
+                )
+                if callback:
+                    callback(output)
            yield output

    @classmethod
@ -274,7 +271,6 @@ class Embiggen(Txt2Img):
        from .embiggen import Embiggen
        return Embiggen

-
 class Generator:
    downsampling_factor: int
    latent_channels: int
--- a/invokeai/backend/generator/txt2img.py
+++ b/invokeai/backend/generator/txt2img.py
@ -90,7 +90,7 @@ class Txt2Img(Generator):
            elif isinstance(self.control_model, MultiControlNetModel):
                images = []
                for image_ in control_image:
-                    image_ = self.model.prepare_control_image(
+                    image_ = pipeline.prepare_control_image(
                        image=image_,
                        do_classifier_free_guidance=do_classifier_free_guidance,
                        width=width,
--- a/invokeai/backend/model_management/init.py
+++ b/invokeai/backend/model_management/init.py
@ -1,11 +1,6 @@
 """
 Initialization file for invokeai.backend.model_management
 """
-from .convert_ckpt_to_diffusers import (
-    convert_ckpt_to_diffusers,
-    load_pipeline_from_original_stable_diffusion_ckpt,
-)
-from .model_manager import ModelManager,SDModelComponent
-
-
-
+from .model_manager import ModelManager, ModelInfo
+from .model_cache import ModelCache
+from .models import BaseModelType, ModelType, SubModelType, ModelVariantType
--- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py
+++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py
@ -28,10 +28,13 @@ from safetensors.torch import load_file
 import invokeai.backend.util.logging as logger
 from invokeai.app.services.config import InvokeAIAppConfig

-from .model_manager import ModelManager, SDLegacyType
+from .model_manager import ModelManager
+from .model_cache import ModelCache
+from .models import SchedulerPredictionType, BaseModelType, ModelVariantType

 try:
    from omegaconf import OmegaConf
+    from omegaconf.dictconfig import DictConfig
 except ImportError:
    raise ImportError(
        "OmegaConf is required to convert the LDM checkpoints. Please install it with `pip install OmegaConf`."
@ -56,10 +59,6 @@ from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import (
    LDMBertConfig,
    LDMBertModel,
 )
-from diffusers.pipelines.paint_by_example import (
-    PaintByExampleImageEncoder,
-    PaintByExamplePipeline,
-)
 from diffusers.pipelines.stable_diffusion.safety_checker import (
    StableDiffusionSafetyChecker,
 )
@ -74,6 +73,8 @@ from transformers import (

 from ..stable_diffusion import StableDiffusionGeneratorPipeline

+MODEL_ROOT = None
+
 def shave_segments(path, n_shave_prefix_segments=1):
    """
    Removes segments. Positive values shave the first segments, negative shave the last segments.
@ -612,16 +613,29 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False

    return new_checkpoint

-
 def convert_ldm_vae_checkpoint(checkpoint, config):
-    # extract state dict for VAE
-    vae_state_dict = {}
-    vae_key = "first_stage_model."
-    keys = list(checkpoint.keys())
-    for key in keys:
-        if key.startswith(vae_key):
-            vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key)
+    """Extract the VAE weights from `checkpoint` and convert them.
+
+    Returns the result of convert_ldm_vae_state_dict() applied to the
+    extracted VAE state dict and `config`.
+    """
+    # Extract state dict for VAE. Works both with burnt-in
+    # VAEs, and with standalone VAEs.

+    # checkpoint can either be an all-in-one stable diffusion
+    # model, or an isolated vae .ckpt. This tests for
+    # a key that will be present in the all-in-one model
+    # that isn't present in the isolated ckpt.
+    probe_key = "first_stage_model.encoder.conv_in.weight"
+    if probe_key in checkpoint:
+        vae_state_dict = {}
+        vae_key = "first_stage_model."
+        keys = list(checkpoint.keys())
+        for key in keys:
+            if key.startswith(vae_key):
+                # NOTE(review): str.replace removes every occurrence of
+                # the prefix text, not only the leading one
+                vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key)
+    else:
+        # isolated VAE checkpoint: keys are used as they are
+        vae_state_dict = checkpoint
+        
+    new_checkpoint = convert_ldm_vae_state_dict(vae_state_dict,config)
+    return new_checkpoint
+
+def convert_ldm_vae_state_dict(vae_state_dict, config):
    new_checkpoint = {}

    new_checkpoint["encoder.conv_in.weight"] = vae_state_dict["encoder.conv_in.weight"]
@ -841,10 +855,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):


 def convert_ldm_clip_checkpoint(checkpoint):
-    text_model = CLIPTextModel.from_pretrained(
-        "openai/clip-vit-large-patch14", cache_dir=InvokeAIAppConfig.get_config().cache_dir
-    )
-
+    text_model = CLIPTextModel.from_pretrained(MODEL_ROOT / 'clip-vit-large-patch14')
    keys = list(checkpoint.keys())

    text_model_dict = {}
@ -896,82 +907,10 @@ protected = {re.escape(x[0]): x[1] for x in textenc_transformer_conversion_lst}
 textenc_pattern = re.compile("|".join(protected.keys()))


-def convert_paint_by_example_checkpoint(checkpoint):
-    cache_dir = InvokeAIAppConfig.get_config().cache_dir
-    config = CLIPVisionConfig.from_pretrained(
-        "openai/clip-vit-large-patch14", cache_dir=cache_dir
-    )
-    model = PaintByExampleImageEncoder(config)
-
-    keys = list(checkpoint.keys())
-
-    text_model_dict = {}
-
-    for key in keys:
-        if key.startswith("cond_stage_model.transformer"):
-            text_model_dict[key[len("cond_stage_model.transformer.") :]] = checkpoint[
-                key
-            ]
-
-    # load clip vision
-    model.model.load_state_dict(text_model_dict)
-
-    # load mapper
-    keys_mapper = {
-        k[len("cond_stage_model.mapper.res") :]: v
-        for k, v in checkpoint.items()
-        if k.startswith("cond_stage_model.mapper")
-    }
-
-    MAPPING = {
-        "attn.c_qkv": ["attn1.to_q", "attn1.to_k", "attn1.to_v"],
-        "attn.c_proj": ["attn1.to_out.0"],
-        "ln_1": ["norm1"],
-        "ln_2": ["norm3"],
-        "mlp.c_fc": ["ff.net.0.proj"],
-        "mlp.c_proj": ["ff.net.2"],
-    }
-
-    mapped_weights = {}
-    for key, value in keys_mapper.items():
-        prefix = key[: len("blocks.i")]
-        suffix = key.split(prefix)[-1].split(".")[-1]
-        name = key.split(prefix)[-1].split(suffix)[0][1:-1]
-        mapped_names = MAPPING[name]
-
-        num_splits = len(mapped_names)
-        for i, mapped_name in enumerate(mapped_names):
-            new_name = ".".join([prefix, mapped_name, suffix])
-            shape = value.shape[0] // num_splits
-            mapped_weights[new_name] = value[i * shape : (i + 1) * shape]
-
-    model.mapper.load_state_dict(mapped_weights)
-
-    # load final layer norm
-    model.final_layer_norm.load_state_dict(
-        {
-            "bias": checkpoint["cond_stage_model.final_ln.bias"],
-            "weight": checkpoint["cond_stage_model.final_ln.weight"],
-        }
-    )
-
-    # load final proj
-    model.proj_out.load_state_dict(
-        {
-            "bias": checkpoint["proj_out.bias"],
-            "weight": checkpoint["proj_out.weight"],
-        }
-    )
-
-    # load uncond vector
-    model.uncond_vector.data = torch.nn.Parameter(checkpoint["learnable_vector"])
-    return model
-
-
 def convert_open_clip_checkpoint(checkpoint):
-    cache_dir = InvokeAIAppConfig.get_config().cache_dir
    text_model = CLIPTextModel.from_pretrained(
-        "stabilityai/stable-diffusion-2", subfolder="text_encoder", cache_dir=cache_dir
+        MODEL_ROOT / 'stable-diffusion-2-clip',
+        subfolder='text_encoder',
    )

    keys = list(checkpoint.keys())
@ -1047,22 +986,30 @@ def replace_checkpoint_vae(checkpoint, vae_path:str):
        new_key = f'first_stage_model.{vae_key}'
        checkpoint[new_key] = state_dict[vae_key]

+def convert_ldm_vae_to_diffusers(checkpoint, vae_config: DictConfig, image_size: int)->AutoencoderKL:
+    vae_config = create_vae_diffusers_config(
+        vae_config, image_size=image_size
+    )
+
+    converted_vae_checkpoint = convert_ldm_vae_checkpoint(
+        checkpoint, vae_config
+    )
+
+    vae = AutoencoderKL(**vae_config)
+    vae.load_state_dict(converted_vae_checkpoint)
+    return vae
+
 def load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path: str,
-    original_config_file: str = None,
-    num_in_channels: int = None,
-    scheduler_type: str = "pndm",
-    pipeline_type: str = None,
-    image_size: int = None,
-    prediction_type: str = None,
+    model_version: BaseModelType,
+    model_variant: ModelVariantType,
+    original_config_file: str,
    extract_ema: bool = True,
-    upcast_attn: bool = False,
-    vae: AutoencoderKL = None,
-    vae_path: str = None,
    precision: torch.dtype = torch.float32,
-    return_generator_pipeline: bool = False,
-    scan_needed:bool=True,
-) -> Union[StableDiffusionPipeline, StableDiffusionGeneratorPipeline]:
+    upcast_attention: bool = False,
+    prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon,
+    scan_needed: bool = True,
+) -> StableDiffusionPipeline:
    """
    Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
    config file.
@ -1074,148 +1021,68 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
    :param checkpoint_path: Path to `.ckpt` file.
    :param original_config_file: Path to `.yaml` config file corresponding to the original architecture.
      If `None`, will be automatically inferred by looking for a key that only exists in SD2.0 models.
-    :param image_size: The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2
-      Base. Use 768 for Stable Diffusion v2.
    :param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion
     v1.X and Stable Diffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
-    :param num_in_channels: The number of input channels. If `None` number of input channels will be automatically
-    inferred.
    :param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler",
     "euler-ancestral", "dpm", "ddim"]`. :param model_type: The pipeline type. `None` to automatically infer, or one of
-     `["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder", "PaintByExample"]`. :param extract_ema: Only relevant for
+     `["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder"]`. :param extract_ema: Only relevant for
     checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights
     or not. Defaults to `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher
     quality images for inference. Non-EMA weights are usually better to continue fine-tuning.
    :param precision: precision to use - torch.float16, torch.float32 or torch.autocast
    :param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
    running stable diffusion 2.1.
-    :param vae: A diffusers VAE to load into the pipeline.
-    :param vae_path: Path to a checkpoint VAE that will be converted into diffusers and loaded into the pipeline.
    """
    config = InvokeAIAppConfig.get_config()
-    cache_dir = config.cache_dir

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        verbosity = dlogging.get_verbosity()
        dlogging.set_verbosity_error()

-        if Path(checkpoint_path).suffix == '.ckpt':
-            if scan_needed:
-                ModelManager.scan_model(checkpoint_path,checkpoint_path)
-            checkpoint = torch.load(checkpoint_path)
-        else:
+        if str(checkpoint_path).endswith(".safetensors"):
            checkpoint = load_file(checkpoint_path)
-
-        pipeline_class = (
-            StableDiffusionGeneratorPipeline
-            if return_generator_pipeline
-            else StableDiffusionPipeline
-        )
-
-        # Sometimes models don't have the global_step item
-        if "global_step" in checkpoint:
-            global_step = checkpoint["global_step"]
        else:
-            logger.debug("global_step key not found in model")
-            global_step = None
+            if scan_needed:
+                ModelCache.scan_model(checkpoint_path, checkpoint_path)
+            checkpoint = torch.load(checkpoint_path)

        # sometimes there is a state_dict key and sometimes not
        if "state_dict" in checkpoint:
            checkpoint = checkpoint["state_dict"]

-        upcast_attention = False
-        if original_config_file is None:
-            model_type = ModelManager.probe_model_type(checkpoint)
-
-            if model_type == SDLegacyType.V2_v:
-                original_config_file = (
-                    config.legacy_conf_path / "v2-inference-v.yaml"
-                )
-                if global_step == 110000:
-                    # v2.1 needs to upcast attention
-                    upcast_attention = True
-            elif model_type == SDLegacyType.V2_e:
-                original_config_file = (
-                    config.legacy_conf_path / "v2-inference.yaml"
-                )
-            elif model_type == SDLegacyType.V1_INPAINT:
-                original_config_file = (
-                    config.legacy_conf_path / "v1-inpainting-inference.yaml"
-                )
-
-            elif model_type == SDLegacyType.V1:
-                original_config_file = (
-                    config.legacy_conf_path / "v1-inference.yaml"
-                )
-
-            else:
-                raise Exception("Unknown checkpoint type")
-
        original_config = OmegaConf.load(original_config_file)

-        if num_in_channels is not None:
-            original_config["model"]["params"]["unet_config"]["params"][
-                "in_channels"
-            ] = num_in_channels
-
-        if (
-            "parameterization" in original_config["model"]["params"]
-            and original_config["model"]["params"]["parameterization"] == "v"
-        ):
-            if prediction_type is None:
-                # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
-                # as it relies on a brittle global step parameter here
-                prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
-            if image_size is None:
-                # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
-                # as it relies on a brittle global step parameter here
-                image_size = 512 if global_step == 875000 else 768
+        if model_version == BaseModelType.StableDiffusion2 and prediction_type == SchedulerPredictionType.VPrediction:
+            image_size = 768
        else:
-            if prediction_type is None:
-                prediction_type = "epsilon"
-            if image_size is None:
-                image_size = 512
+            image_size = 512
+
+        #
+        # convert scheduler
+        #

        num_train_timesteps = original_config.model.params.timesteps
        beta_start = original_config.model.params.linear_start
        beta_end = original_config.model.params.linear_end

-        scheduler = DDIMScheduler(
+        scheduler = PNDMScheduler(
            beta_end=beta_end,
            beta_schedule="scaled_linear",
            beta_start=beta_start,
            num_train_timesteps=num_train_timesteps,
            steps_offset=1,
-            clip_sample=False,
            set_alpha_to_one=False,
            prediction_type=prediction_type,
+            skip_prk_steps=True
        )
        # make sure scheduler works correctly with DDIM
        scheduler.register_to_config(clip_sample=False)

-        if scheduler_type == "pndm":
-            config = dict(scheduler.config)
-            config["skip_prk_steps"] = True
-            scheduler = PNDMScheduler.from_config(config)
-        elif scheduler_type == "lms":
-            scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
-        elif scheduler_type == "heun":
-            scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
-        elif scheduler_type == "euler":
-            scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
-        elif scheduler_type == "euler-ancestral":
-            scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
-        elif scheduler_type == "dpm":
-            scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
-        elif scheduler_type == 'unipc':
-            scheduler = UniPCMultistepScheduler.from_config(scheduler.config)
-        elif scheduler_type == "ddim":
-            scheduler = scheduler
-        else:
-            raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+        #
+        # convert unet
+        #

-        # Convert the UNet2DConditionModel model.
        unet_config = create_unet_diffusers_config(
            original_config, image_size=image_size
        )
@ -1228,44 +1095,25 @@ def load_pipeline_from_original_stable_diffusion_ckpt(

        unet.load_state_dict(converted_unet_checkpoint)

-        # If a replacement VAE path was specified, we'll incorporate that into
-        # the checkpoint model and then convert it
-        if vae_path:
-            logger.debug(f"Converting VAE {vae_path}")
-            replace_checkpoint_vae(checkpoint,vae_path)
-        # otherwise we use the original VAE, provided that
-        # an externally loaded diffusers VAE was not passed
-        elif not vae:
-            logger.debug("Using checkpoint model's original VAE")
+        #
+        # convert vae
+        #

-        if vae:
-            logger.debug("Using replacement diffusers VAE")
-        else:  # convert the original or replacement VAE
-            vae_config = create_vae_diffusers_config(
-                original_config, image_size=image_size
-            )
-            converted_vae_checkpoint = convert_ldm_vae_checkpoint(
-                checkpoint, vae_config
-            )
-
-            vae = AutoencoderKL(**vae_config)
-            vae.load_state_dict(converted_vae_checkpoint)
+        vae = convert_ldm_vae_to_diffusers(
+            checkpoint,
+            original_config,
+            image_size,
+        )

        # Convert the text model.
-        model_type = pipeline_type
-        if model_type is None:
-            model_type = original_config.model.params.cond_stage_config.target.split(
-                "."
-            )[-1]
-
+        model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
        if model_type == "FrozenOpenCLIPEmbedder":
            text_model = convert_open_clip_checkpoint(checkpoint)
            tokenizer = CLIPTokenizer.from_pretrained(
-                "stabilityai/stable-diffusion-2",
-                subfolder="tokenizer",
-                cache_dir=cache_dir,
+                MODEL_ROOT / 'stable-diffusion-2-clip',
+                subfolder='tokenizer',
            )
-            pipe = pipeline_class(
+            pipe = StableDiffusionPipeline(
                vae=vae.to(precision),
                text_encoder=text_model.to(precision),
                tokenizer=tokenizer,
@ -1275,49 +1123,26 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
                feature_extractor=None,
                requires_safety_checker=False,
            )
-        elif model_type == "PaintByExample":
-            vision_model = convert_paint_by_example_checkpoint(checkpoint)
-            tokenizer = CLIPTokenizer.from_pretrained(
-                "openai/clip-vit-large-patch14", cache_dir=cache_dir
-            )
-            feature_extractor = AutoFeatureExtractor.from_pretrained(
-                "CompVis/stable-diffusion-safety-checker", cache_dir=cache_dir
-            )
-            pipe = PaintByExamplePipeline(
-                vae=vae,
-                image_encoder=vision_model,
-                unet=unet,
-                scheduler=scheduler,
-                safety_checker=None,
-                feature_extractor=feature_extractor,
-            )
+
        elif model_type in ["FrozenCLIPEmbedder", "WeightedFrozenCLIPEmbedder"]:
            text_model = convert_ldm_clip_checkpoint(checkpoint)
-            tokenizer = CLIPTokenizer.from_pretrained(
-                "openai/clip-vit-large-patch14", cache_dir=cache_dir
-            )
-            safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-                "CompVis/stable-diffusion-safety-checker",
-                cache_dir=cache_dir,
-            )
-            feature_extractor = AutoFeatureExtractor.from_pretrained(
-                "CompVis/stable-diffusion-safety-checker", cache_dir=cache_dir
-            )
-            pipe = pipeline_class(
+            tokenizer = CLIPTokenizer.from_pretrained(MODEL_ROOT / 'clip-vit-large-patch14')
+            safety_checker = StableDiffusionSafetyChecker.from_pretrained(MODEL_ROOT / 'stable-diffusion-safety-checker')
+            feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ROOT / 'stable-diffusion-safety-checker')
+            pipe = StableDiffusionPipeline(
                vae=vae.to(precision),
                text_encoder=text_model.to(precision),
                tokenizer=tokenizer,
                unet=unet.to(precision),
                scheduler=scheduler,
-                safety_checker=None if return_generator_pipeline else safety_checker.to(precision),
+                safety_checker=safety_checker.to(precision),
                feature_extractor=feature_extractor,
            )
+
        else:
            text_config = create_ldm_bert_config(original_config)
            text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-            tokenizer = BertTokenizerFast.from_pretrained(
-                "bert-base-uncased", cache_dir=cache_dir
-            )
+            tokenizer = BertTokenizerFast.from_pretrained(MODEL_ROOT / "bert-base-uncased")
            pipe = LDMTextToImagePipeline(
                vqvae=vae,
                bert=text_model,
@ -1331,15 +1156,19 @@ def load_pipeline_from_original_stable_diffusion_ckpt(


 def convert_ckpt_to_diffusers(
-    checkpoint_path: Union[str, Path],
-    dump_path: Union[str, Path],
-    **kwargs,
+        checkpoint_path: Union[str, Path],
+        dump_path: Union[str, Path],
+        model_root: Union[str, Path],
+        **kwargs,
 ):
    """
    Takes all the arguments of load_pipeline_from_original_stable_diffusion_ckpt(),
    and in addition a path-like object indicating the location of the desired diffusers
    model to be written.
    """
+    # setting global here to avoid massive changes late at night
+    global MODEL_ROOT
+    MODEL_ROOT = Path(model_root) / 'core/convert'
    pipe = load_pipeline_from_original_stable_diffusion_ckpt(checkpoint_path, **kwargs)

    pipe.save_pretrained(
--- a/invokeai/backend/model_management/lora.py
+++ b/invokeai/backend/model_management/lora.py
@ -0,0 +1,676 @@
+from __future__ import annotations
+
+import copy
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import torch
+from compel.embeddings_provider import BaseTextualInversionManager
+from diffusers.models import UNet2DConditionModel
+from safetensors.torch import load_file
+from torch.utils.hooks import RemovableHandle
+from transformers import CLIPTextModel, CLIPTokenizer
+
+class LoRALayerBase:
+    """
+    Shared state and forward computation for the LoRA-family layers.
+
+    Attributes assigned by `__init__`:
+      alpha:     `values["alpha"]` converted with `.item()`, or None when absent
+      bias:      sparse COO tensor built from the `bias_*` entries, or None
+      rank:      None here; subclasses assign it after calling `__init__`
+      layer_key: the key this layer was created for
+    """
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: dict,
+    ):
+        self.alpha = values["alpha"].item() if "alpha" in values else None
+
+        # A bias is only built when all three of its components are present.
+        bias_keys = ("bias_indices", "bias_values", "bias_size")
+        if all(name in values for name in bias_keys):
+            self.bias = torch.sparse_coo_tensor(
+                values["bias_indices"],
+                values["bias_values"],
+                tuple(values["bias_size"]),
+            )
+        else:
+            self.bias = None
+
+        self.rank = None # set in layer implementation
+        self.layer_key = layer_key
+
+    def forward(
+        self,
+        module: torch.nn.Module,
+        input_h: Any, # for real looks like Tuple[torch.nn.Tensor] but not sure
+        multiplier: float,
+    ):
+        """
+        Apply this layer's weight to `input_h` using the operation that matches
+        `module` (conv2d for exactly `torch.nn.Conv2d`, linear otherwise), then
+        scale the result by `multiplier` and by alpha / rank (1.0 when either
+        alpha or rank is unset or zero).
+        """
+        if type(module) is torch.nn.Conv2d:
+            op = torch.nn.functional.conv2d
+            extra_args = {
+                "stride": module.stride,
+                "padding": module.padding,
+                "dilation": module.dilation,
+                "groups": module.groups,
+            }
+        else:
+            op = torch.nn.functional.linear
+            extra_args = {}
+
+        weight = self.get_weight(module)
+        bias = 0 if self.bias is None else self.bias
+
+        if self.alpha and self.rank:
+            scale = self.alpha / self.rank
+        else:
+            scale = 1.0
+
+        # The combined weight is viewed in the wrapped module's weight shape;
+        # no bias argument is passed to the underlying op.
+        patched = op(
+            *input_h,
+            (weight + bias).view(module.weight.shape),
+            None,
+            **extra_args,
+        )
+        return patched * multiplier * scale
+
+    def get_weight(self, module: torch.nn.Module):
+        """Return the layer's weight tensor; must be provided by subclasses."""
+        raise NotImplementedError()
+
+    def calc_size(self) -> int:
+        """Return the number of bytes held by the tensors owned by this base class."""
+        if self.bias is None:
+            return 0
+        return self.bias.nelement() * self.bias.element_size()
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """Move/cast the bias tensor (if any) in place; returns None."""
+        if self.bias is None:
+            return
+        self.bias = self.bias.to(device=device, dtype=dtype)
+
+
+# TODO: find and debug lora/locon with bias
+class LoRALayer(LoRALayerBase):
+    """
+    LoRA / LoCon layer.
+
+    Attributes:
+      up:   `values["lora_up.weight"]`
+      down: `values["lora_down.weight"]`
+      mid:  `values["lora_mid.weight"]` when present, otherwise None
+      rank: first dimension of `down`
+    """
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: dict,
+    ):
+        super().__init__(layer_key, values)
+
+        self.up = values["lora_up.weight"]
+        self.down = values["lora_down.weight"]
+        self.mid = values.get("lora_mid.weight")
+
+        self.rank = self.down.shape[0]
+
+    def get_weight(self, module: torch.nn.Module):
+        """
+        Return the weight delta of this layer.
+
+        Without `mid` this is the matrix product of the flattened `up` and
+        `down`. With `mid`, `up` and `down` are reduced to their first two
+        dimensions and combined with `mid` through an einsum.
+        """
+        if self.mid is not None:
+            # Each factor is reshaped from its OWN shape. Previously this read
+            # an unassigned local `up` and reused `up`'s shape for `down`.
+            up = self.up.reshape(self.up.shape[0], self.up.shape[1])
+            down = self.down.reshape(self.down.shape[0], self.down.shape[1])
+            weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
+        else:
+            weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
+
+        return weight
+
+    def calc_size(self) -> int:
+        """Return the number of bytes held by this layer's tensors (including the base bias)."""
+        model_size = super().calc_size()
+        for val in (self.up, self.mid, self.down):
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """Move/cast all tensors of this layer in place; returns None."""
+        super().to(device=device, dtype=dtype)
+
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)
+
+        if self.mid is not None:
+            self.mid = self.mid.to(device=device, dtype=dtype)
+
+
+class LoHALayer(LoRALayerBase):
+    """
+    LoHA layer: the weight is the elementwise product of two factorized terms.
+
+    Attributes:
+      w1_a, w1_b, w2_a, w2_b: the `hada_w*` tensors from `values`
+      t1, t2: the optional `hada_t1` / `hada_t2` tensors, or None
+      rank:   first dimension of `w1_b`
+    """
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: dict,
+    ):
+        # The base class takes (layer_key, values); the previous call passed
+        # four undefined names and raised NameError on every construction.
+        super().__init__(layer_key, values)
+
+        self.w1_a = values["hada_w1_a"]
+        self.w1_b = values["hada_w1_b"]
+        self.w2_a = values["hada_w2_a"]
+        self.w2_b = values["hada_w2_b"]
+
+        self.t1 = values.get("hada_t1")
+        self.t2 = values.get("hada_t2")
+
+        self.rank = self.w1_b.shape[0]
+
+    def get_weight(self, module: torch.nn.Module):
+        """
+        Return the weight delta: `(w1_a @ w1_b) * (w2_a @ w2_b)` when `t1` is
+        absent, otherwise the product of the two einsum reconstructions that
+        use `t1` and `t2`.
+        """
+        if self.t1 is None:
+            weight = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
+
+        else:
+            rebuild1 = torch.einsum(
+                "i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a
+            )
+            rebuild2 = torch.einsum(
+                "i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a
+            )
+            weight = rebuild1 * rebuild2
+
+        return weight
+
+    def calc_size(self) -> int:
+        """Return the number of bytes held by this layer's tensors (including the base bias)."""
+        model_size = super().calc_size()
+        for val in (self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2):
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """Move/cast all tensors of this layer in place; returns None."""
+        super().to(device=device, dtype=dtype)
+
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+        if self.t1 is not None:
+            self.t1 = self.t1.to(device=device, dtype=dtype)
+
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
+
+
+class LoKRLayer(LoRALayerBase):
+    """
+    LoKR layer: the weight is `torch.kron(w1, w2)`.
+
+    Each factor is stored either whole (`w1` / `w2`) or as a pair
+    (`w*_a`, `w*_b`), depending on which keys `values` provides; `t2` is the
+    optional `lokr_t2` tensor. `rank` is the first dimension of `lokr_w1_b`,
+    else of `lokr_w2_b`, else None (unscaled).
+    """
+
+    def __init__(
+        self,
+        layer_key: str,
+        values: dict,
+    ):
+        # The base class takes (layer_key, values); the previous call passed
+        # four undefined names and raised NameError on every construction.
+        super().__init__(layer_key, values)
+
+        if "lokr_w1" in values:
+            self.w1 = values["lokr_w1"]
+            self.w1_a = None
+            self.w1_b = None
+        else:
+            self.w1 = None
+            self.w1_a = values["lokr_w1_a"]
+            self.w1_b = values["lokr_w1_b"]
+
+        if "lokr_w2" in values:
+            self.w2 = values["lokr_w2"]
+            self.w2_a = None
+            self.w2_b = None
+        else:
+            self.w2 = None
+            self.w2_a = values["lokr_w2_a"]
+            self.w2_b = values["lokr_w2_b"]
+
+        self.t2 = values.get("lokr_t2")
+
+        if "lokr_w1_b" in values:
+            self.rank = values["lokr_w1_b"].shape[0]
+        elif "lokr_w2_b" in values:
+            self.rank = values["lokr_w2_b"].shape[0]
+        else:
+            self.rank = None # unscaled
+
+    def get_weight(self, module: torch.nn.Module):
+        """
+        Return the weight delta, reshaped to `module.weight.shape`.
+
+        Factors stored as pairs are rebuilt first (`a @ b`, or an einsum with
+        `t2` for the second factor), then combined with `torch.kron`.
+        """
+        w1 = self.w1
+        if w1 is None:
+            w1 = self.w1_a @ self.w1_b
+
+        w2 = self.w2
+        if w2 is None:
+            if self.t2 is None:
+                w2 = self.w2_a @ self.w2_b
+            else:
+                w2 = torch.einsum('i j k l, i p, j r -> p r k l', self.t2, self.w2_a, self.w2_b)
+
+        # A 4-D second factor needs two extra trailing axes on the first one.
+        if len(w2.shape) == 4:
+            w1 = w1.unsqueeze(2).unsqueeze(2)
+        w2 = w2.contiguous()
+        weight = torch.kron(w1, w2).reshape(module.weight.shape) # TODO: can we remove reshape?
+
+        return weight
+
+    def calc_size(self) -> int:
+        """Return the number of bytes held by this layer's tensors (including the base bias)."""
+        model_size = super().calc_size()
+        for val in (self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2):
+            if val is not None:
+                model_size += val.nelement() * val.element_size()
+        return model_size
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """Move/cast all tensors of this layer in place; returns None."""
+        super().to(device=device, dtype=dtype)
+
+        if self.w1 is not None:
+            self.w1 = self.w1.to(device=device, dtype=dtype)
+        else:
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
+
+        if self.w2 is not None:
+            self.w2 = self.w2.to(device=device, dtype=dtype)
+        else:
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
+
+        if self.t2 is not None:
+            self.t2 = self.t2.to(device=device, dtype=dtype)
+
+
+class LoRAModel: #(torch.nn.Module):
+    """
+    A named collection of LoRA-family layers loaded from one checkpoint file.
+
+    `layers` maps the checkpoint's layer key (the part of each state-dict key
+    before the first ".") to the layer object built from that group.
+    """
+    _name: str
+    layers: Dict[str, LoRALayer]
+    _device: torch.device
+    _dtype: torch.dtype
+
+    def __init__(
+        self,
+        name: str,
+        layers: Dict[str, LoRALayer],
+        device: torch.device,
+        dtype: torch.dtype,
+    ):
+        self._name = name
+        # `torch.cpu` is not a device object; build a real CPU device instead.
+        self._device = device or torch.device("cpu")
+        self._dtype = dtype or torch.float32
+        self.layers = layers
+
+    @property
+    def name(self):
+        return self._name
+
+    @property
+    def device(self):
+        return self._device
+
+    @property
+    def dtype(self):
+        return self._dtype
+
+    def to(
+        self,
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ) -> LoRAModel:
+        """
+        Move/cast every layer in place and return this model.
+
+        An argument left as None is not changed, so the recorded device/dtype
+        is only updated for the arguments actually supplied.
+        """
+        # TODO: try revert if exception?
+        for layer in self.layers.values():
+            layer.to(device=device, dtype=dtype)
+        if device is not None:
+            self._device = device
+        if dtype is not None:
+            self._dtype = dtype
+        return self
+
+    def calc_size(self) -> int:
+        """Return the summed `calc_size()` of all layers, in bytes."""
+        return sum(layer.calc_size() for layer in self.layers.values())
+
+    @classmethod
+    def from_checkpoint(
+        cls,
+        file_path: Union[str, Path],
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """
+        Load a LoRA checkpoint (`.safetensors` or a torch-pickled file).
+
+        :param file_path: location of the checkpoint; its stem becomes the model name.
+        :param device: target device for the layers (defaults to CPU).
+        :param dtype: target dtype for the layers (defaults to float32).
+        :return: the loaded model, or None when a layer of an unrecognized
+          format is encountered (a message is printed in that case).
+        """
+        device = device or torch.device("cpu")
+        dtype = dtype or torch.float32
+
+        file_path = Path(file_path)
+
+        model = cls(
+            device=device,
+            dtype=dtype,
+            name=file_path.stem, # TODO:
+            layers=dict(),
+        )
+
+        if file_path.suffix == ".safetensors":
+            state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
+        else:
+            # NOTE(security): torch.load unpickles the file; only use it on trusted checkpoints.
+            state_dict = torch.load(file_path, map_location="cpu")
+
+        state_dict = cls._group_state(state_dict)
+
+        for layer_key, values in state_dict.items():
+
+            # lora and locon
+            if "lora_down.weight" in values:
+                layer = LoRALayer(layer_key, values)
+
+            # loha
+            elif "hada_w1_b" in values:
+                layer = LoHALayer(layer_key, values)
+
+            # lokr
+            elif "lokr_w1_b" in values or "lokr_w1" in values:
+                layer = LoKRLayer(layer_key, values)
+
+            else:
+                # TODO: diff/ia3/... format
+                # This is a classmethod, so there is no `self`; report the
+                # name of the model being built.
+                print(
+                    f">> Encountered unknown lora layer module in {model.name}: {layer_key}"
+                )
+                return
+
+            # lower memory consumption by removing already parsed layer values
+            state_dict[layer_key].clear()
+
+            layer.to(device=device, dtype=dtype)
+            model.layers[layer_key] = layer
+
+        return model
+
+    @staticmethod
+    def _group_state(state_dict: dict):
+        """
+        Regroup a flat state dict into `{stem: {leaf: value}}`, splitting each
+        key at its first ".".
+        """
+        state_dict_groupped = dict()
+
+        for key, value in state_dict.items():
+            stem, leaf = key.split(".", 1)
+            state_dict_groupped.setdefault(stem, dict())[leaf] = value
+
+        return state_dict_groupped
+
+
+"""
+loras = [
+    (lora_model1, 0.7),
+    (lora_model2, 0.4),
+]
+with ModelPatcher.apply_lora_unet(unet, loras):
+    # unet with applied loras
+# unmodified unet
+
+"""
+# TODO: rename smth like ModelPatcher and add TI method?
+class ModelPatcher:
+    """
+    Context managers that temporarily patch a model.
+
+    LoRA layers are attached as forward hooks and removed on exit;
+    textual-inversion embeddings are added to a copy of the tokenizer and to
+    the text encoder's embedding table, which is resized back on exit.
+    """
+
+    @staticmethod
+    def _resolve_lora_key(model: torch.nn.Module, lora_key: str, prefix: str) -> Tuple[str, torch.nn.Module]:
+        """
+        Map a flattened lora key to the submodule of `model` it refers to.
+
+        After `prefix` is stripped, the key is a submodule path whose
+        components are joined with "_". Underscores can also occur inside a
+        component name, so parts are joined greedily until `get_submodule`
+        accepts the candidate name.
+
+        :return: `(module_key, module)` where `module_key` is the full dotted
+          path of `module` relative to `model`.
+        :raises Exception: if `lora_key` does not start with `prefix`.
+        :raises AttributeError: if the final component cannot be resolved.
+        """
+        assert "." not in lora_key
+
+        if not lora_key.startswith(prefix):
+            raise Exception(f"lora_key with invalid prefix: {lora_key}, {prefix}")
+
+        module = model
+        module_key = ""
+        key_parts = lora_key[len(prefix):].split('_')
+
+        submodule_name = key_parts.pop(0)
+
+        while len(key_parts) > 0:
+            try:
+                module = module.get_submodule(submodule_name)
+                module_key += "." + submodule_name
+                submodule_name = key_parts.pop(0)
+            except AttributeError:
+                # not a submodule (yet): the name itself contains an underscore
+                submodule_name += "_" + key_parts.pop(0)
+
+        module = module.get_submodule(submodule_name)
+        # Include the final component and drop the leading dot. Without the
+        # final component, sibling layers (e.g. to_q / to_k) share one key and
+        # only the first of them would get a hook.
+        module_key = (module_key + "." + submodule_name).lstrip(".")
+
+        return (module_key, module)
+
+    @staticmethod
+    def _lora_forward_hook(
+        applied_loras: List[Tuple[LoRAModel, float]],
+        layer_name: str,
+    ):
+        """
+        Build a forward hook that adds, in place, the contribution of layer
+        `layer_name` from every lora in `applied_loras` to the module output.
+        """
+
+        def lora_forward(module, input_h, output):
+            if len(applied_loras) == 0:
+                return output
+
+            for lora, weight in applied_loras:
+                layer = lora.layers.get(layer_name, None)
+                if layer is None:
+                    continue
+                output += layer.forward(module, input_h, weight)
+            return output
+
+        return lora_forward
+
+
+    @classmethod
+    @contextmanager
+    def apply_lora_unet(
+        cls,
+        unet: UNet2DConditionModel,
+        loras: List[Tuple[LoRAModel, float]],
+    ):
+        """Apply `loras` to `unet` (layer keys prefixed with "lora_unet_") within the context."""
+        with cls.apply_lora(unet, loras, "lora_unet_"):
+            yield
+
+
+    @classmethod
+    @contextmanager
+    def apply_lora_text_encoder(
+        cls,
+        text_encoder: CLIPTextModel,
+        loras: List[Tuple[LoRAModel, float]],
+    ):
+        """Apply `loras` to `text_encoder` (layer keys prefixed with "lora_te_") within the context."""
+        with cls.apply_lora(text_encoder, loras, "lora_te_"):
+            yield
+
+
+    @classmethod
+    @contextmanager
+    def apply_lora(
+        cls,
+        model: torch.nn.Module,
+        loras: List[Tuple[LoRAModel, float]],
+        prefix: str,
+    ):
+        """
+        Register one forward hook per targeted submodule of `model` for the
+        layers of `loras` whose key starts with `prefix`; all hooks are removed
+        when the context exits, including on error.
+        """
+        hooks = dict()
+        try:
+            for lora, lora_weight in loras:
+                for layer_key, layer in lora.layers.items():
+                    if not layer_key.startswith(prefix):
+                        continue
+
+                    module_key, module = cls._resolve_lora_key(model, layer_key, prefix)
+                    # one hook per module: the hook itself iterates over all loras
+                    if module_key not in hooks:
+                        hooks[module_key] = module.register_forward_hook(cls._lora_forward_hook(loras, layer_key))
+
+            yield # wait for context manager exit
+
+        finally:
+            for module_key, hook in hooks.items():
+                hook.remove()
+            hooks.clear()
+
+
+    @classmethod
+    @contextmanager
+    def apply_ti(
+        cls,
+        tokenizer: CLIPTokenizer,
+        text_encoder: CLIPTextModel,
+        ti_list: List[Any],
+    ) -> Tuple[CLIPTokenizer, TextualInversionManager]:
+        """
+        Temporarily add textual-inversion embeddings to `text_encoder`.
+
+        Yields `(ti_tokenizer, ti_manager)`: a deep copy of `tokenizer` extended
+        with one trigger token per embedding row, and a manager whose
+        `pad_tokens` maps each first token id to the ids of its extra rows.
+        On exit the text encoder's embedding table is resized back to its
+        initial size.
+
+        :raises RuntimeError: if a trigger token cannot be found in the tokenizer.
+        :raises ValueError: if an embedding's dimension does not match the model's.
+        """
+        init_tokens_count = None
+        new_tokens_added = None
+
+        try:
+            ti_manager = TextualInversionManager()
+            ti_tokenizer = copy.deepcopy(tokenizer)
+            init_tokens_count = text_encoder.resize_token_embeddings(None).num_embeddings
+
+            def _get_trigger(ti, index):
+                # Row 0 uses the plain name; further rows get a "-!pad-N" suffix.
+                trigger = ti.name
+                if index > 0:
+                    trigger += f"-!pad-{index}"
+                return f"<{trigger}>"
+
+            # modify tokenizer
+            new_tokens_added = 0
+            for ti in ti_list:
+                for i in range(ti.embedding.shape[0]):
+                    new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti, i))
+
+            # modify text_encoder
+            text_encoder.resize_token_embeddings(init_tokens_count + new_tokens_added)
+            model_embeddings = text_encoder.get_input_embeddings()
+
+            for ti in ti_list:
+                ti_tokens = []
+                for i in range(ti.embedding.shape[0]):
+                    embedding = ti.embedding[i]
+                    trigger = _get_trigger(ti, i)
+
+                    token_id = ti_tokenizer.convert_tokens_to_ids(trigger)
+                    if token_id == ti_tokenizer.unk_token_id:
+                        raise RuntimeError(f"Unable to find token id for token '{trigger}'")
+
+                    if model_embeddings.weight.data[token_id].shape != embedding.shape:
+                        raise ValueError(
+                            f"Cannot load embedding for {trigger}. It was trained on a model with token dimension {embedding.shape[0]}, but the current model has token dimension {model_embeddings.weight.data[token_id].shape[0]}."
+                        )
+
+                    model_embeddings.weight.data[token_id] = embedding
+                    ti_tokens.append(token_id)
+
+                if len(ti_tokens) > 1:
+                    ti_manager.pad_tokens[ti_tokens[0]] = ti_tokens[1:]
+
+            yield ti_tokenizer, ti_manager
+
+        finally:
+            # only undo the resize when tokens were actually added
+            if init_tokens_count and new_tokens_added:
+                text_encoder.resize_token_embeddings(init_tokens_count)
+
+
+class TextualInversionModel:
+    """
+    A textual-inversion embedding loaded from a checkpoint file.
+
+    `name` is the stem of the file it was loaded from; `embedding` holds one
+    row per token vector.
+    """
+    name: str
+    embedding: torch.Tensor # [n, 768]|[n, 1280]
+
+    @classmethod
+    def from_checkpoint(
+        cls,
+        file_path: Union[str, Path],
+        device: Optional[torch.device] = None,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        """
+        Load an embedding from a `.safetensors` or torch-pickled file.
+
+        :param file_path: location of the embedding file.
+        :param device: device to move the embedding to (None leaves it as loaded).
+        :param dtype: dtype to cast the embedding to (None leaves it as loaded).
+        :return: a `TextualInversionModel` whose `embedding` is 2-D.
+        :raises ValueError: if the file does not contain a tensor embedding.
+        """
+        file_path = Path(file_path)
+
+        result = cls() # TODO:
+        result.name = file_path.stem # TODO:
+
+        if file_path.suffix == ".safetensors":
+            state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
+        else:
+            # NOTE(security): torch.load unpickles the file; only use it on trusted embeddings.
+            state_dict = torch.load(file_path, map_location="cpu")
+
+        # both v1 and v2 format embeddings
+        # difference mostly in metadata
+        if "string_to_param" in state_dict:
+            if len(state_dict["string_to_param"]) > 1:
+                print(f"Warn: Embedding \"{file_path.name}\" contains multiple tokens, which is not supported. The first token will be used.")
+
+            result.embedding = next(iter(state_dict["string_to_param"].values()))
+
+        # v3 (easynegative)
+        elif "emb_params" in state_dict:
+            result.embedding = state_dict["emb_params"]
+
+        # v4(diffusers bin files)
+        else:
+            result.embedding = next(iter(state_dict.values()))
+
+        # Validate for every format, not only the last one, so a malformed
+        # file fails here with a clear message.
+        if not isinstance(result.embedding, torch.Tensor):
+            raise ValueError(f"Invalid embeddings file: {file_path.name}")
+
+        # Honour the requested device/dtype (both None is a no-op).
+        result.embedding = result.embedding.to(device=device, dtype=dtype)
+
+        # A single vector is stored as one row so that `embedding` always
+        # follows the [n, dim] layout documented on the attribute.
+        if result.embedding.dim() == 1:
+            result.embedding = result.embedding.unsqueeze(0)
+
+        return result
+
+
+class TextualInversionManager(BaseTextualInversionManager):
+    """
+    Expands token id sequences with the extra ("pad") token ids registered
+    for multi-vector textual-inversion embeddings.
+
+    `pad_tokens` maps a token id to the ids inserted directly after it.
+    """
+    pad_tokens: Dict[int, List[int]]
+
+    def __init__(self):
+        self.pad_tokens = {}
+
+    def expand_textual_inversion_token_ids_if_necessary(
+        self, token_ids: list[int]
+    ) -> list[int]:
+        """
+        Return `token_ids` with the registered pad ids inserted after each
+        token that has an entry in `pad_tokens`. When nothing is registered
+        the input list itself is returned.
+        """
+
+        #if token_ids[0] == self.tokenizer.bos_token_id:
+        #    raise ValueError("token_ids must not start with bos_token_id")
+        #if token_ids[-1] == self.tokenizer.eos_token_id:
+        #    raise ValueError("token_ids must not end with eos_token_id")
+
+        if not self.pad_tokens:
+            return token_ids
+
+        expanded = []
+        for current_id in token_ids:
+            expanded.append(current_id)
+            expanded.extend(self.pad_tokens.get(current_id, ()))
+
+        return expanded
+
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@ -0,0 +1,391 @@
+"""
+Manage a RAM cache of diffusion/transformer models for fast switching.
+They are moved between GPU VRAM and CPU RAM as necessary. If the cache
+grows larger than a preset maximum, then the least recently used
+model will be cleared and (re)loaded from disk when next needed.
+
+The cache returns context manager generators designed to load the
+model into the GPU within the context, and unload outside the
+context. Use like this:
+
+   cache = ModelCache(max_cache_size=6.0)
+   with cache.get_model(sd1_path, model_class, base_model, model_type) as SD1, \
+          cache.get_model(sd2_path, model_class, base_model, model_type) as SD2:
+       do_something_in_GPU(SD1,SD2)
+
+
+"""
+
+import gc
+import os
+import sys
+import hashlib
+from contextlib import suppress
+from pathlib import Path
+from typing import Dict, Union, types, Optional, Type, Any
+
+import torch
+
+import logging
+import invokeai.backend.util.logging as logger
+from invokeai.app.services.config import get_invokeai_config
+from .lora import LoRAModel, TextualInversionModel
+from .models import BaseModelType, ModelType, SubModelType, ModelBase
+
+# Default maximum size of the cache, in gigs (ModelCache multiplies it by GIG
+# before comparing it against model sizes in bytes)
+# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
+DEFAULT_MAX_CACHE_SIZE = 6.0
+
+# actual size of a gig, in bytes (2**30)
+GIG = 1073741824
+
+# Placeholder only: the working implementation is the ModelLocker class nested
+# inside ModelCache further down in this module.
+class ModelLocker(object):
+    "Forward declaration"
+    pass
+
+# Placeholder so that the `cache: ModelCache` annotation on _CacheRecord can be
+# evaluated; the name is rebound by the full ModelCache class defined after
+# _CacheRecord.
+class ModelCache(object):
+    "Forward declaration"
+    pass
+
+class _CacheRecord:
+    """Bookkeeping entry for one cached model: the model, its size and its lock count."""
+
+    size: int
+    model: Any
+    cache: ModelCache
+    _locks: int
+
+    def __init__(self, cache, model: Any, size: int):
+        self.cache = cache
+        self.model = model
+        self.size = size
+        self._locks = 0
+
+    def lock(self):
+        """Register one more user of the model."""
+        self._locks = self._locks + 1
+
+    def unlock(self):
+        """Release one user; the lock count must never drop below zero."""
+        self._locks = self._locks - 1
+        assert self._locks >= 0
+
+    @property
+    def locked(self):
+        """True while at least one lock is outstanding."""
+        return self._locks > 0
+
+    @property
+    def loaded(self):
+        """True when the model has a `device` that differs from the cache's storage device."""
+        if self.model is None or not hasattr(self.model, "device"):
+            return False
+        return self.model.device != self.cache.storage_device
+    
+
+class ModelCache(object):
+    def __init__(
+        self,
+        max_cache_size: float=DEFAULT_MAX_CACHE_SIZE,
+        execution_device: torch.device=torch.device('cuda'),
+        storage_device: torch.device=torch.device('cpu'),
+        precision: torch.dtype=torch.float16,
+        sequential_offload: bool=False,
+        lazy_offloading: bool=True,
+        sha_chunksize: int = 16777216,
+        logger: types.ModuleType = logger
+    ):
+        '''
+        :param max_cache_size: Maximum size of the cache, in gigs [DEFAULT_MAX_CACHE_SIZE]
+        :param execution_device: Torch device to load active model into [torch.device('cuda')]
+        :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
+        :param precision: Precision for loaded models [torch.float16]
+        :param lazy_offloading: Keep model in VRAM until another model needs to be loaded
+        :param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially (accepted but currently not stored or used)
+        :param sha_chunksize: Chunksize to use when calculating sha256 model hash
+        :param logger: Object providing debug()/info(); defaults to the InvokeAI logging module
+        '''
+        # NOTE: the caller's execution_device is honored as passed; it used to
+        # be overwritten with torch.device('cuda') here, which made the
+        # parameter useless.
+
+        # ModelBase objects, keyed by get_key() without a submodel part
+        self.model_infos: Dict[str, ModelBase] = dict()
+        self.lazy_offloading = lazy_offloading
+        #self.sequential_offload: bool=sequential_offload
+        self.precision: torch.dtype=precision
+        self.max_cache_size: float=max_cache_size
+        self.execution_device: torch.device=execution_device
+        self.storage_device: torch.device=storage_device
+        self.sha_chunksize=sha_chunksize
+        self.logger = logger
+
+        # cache key -> _CacheRecord
+        self._cached_models = dict()
+        # cache keys ordered from least to most recently requested
+        self._cache_stack = list()
+
+    def get_key(
+        self,
+        model_path: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel_type: Optional[SubModelType] = None,
+    ):
+        """
+        Build the cache key "path:base:type", with ":submodel" appended
+        when a (truthy) submodel_type is given.
+        """
+        parts = [f"{model_path}:{base_model}:{model_type}"]
+        if submodel_type:
+            parts.append(f"{submodel_type}")
+        return ":".join(parts)
+
+    #def get_model(
+    #    self,
+    #    repo_id_or_path: Union[str, Path],
+    #    model_type: ModelType = ModelType.Diffusers,
+    #    subfolder: Path = None,
+    #    submodel: ModelType = None,
+    #    revision: str = None,
+    #    attach_model_part: Tuple[ModelType, str] = (None, None),
+    #    gpu_load: bool = True,
+    #) -> ModelLocker:  # ?? what does it return
+    def _get_model_info(
+        self,
+        model_path: str,
+        model_class: Type[ModelBase],
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ):
+        """
+        Return the model_class instance for this path/base/type, creating
+        and remembering it in self.model_infos on first request.
+        """
+        info_key = self.get_key(
+            model_path=model_path,
+            base_model=base_model,
+            model_type=model_type,
+            submodel_type=None,
+        )
+
+        if info_key in self.model_infos:
+            return self.model_infos[info_key]
+
+        info = model_class(
+            model_path,
+            base_model,
+            model_type,
+        )
+        self.model_infos[info_key] = info
+        return info
+
+    # TODO: args
+    def get_model(
+        self,
+        model_path: Union[str, Path],
+        model_class: Type[ModelBase],
+        base_model: BaseModelType,
+        model_type: ModelType,
+        submodel: Optional[SubModelType] = None,
+        gpu_load: bool = True,
+    ) -> Any:
+        '''
+        Return a ModelLocker context manager for the requested (sub)model,
+        loading the model into the cache first if it is not already there.
+
+        :param model_path: Path to the model on disk; must exist
+        :param model_class: ModelBase subclass used to create the model info object
+        :param base_model: Base model type, part of the cache key
+        :param model_type: Model type, part of the cache key
+        :param submodel: Optional submodel to load instead of the whole model
+        :param gpu_load: If true, entering the returned context moves the model to the execution device
+        :raises Exception: if model_path does not exist
+        '''
+
+        if not isinstance(model_path, Path):
+            model_path = Path(model_path)
+
+        if not os.path.exists(model_path):
+            raise Exception(f"Model not found: {model_path}")
+
+        model_info = self._get_model_info(
+            model_path=model_path,
+            model_class=model_class,
+            base_model=base_model,
+            model_type=model_type,
+        )
+        key = self.get_key(
+            model_path=model_path,
+            base_model=base_model,
+            model_type=model_type,
+            submodel_type=submodel,
+        )
+
+        # TODO: lock for no copies on simultaneous calls?
+        cache_entry = self._cached_models.get(key, None)
+        if cache_entry is None:
+            self.logger.info(f'Loading model {model_path}, type {base_model}:{model_type}:{submodel}')
+
+            # this will remove older cached models until
+            # there is sufficient room to load the requested model
+            self._make_cache_room(model_info.get_size(submodel))
+
+            # clean memory to make MemoryUsage() more accurate
+            gc.collect()
+            model = model_info.get_model(child_type=submodel, torch_dtype=self.precision)
+            if mem_used := model_info.get_size(submodel):
+                self.logger.debug(f'CPU RAM used for load: {(mem_used/GIG):.2f} GB')
+
+            cache_entry = _CacheRecord(self, model, mem_used)
+            self._cached_models[key] = cache_entry
+
+        # Move the key to the end of the stack (_make_cache_room evicts from
+        # the front). list.remove() raises only ValueError, for a key that is
+        # not on the stack yet, so nothing broader is suppressed.
+        with suppress(ValueError):
+            self._cache_stack.remove(key)
+        self._cache_stack.append(key)
+
+        return self.ModelLocker(self, key, cache_entry.model, gpu_load)
+
+    class ModelLocker(object):
+        """
+        Context manager returned by ModelCache.get_model(). With gpu_load
+        set, entering locks the cache entry and moves the model to the
+        cache's execution device; exiting releases that lock. Models
+        without a to() method are passed through untouched.
+        """
+        def __init__(self, cache, key, model, gpu_load):
+            self.gpu_load = gpu_load
+            self.cache = cache
+            self.key = key
+            self.model = model
+            self.cache_entry = self.cache._cached_models[self.key]
+
+        def __enter__(self) -> Any:
+            if not hasattr(self.model, 'to'):
+                return self.model
+
+            # NOTE that the model has to have the to() method in order for this
+            # code to move it into GPU!
+            if self.gpu_load:
+                self.cache_entry.lock()
+
+                try:
+                    if self.cache.lazy_offloading:
+                        self.cache._offload_unlocked_models()
+
+                    if self.model.device != self.cache.execution_device:
+                        self.cache.logger.debug(f'Moving {self.key} into {self.cache.execution_device}')
+                        with VRAMUsage() as mem:
+                            self.model.to(self.cache.execution_device)  # move into GPU
+                        self.cache.logger.debug(f'GPU VRAM used for load: {(mem.vram_used/GIG):.2f} GB')
+
+                    self.cache.logger.debug(f'Locking {self.key} in {self.cache.execution_device}')
+                    self.cache._print_cuda_stats()
+
+                except BaseException:
+                    # undo the lock taken above before propagating any failure
+                    self.cache_entry.unlock()
+                    raise
+
+            # TODO: not fully understand
+            # in the event that the caller wants the model in RAM, we
+            # move it into CPU if it is in GPU and not locked
+            elif self.cache_entry.loaded and not self.cache_entry.locked:
+                self.model.to(self.cache.storage_device)
+
+            return self.model
+
+        def __exit__(self, type, value, traceback):
+            if not hasattr(self.model, 'to'):
+                return
+
+            # Only release a lock that __enter__ actually took: without
+            # gpu_load no lock is acquired, and unlocking anyway would push
+            # the lock count below zero and trip the assert in unlock().
+            if self.gpu_load:
+                self.cache_entry.unlock()
+            if not self.cache.lazy_offloading:
+                self.cache._offload_unlocked_models()
+                self.cache._print_cuda_stats()
+
+    # TODO: should it be called untrack_model?
+    def uncache_model(self, cache_id: str):
+        """Drop cache_id from the usage stack and the cache dict; unknown ids are ignored."""
+        try:
+            self._cache_stack.remove(cache_id)
+        except ValueError:
+            pass
+        self._cached_models.pop(cache_id, None)
+
+    def model_hash(
+        self,
+        model_path: Union[str, Path],
+    ) -> str:
+        '''
+        Return a hash for the model at model_path. This is a thin public
+        wrapper around _local_model_hash().
+        NOTE(review): _local_model_hash() treats model_path as a directory
+        (it reads and writes "checksum.sha256" inside it) -- confirm whether
+        single-file checkpoints or HF repo ids are meant to be supported.
+        :param model_path: Path to model file/directory on disk.
+        '''
+        return self._local_model_hash(model_path)
+
+    def cache_size(self) -> float:
+        "Return the current size of the cache, in GB"
+        total_bytes = 0
+        for record in self._cached_models.values():
+            total_bytes += record.size
+        return total_bytes / GIG
+
+    def _has_cuda(self) -> bool:
+        "Return True when the configured execution device is a CUDA device"
+        return self.execution_device.type == 'cuda'
+
+    def _print_cuda_stats(self):
+        """Log allocated CUDA memory, cache size and cached/loaded/locked model counts at debug level."""
+        vram = f"{torch.cuda.memory_allocated() / GIG:4.2f}G"
+        ram = f"{self.cache_size():4.2f}G"
+
+        records = list(self._cached_models.values())
+        cached_models = len(records)
+        loaded_models = sum(1 for record in records if record.loaded)
+        locked_models = sum(1 for record in records if record.locked)
+
+        self.logger.debug(f"Current VRAM/RAM usage: {vram}/{ram}; cached_models/loaded_models/locked_models/ = {cached_models}/{loaded_models}/{locked_models}")
+
+
+    def _make_cache_room(self, model_size):
+        """
+        Evict cached models, starting at the front of the cache stack, until
+        model_size more bytes fit under max_cache_size or every entry has
+        been examined. Entries that are locked, or whose model is still
+        referenced from outside the cache, are skipped.
+
+        :param model_size: Size in bytes of the model about to be loaded
+        """
+        # calculate how much memory this model will require
+        #multiplier = 2 if self.precision==torch.float32 else 1
+        bytes_needed = model_size
+        maximum_size = self.max_cache_size * GIG  # stored in GB, convert to bytes
+        current_size = sum([m.size for m in self._cached_models.values()])
+
+        if current_size + bytes_needed > maximum_size:
+            self.logger.debug(f'Max cache size exceeded: {(current_size/GIG):.2f}/{self.max_cache_size:.2f} GB, need an additional {(bytes_needed/GIG):.2f} GB')
+
+        self.logger.debug(f"Before unloading: cached_models={len(self._cached_models)}")
+
+        # pos only advances past entries that cannot be evicted; deleting an
+        # entry shifts the next candidate into the same position
+        pos = 0
+        while current_size + bytes_needed > maximum_size and pos < len(self._cache_stack):
+            model_key = self._cache_stack[pos]
+            cache_entry = self._cached_models[model_key]
+
+            # the count is sensitive to every live reference to the model, so
+            # do not bind the model to another local before this call
+            refs = sys.getrefcount(cache_entry.model)
+
+            device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
+            self.logger.debug(f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}, refs: {refs}")
+
+            # 2 refs:
+            # 1 from cache_entry
+            # 1 from getrefcount function
+            if not cache_entry.locked and refs <= 2:
+                self.logger.debug(f'Unloading model {model_key} to free {(model_size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)')
+                current_size -= cache_entry.size
+                del self._cache_stack[pos]
+                del self._cached_models[model_key]
+                del cache_entry
+
+            else:
+                pos += 1
+
+        # collect the dropped models, then ask torch to release cached CUDA memory
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")
+
+
+    def _offload_unlocked_models(self):
+        """Move every cached model that is loaded but not locked to the storage device."""
+        for model_key, cache_entry in self._cached_models.items():
+            if cache_entry.locked or not cache_entry.loaded:
+                continue
+            self.logger.debug(f'Offloading {model_key} from {self.execution_device} into {self.storage_device}')
+            cache_entry.model.to(self.storage_device)
+        
+    def _local_model_hash(self, model_path: Union[str, Path]) -> str:
+        """
+        Return the sha256 hex digest of the weight files (*.ckpt,
+        *.safetensors, *.pth) found recursively under model_path.
+
+        The digest is cached in "checksum.sha256" inside model_path and
+        reused as long as that file is at least as new as the directory
+        itself. Files are hashed in sorted path order so that the digest
+        does not depend on the order in which the filesystem lists them.
+
+        :param model_path: Directory containing the model files
+        """
+        path = Path(model_path)
+
+        hashpath = path / "checksum.sha256"
+        if hashpath.exists() and path.stat().st_mtime <= hashpath.stat().st_mtime:
+            with open(hashpath) as f:
+                return f.read()
+
+        self.logger.debug(f'computing hash of model {path.name}')
+        sha = hashlib.sha256()
+        weight_files = sorted(
+            file
+            for pattern in ("*.ckpt", "*.safetensors", "*.pth")
+            for file in path.rglob(pattern)
+        )
+        for file in weight_files:
+            with open(file, "rb") as f:
+                while chunk := f.read(self.sha_chunksize):
+                    sha.update(chunk)
+        digest = sha.hexdigest()
+        with open(hashpath, "w") as f:
+            f.write(digest)
+        return digest
+
+class VRAMUsage(object):
+    """
+    Context manager that records the change in torch.cuda.memory_allocated()
+    between entry and exit; the difference is left in `vram_used`.
+    """
+    def __init__(self):
+        self.vram = None
+        self.vram_used = 0
+
+    def __enter__(self):
+        # allocation reading at entry
+        self.vram = torch.cuda.memory_allocated()
+        return self
+
+    def __exit__(self, *args):
+        allocated_now = torch.cuda.memory_allocated()
+        self.vram_used = allocated_now - self.vram
--- a/invokeai/backend/model_management/model_install.py
+++ b/invokeai/backend/model_management/model_install.py
@ -0,0 +1,118 @@
+"""
+Routines for downloading and installing models.
+"""
+import json
+import safetensors
+import safetensors.torch
+import shutil
+import tempfile
+import torch
+import traceback
+from dataclasses import dataclass
+from diffusers import ModelMixin
+from enum import Enum
+from typing import Callable
+from pathlib import Path
+
+import invokeai.backend.util.logging as logger
+from invokeai.app.services.config import InvokeAIAppConfig
+from . import ModelManager
+from .models import BaseModelType, ModelType, VariantType
+from .model_probe import ModelProbe, ModelVariantInfo
+from .model_cache import SilenceWarnings
+
+class ModelInstall(object):
+    '''
+    This class is able to download and install several different kinds of
+    InvokeAI models. The helper function, if provided, is called on to distinguish
+    between v2-base and v2-768 stable diffusion pipelines. This usually involves
+    asking the user to select the proper type, as there is no way of distinguishing
+    the two type of v2 file programmatically (as far as I know).
+    '''
+    # NOTE(review): install_checkpoint_file() calls self._dest_path() and
+    # self._pipeline_type_to_config_file(), neither of which is defined in
+    # this class as it stands -- they must be supplied before it can run.
+
+    def __init__(self,
+                 config: InvokeAIAppConfig,
+                 model_base_helper: Callable[[Path],BaseModelType]=None,
+                 clobber:bool = False
+                 ):
+        '''
+        :param config: InvokeAI configuration object
+        :param model_base_helper: A function call that accepts the Path to a checkpoint model and returns a ModelType enum
+        :param clobber: If true, models with colliding names will be overwritten
+        '''
+        self.config = config
+        # stored under the name that _check_for_collision() reads
+        self.clobber = clobber
+        self.helper = model_base_helper
+        self.prober = ModelProbe()
+
+    def install_checkpoint_file(self, checkpoint: Path)->dict:
+        '''
+        Install the checkpoint file at path and return a
+        configuration entry that can be added to `models.yaml`.
+        Model checkpoints and VAEs will be converted into
+        diffusers before installation. Note that the model manager
+        does not hold entries for anything but diffusers pipelines,
+        and the configuration file stanzas returned from such models
+        can be safely ignored.
+
+        :param checkpoint: Path to the checkpoint file to install
+        :raises ValueError: if the model type cannot be determined, or the
+            destination exists and clobber is false
+        '''
+        # the helper must go by keyword: the second positional parameter of
+        # ModelProbe.probe() is the already-loaded model, not the helper
+        model_info = self.prober.probe(checkpoint, prediction_type_helper=self.helper)
+        if not model_info:
+            raise ValueError(f"Unable to determine type of checkpoint file {checkpoint}")
+
+        key = ModelManager.create_key(
+            model_name = checkpoint.stem,
+            base_model = model_info.base_type,
+            model_type = model_info.model_type,
+        )
+
+        # Pipelines are converted into a folder named after the checkpoint
+        # (no suffix); everything else is copied under its own file name.
+        # Only the final path component of the source is used, so an absolute
+        # source path cannot replace the destination directory.
+        is_pipeline = model_info.model_type == ModelType.Pipeline
+        leaf_name = checkpoint.stem if is_pipeline else checkpoint.name
+        destination_path = self._dest_path(model_info) / leaf_name
+        destination_path.parent.mkdir(parents=True, exist_ok=True)
+        self._check_for_collision(destination_path)
+        stanza = {
+            key: dict(
+                name = checkpoint.stem,
+                description = f'{model_info.model_type} model {checkpoint.stem}',
+                base = model_info.base_type.value,
+                type = model_info.model_type.value,
+                variant = model_info.variant_type.value,
+                path = str(destination_path),
+                format = 'folder' if is_pipeline else 'checkpoint',
+            )
+        }
+
+        # non-pipeline; no conversion needed, just copy into right place
+        if not is_pipeline:
+            shutil.copyfile(checkpoint, destination_path)
+
+        # pipeline - conversion needed here
+        else:
+            config_file = self._pipeline_type_to_config_file(model_info.model_type)
+
+            from .convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
+            with SilenceWarnings():
+                convert_ckpt_to_diffusers(
+                    checkpoint,
+                    destination_path,
+                    extract_ema=True,
+                    original_config_file=config_file,
+                    scan_needed=False,
+                )
+
+        return stanza
+
+
+    def _check_for_collision(self, path: Path):
+        '''
+        Make sure `path` is free: do nothing if it does not exist, remove it
+        if clobber is set, otherwise raise ValueError.
+        '''
+        if not path.exists():
+            return
+        if not self.clobber:
+            raise ValueError(f"Destination {path} already exists. Won't overwrite unless clobber=True.")
+        # rmtree only handles real directories; files and symlinks are unlinked
+        if path.is_dir() and not path.is_symlink():
+            shutil.rmtree(path)
+        else:
+            path.unlink()
+
+    def _staging_directory(self)->tempfile.TemporaryDirectory:
+        '''Return a new temporary directory created under the configured root path.'''
+        return tempfile.TemporaryDirectory(dir=self.config.root_path)
+
+    
+        
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
--- a/invokeai/backend/model_management/model_probe.py
+++ b/invokeai/backend/model_management/model_probe.py
@ -0,0 +1,417 @@
+import json
+import traceback
+import torch
+import safetensors.torch
+
+from dataclasses import dataclass
+from enum import Enum
+
+from diffusers import ModelMixin, ConfigMixin, StableDiffusionPipeline, AutoencoderKL, ControlNetModel
+from pathlib import Path
+from typing import Callable, Literal, Union, Dict
+from picklescan.scanner import scan_file_path
+
+import invokeai.backend.util.logging as logger
+from .models import BaseModelType, ModelType, ModelVariantType, SchedulerPredictionType, SilenceWarnings
+
+@dataclass
+class ModelVariantInfo(object):
+    '''Result of ModelProbe.probe(): what kind of model was found and how to treat it.'''
+    model_type: ModelType
+    base_type: BaseModelType
+    variant_type: ModelVariantType
+    prediction_type: SchedulerPredictionType
+    # probe() sets this for StableDiffusion2 models with VPrediction
+    upcast_attention: bool
+    format: Literal['folder','checkpoint']
+    # probe() sets 768 for StableDiffusion2 models with VPrediction, else 512
+    image_size: int
+
+# Placeholder so that ModelProbe.register_probe() can name ProbeBase in its
+# signature; the full class is defined after ModelProbe and rebinds the name.
+class ProbeBase(object):
+    '''forward declaration'''
+    pass
+
+class ModelProbe(object):
+    
+    PROBES = {
+        'folder': { },
+        'checkpoint': { },
+    }
+
+    CLASS2TYPE = {
+        'StableDiffusionPipeline' : ModelType.Pipeline,
+        'AutoencoderKL' : ModelType.Vae,
+        'ControlNetModel' : ModelType.ControlNet,
+    }
+    
+    @classmethod
+    def register_probe(cls,
+                       format: Literal['folder','checkpoint'],
+                       model_type: ModelType,
+                       probe_class: ProbeBase):
+        '''
+        Register probe_class as the probe to use for models of model_type
+        stored in the given format. `format` must be one of the keys of
+        cls.PROBES ('folder' or 'checkpoint').
+        '''
+        cls.PROBES[format][model_type] = probe_class
+
+    @classmethod
+    def heuristic_probe(cls,
+                        model: Union[Dict, ModelMixin, Path],
+                        prediction_type_helper: Callable[[Path],BaseModelType]=None,
+                        )->ModelVariantInfo:
+        '''
+        Probe either a path on disk or an already-loaded model, dispatching
+        to probe() with the matching arguments.
+
+        :param model: Path to a model, a checkpoint dict, or a loaded diffusers object
+        :param prediction_type_helper: Passed through to probe()
+        :raises Exception: if model is none of the supported kinds
+        '''
+        if isinstance(model,Path):
+            return cls.probe(model_path=model,prediction_type_helper=prediction_type_helper)
+        elif isinstance(model,(dict,ModelMixin,ConfigMixin)):
+            return cls.probe(model_path=None, model=model, prediction_type_helper=prediction_type_helper)
+        else:
+            # f-string so that the offending value actually appears in the message
+            raise Exception(f"model parameter {model} is neither a Path, nor a model")
+
+    @classmethod
+    def probe(cls,
+              model_path: Path,
+              model: Union[Dict, ModelMixin] = None,
+              prediction_type_helper: Callable[[Path],BaseModelType] = None)->ModelVariantInfo:
+        '''
+        Probe the model at model_path and return sufficient information about it
+        to place it somewhere in the models directory hierarchy. If the model is
+        already loaded into memory, you may provide it as model in order to avoid
+        opening it a second time. The prediction_type_helper callable is a function that receives
+        the path to the model and returns the BaseModelType. It is called to distinguish
+        between V2-Base and V2-768 SD models.
+
+        Returns None when no probe is registered for the detected model type
+        or when probing fails for any reason; failures are logged at debug level.
+        '''
+        if model_path:
+            format = 'folder' if model_path.is_dir() else 'checkpoint'
+        else:
+            format = 'folder' if isinstance(model,(ConfigMixin,ModelMixin)) else 'checkpoint'
+
+        model_info = None
+        try:
+            model_type = cls.get_model_type_from_folder(model_path, model) \
+                if format == 'folder' \
+                   else cls.get_model_type_from_checkpoint(model_path, model)
+            probe_class = cls.PROBES[format].get(model_type)
+            if not probe_class:
+                return None
+            probe = probe_class(model_path, model, prediction_type_helper)
+            base_type = probe.get_base_type()
+            variant_type = probe.get_variant_type()
+            prediction_type = probe.get_scheduler_prediction_type()
+            # upcast_attention and the 768 image size are both tied to this condition
+            is_sd2_vpred = (base_type==BaseModelType.StableDiffusion2 \
+                            and prediction_type==SchedulerPredictionType.VPrediction)
+            model_info = ModelVariantInfo(
+                model_type = model_type,
+                base_type = base_type,
+                variant_type = variant_type,
+                prediction_type = prediction_type,
+                upcast_attention = is_sd2_vpred,
+                format = format,
+                image_size = 768 if is_sd2_vpred else 512,
+            )
+        except Exception as e:
+            # probing is best-effort: failure is still reported as None, but
+            # the reason is no longer discarded silently
+            logger.debug(f"Unable to probe model {model_path}: {e}")
+            return None
+
+        return model_info
+
+    @classmethod
+    def get_model_type_from_checkpoint(cls, model_path: Path, checkpoint: dict)->ModelType:
+        '''
+        Guess the model type of a checkpoint file from its name and from
+        the key names in its state dict. Returns None for an unsupported
+        suffix or when nothing matches.
+        '''
+        if model_path.suffix not in ('.bin','.pt','.ckpt','.safetensors'):
+            return None
+        if model_path.name=='learned_embeds.bin':
+            return ModelType.TextualInversion
+        checkpoint = checkpoint or cls._scan_and_load_checkpoint(model_path)
+        state_dict = checkpoint.get("state_dict") or checkpoint
+
+        def has_prefix(prefix):
+            # true when any key of the state dict starts with prefix
+            return any([name.startswith(prefix) for name in state_dict.keys()])
+
+        if has_prefix("model.diffusion_model"):
+            return ModelType.Pipeline
+        if has_prefix("encoder.conv_in"):
+            return ModelType.Vae
+        if "string_to_token" in state_dict or "emb_params" in state_dict:
+            return ModelType.TextualInversion
+        if has_prefix("lora"):
+            return ModelType.Lora
+        if has_prefix("control_model") or has_prefix("input_blocks"):
+            return ModelType.ControlNet
+        return None # give up
+
+    @classmethod
+    def get_model_type_from_folder(cls, folder_path: Path, model: ModelMixin)->ModelType:
+        '''
+        Get the model type of a hugging-face style folder, either from the
+        class of an already-loaded model or from marker files and the
+        `_class_name` entry of the folder's config file.
+        Raises ValueError when the type cannot be determined.
+        '''
+        class_name = None
+        if model:
+            class_name = model.__class__.__name__
+        else:
+            if (folder_path / 'learned_embeds.bin').exists():
+                return ModelType.TextualInversion
+
+            if (folder_path / 'pytorch_lora_weights.bin').exists():
+                return ModelType.Lora
+
+            # model_index.json takes precedence over config.json
+            config_path = None
+            for candidate_name in ('model_index.json', 'config.json'):
+                candidate = folder_path / candidate_name
+                if candidate.exists():
+                    config_path = candidate
+                    break
+
+            if config_path:
+                with open(config_path,'r') as file:
+                    conf = json.load(file)
+                class_name = conf['_class_name']
+
+        if class_name:
+            model_type = cls.CLASS2TYPE.get(class_name)
+            if model_type:
+                return model_type
+
+        # give up
+        raise ValueError("Unable to determine model type")
+
+    @classmethod
+    def _scan_and_load_checkpoint(cls,model_path: Path)->dict:
+        '''
+        Load the checkpoint at model_path and return it. Pickle-based files
+        (.ckpt, .pt, .bin) are first passed through _scan_model(); any other
+        suffix is loaded with safetensors.
+        '''
+        with SilenceWarnings():
+            if model_path.suffix.endswith((".ckpt", ".pt", ".bin")):
+                cls._scan_model(model_path, model_path)
+                # Probing only inspects keys and shapes, so keep the tensors
+                # on the CPU; without map_location a checkpoint saved from a
+                # GPU would be restored onto that device.
+                return torch.load(model_path, map_location="cpu")
+            else:
+                return safetensors.torch.load_file(model_path)
+
+    @classmethod
+    def _scan_model(cls, model_name, checkpoint):
+        """
+        Apply picklescanner to the indicated checkpoint and raise an
+        exception if an infected file is identified.
+
+        :param model_name: Name used in the error message
+        :param checkpoint: Path of the file to scan
+        :raises Exception: if the scan reports any infected files
+        """
+        # scan model
+        scan_result = scan_file_path(checkpoint)
+        if scan_result.infected_files != 0:
+            # a plain string cannot be raised in Python 3; wrap it in an
+            # exception and interpolate the model name
+            raise Exception(f"The model {model_name} is potentially infected by malware. Aborting import.")
+
+###################################################
+# Checkpoint probing
+###################################################
+class ProbeBase(object):
+    '''
+    Interface shared by all probes. The default implementations do nothing
+    and therefore return None; subclasses override what they can determine.
+    '''
+    def get_base_type(self)->BaseModelType:
+        '''Return the base model type of the probed model.'''
+        pass
+
+    def get_variant_type(self)->ModelVariantType:
+        '''Return the variant type of the probed model.'''
+        pass
+    
+    def get_scheduler_prediction_type(self)->SchedulerPredictionType:
+        '''Return the scheduler prediction type of the probed model.'''
+        pass
+
+class CheckpointProbeBase(ProbeBase):
+    '''Base class for probes that inspect a single-file checkpoint.'''
+    def __init__(self,
+                 checkpoint_path: Path,
+                 checkpoint: dict,
+                 helper: Callable[[Path],BaseModelType] = None
+                 )->None:
+        '''
+        :param checkpoint_path: Path to the checkpoint file
+        :param checkpoint: Already-loaded checkpoint; when falsy the file is scanned and loaded from checkpoint_path
+        :param helper: Optional callable that receives the checkpoint path; used by subclasses when the contents are inconclusive
+        '''
+        self.checkpoint = checkpoint or ModelProbe._scan_and_load_checkpoint(checkpoint_path)
+        self.checkpoint_path = checkpoint_path
+        self.helper = helper
+
+    def get_base_type(self)->BaseModelType:
+        '''Overridden by subclasses; this base implementation returns None.'''
+        pass
+
+    def get_variant_type(self)-> ModelVariantType:
+        '''
+        Return the variant of the checkpoint. Anything that is not a pipeline
+        is Normal; for pipelines the variant follows from the number of input
+        channels of the first UNet input block (9: Inpaint, 5: Depth, 4: Normal).
+        Raises Exception for any other channel count.
+        '''
+        model_type = ModelProbe.get_model_type_from_checkpoint(self.checkpoint_path,self.checkpoint)
+        if model_type != ModelType.Pipeline:
+            return ModelVariantType.Normal
+        state_dict = self.checkpoint.get('state_dict') or self.checkpoint
+        in_channels = state_dict[
+            "model.diffusion_model.input_blocks.0.0.weight"
+        ].shape[1]
+        if in_channels == 9:
+            return ModelVariantType.Inpaint
+        elif in_channels == 5:
+            return ModelVariantType.Depth
+        elif in_channels == 4:
+            return ModelVariantType.Normal
+        else:
+            raise Exception("Cannot determine variant type")
+
+class PipelineCheckpointProbe(CheckpointProbeBase):
+    '''Probe for single-file stable diffusion pipeline checkpoints.'''
+
+    def get_base_type(self)->BaseModelType:
+        '''
+        Derive the base model from the last dimension of a cross-attention
+        key projection weight: 768 is StableDiffusion1, 1024 is StableDiffusion2.
+        Raises Exception when the weight is missing or has another size.
+        '''
+        state_dict = self.checkpoint.get('state_dict') or self.checkpoint
+        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
+        if key_name in state_dict:
+            width = state_dict[key_name].shape[-1]
+            if width == 768:
+                return BaseModelType.StableDiffusion1
+            if width == 1024:
+                return BaseModelType.StableDiffusion2
+        raise Exception("Cannot determine base type")
+
+    def get_scheduler_prediction_type(self)->SchedulerPredictionType:
+        '''
+        StableDiffusion1 checkpoints are always Epsilon. Otherwise the
+        checkpoint's `global_step` decides when it has one of two known
+        values; failing that, the helper (if any) is asked, else None.
+        '''
+        if self.get_base_type() == BaseModelType.StableDiffusion1:
+            return SchedulerPredictionType.Epsilon
+
+        checkpoint = self.checkpoint
+        state_dict = checkpoint.get('state_dict') or checkpoint
+        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
+        if not (key_name in state_dict and state_dict[key_name].shape[-1] == 1024):
+            return None
+
+        global_step = checkpoint.get('global_step')
+        if global_step == 220000:
+            return SchedulerPredictionType.Epsilon
+        if global_step == 110000:
+            return SchedulerPredictionType.VPrediction
+
+        if self.checkpoint_path and self.helper:
+            return self.helper(self.checkpoint_path)
+        return None
+
+class VaeCheckpointProbe(CheckpointProbeBase):
+    '''Probe for standalone VAE checkpoints.'''
+    def get_base_type(self)->BaseModelType:
+        '''Always reports StableDiffusion1; the checkpoint contents are not inspected.'''
+        # I can't find any standalone 2.X VAEs to test with!
+        return BaseModelType.StableDiffusion1
+
+class LoRACheckpointProbe(CheckpointProbeBase):
+    '''Probe for LoRA checkpoints.'''
+
+    def get_base_type(self)->BaseModelType:
+        '''
+        Read the token vector length from one of two known text-encoder
+        weights (defaulting to 768 when neither is present) and map it to
+        a base model: 768 is StableDiffusion1, 1024 is StableDiffusion2,
+        anything else gives None.
+        '''
+        checkpoint = self.checkpoint
+        key1 = "lora_te_text_model_encoder_layers_0_mlp_fc1.lora_down.weight"
+        key2 = "lora_te_text_model_encoder_layers_0_self_attn_k_proj.hada_w1_a"
+
+        if key1 in checkpoint:
+            lora_token_vector_length = checkpoint[key1].shape[1]
+        elif key2 in checkpoint:
+            lora_token_vector_length = checkpoint[key2].shape[0]
+        else:
+            lora_token_vector_length = 768
+
+        base_by_length = {
+            768: BaseModelType.StableDiffusion1,
+            1024: BaseModelType.StableDiffusion2,
+        }
+        return base_by_length.get(lora_token_vector_length)
+
+class TextualInversionCheckpointProbe(CheckpointProbeBase):
+    '''Probe for textual inversion embedding files.'''
+
+    def get_base_type(self)->BaseModelType:
+        '''
+        Map the embedding width to a base model: 768 is StableDiffusion1,
+        1024 is StableDiffusion2, anything else gives None. The width is
+        the last dimension of the first embedding tensor found.
+        '''
+        checkpoint = self.checkpoint
+        # test for the key that is actually read on the next line
+        if 'string_to_param' in checkpoint:
+            token_dim = list(checkpoint['string_to_param'].values())[0].shape[-1]
+        elif 'emb_params' in checkpoint:
+            token_dim = checkpoint['emb_params'].shape[-1]
+        else:
+            # Use the last axis, as the other branches do: identical to
+            # shape[0] for a 1-D tensor. Presumably a 2-D tensor here is
+            # (tokens, width) -- TODO confirm.
+            token_dim = list(checkpoint.values())[0].shape[-1]
+        if token_dim == 768:
+            return BaseModelType.StableDiffusion1
+        elif token_dim == 1024:
+            return BaseModelType.StableDiffusion2
+        else:
+            return None
+
+class ControlNetCheckpointProbe(CheckpointProbeBase):
+    '''Probe for ControlNet checkpoints.'''
+
+    def get_base_type(self)->BaseModelType:
+        '''
+        Derive the base model from the last dimension of a cross-attention
+        key projection weight, looked up under two possible key names:
+        768 is StableDiffusion1, 1024 is StableDiffusion2. For any other
+        size the helper is asked when one is available.
+        Raises Exception when no answer can be found.
+        '''
+        checkpoint = self.checkpoint
+        for key_name in ('control_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight',
+                         'input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight'
+                         ):
+            if key_name not in checkpoint:
+                continue
+            if checkpoint[key_name].shape[-1] == 768:
+                return BaseModelType.StableDiffusion1
+            elif checkpoint[key_name].shape[-1] == 1024:
+                return BaseModelType.StableDiffusion2
+            elif self.checkpoint_path and self.helper:
+                return self.helper(self.checkpoint_path)
+        # f-string so that the path actually appears in the message
+        raise Exception(f"Unable to determine base type for {self.checkpoint_path}")
+
+########################################################
+# classes for probing folders
+#######################################################
+class FolderProbeBase(ProbeBase):
+    """Common base for probes that inspect a model stored as a folder."""
+
+    def __init__(self,
+                 folder_path: Path,
+                 model: ModelMixin = None,
+                 helper: Callable=None  # not used
+                 ):
+        """Remember the folder to inspect and, optionally, a loaded model."""
+        self.folder_path = folder_path
+        self.model = model
+
+    def get_variant_type(self) -> ModelVariantType:
+        """Report the normal variant; subclasses may refine this."""
+        return ModelVariantType.Normal
+
+class PipelineFolderProbe(FolderProbeBase):
+    """Probe for a diffusers pipeline folder or an already-loaded pipeline."""
+
+    def _load_unet_config(self):
+        """Return the UNet config from the loaded model, else from unet/config.json."""
+        if self.model:
+            return self.model.unet.config
+        with open(self.folder_path / 'unet' / 'config.json', 'r') as file:
+            return json.load(file)
+
+    def get_base_type(self) -> BaseModelType:
+        """Return the base model implied by the UNet's cross_attention_dim.
+
+        Only the UNet config is consulted, so a missing scheduler config
+        does not prevent the base type from being determined.
+
+        Raises:
+            ValueError: if cross_attention_dim is neither 768 nor 1024.
+        """
+        unet_conf = self._load_unet_config()
+
+        if unet_conf['cross_attention_dim'] == 768:
+            return BaseModelType.StableDiffusion1
+        elif unet_conf['cross_attention_dim'] == 1024:
+            return BaseModelType.StableDiffusion2
+        else:
+            raise ValueError(f'Unknown base model for {self.folder_path}')
+
+    def get_scheduler_prediction_type(self) -> SchedulerPredictionType:
+        """Return the scheduler's prediction type, or None if unrecognised."""
+        if self.model:
+            scheduler_conf = self.model.scheduler.config
+        else:
+            with open(self.folder_path / 'scheduler' / 'scheduler_config.json', 'r') as file:
+                scheduler_conf = json.load(file)
+        if scheduler_conf['prediction_type'] == "v_prediction":
+            return SchedulerPredictionType.VPrediction
+        elif scheduler_conf['prediction_type'] == 'epsilon':
+            return SchedulerPredictionType.Epsilon
+        else:
+            return None
+
+    def get_variant_type(self) -> ModelVariantType:
+        """Return the variant implied by the UNet's in_channels.
+
+        This only works for pipelines. It is deliberately best-effort: any
+        error while reading the config, or an unrecognised channel count,
+        results in the "normal" variant.
+        """
+        try:
+            conf = self._load_unet_config()
+            in_channels = conf['in_channels']
+            if in_channels == 9:
+                # the enum member is `Inpaint`; `Inpainting` does not exist
+                # and used to be silently swallowed by the handler below
+                return ModelVariantType.Inpaint
+            elif in_channels == 5:
+                return ModelVariantType.Depth
+            elif in_channels == 4:
+                return ModelVariantType.Normal
+        except Exception:
+            # best-effort by design, but let KeyboardInterrupt/SystemExit through
+            pass
+        return ModelVariantType.Normal
+
+class VaeFolderProbe(FolderProbeBase):
+    """Probe for VAE models stored as a folder."""
+
+    def get_base_type(self) -> BaseModelType:
+        """Always report Stable Diffusion 1 as the base model."""
+        return BaseModelType.StableDiffusion1
+
+class TextualInversionFolderProbe(FolderProbeBase):
+    """Probe for textual-inversion embeddings stored as a folder."""
+
+    def get_base_type(self) -> BaseModelType:
+        """Delegate to the checkpoint probe on ``learned_embeds.bin``.
+
+        Returns None when the folder does not contain that file.
+        """
+        embeds_file = self.folder_path / 'learned_embeds.bin'
+        if embeds_file.exists():
+            checkpoint = ModelProbe._scan_and_load_checkpoint(embeds_file)
+            checkpoint_probe = TextualInversionCheckpointProbe(None, checkpoint=checkpoint)
+            return checkpoint_probe.get_base_type()
+        return None
+
+class ControlNetFolderProbe(FolderProbeBase):
+    """Probe for ControlNet models stored as a folder."""
+
+    def get_base_type(self) -> BaseModelType:
+        """Return the base model implied by config.json's cross_attention_dim.
+
+        A value of 768 means Stable Diffusion 1; every other value is
+        reported as Stable Diffusion 2.
+
+        Raises:
+            Exception: if the folder has no config.json.
+        """
+        config_file = self.folder_path / 'config.json'
+        if not config_file.exists():
+            raise Exception(f"Cannot determine base type for {self.folder_path}")
+        with open(config_file, 'r') as file:
+            config = json.load(file)
+        # no obvious way to distinguish between sd2-base and sd2-768
+        if config['cross_attention_dim'] == 768:
+            return BaseModelType.StableDiffusion1
+        return BaseModelType.StableDiffusion2
+
+class LoRAFolderProbe(FolderProbeBase):
+    # Placeholder probe for LoRA models stored as a folder. It adds nothing
+    # to FolderProbeBase: the author has never seen a folder-format LoRA in
+    # the wild, so this is a no-op.
+    pass
+
+############## register probe classes ######
+# One row per (format, model type, probe class); folder probes are
+# registered first, then checkpoint probes.
+for _probe_format, _probe_model_type, _probe_class in (
+    ('folder', ModelType.Pipeline, PipelineFolderProbe),
+    ('folder', ModelType.Vae, VaeFolderProbe),
+    ('folder', ModelType.Lora, LoRAFolderProbe),
+    ('folder', ModelType.TextualInversion, TextualInversionFolderProbe),
+    ('folder', ModelType.ControlNet, ControlNetFolderProbe),
+    ('checkpoint', ModelType.Pipeline, PipelineCheckpointProbe),
+    ('checkpoint', ModelType.Vae, VaeCheckpointProbe),
+    ('checkpoint', ModelType.Lora, LoRACheckpointProbe),
+    ('checkpoint', ModelType.TextualInversion, TextualInversionCheckpointProbe),
+    ('checkpoint', ModelType.ControlNet, ControlNetCheckpointProbe),
+):
+    ModelProbe.register_probe(_probe_format, _probe_model_type, _probe_class)
+# do not leave the loop variables behind in the module namespace
+del _probe_format, _probe_model_type, _probe_class
--- a/invokeai/backend/model_management/models/init.py
+++ b/invokeai/backend/model_management/models/init.py
@ -0,0 +1,38 @@
+from .base import BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase, ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings
+from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
+from .vae import VaeModel
+from .lora import LoRAModel
+from .controlnet import ControlNetModel # TODO:
+from .textual_inversion import TextualInversionModel
+
+# Registry of model implementations, keyed first by base model and then by
+# model type. Only the Pipeline entry differs between the two base models;
+# the Vae, Lora, ControlNet and TextualInversion classes are shared.
+MODEL_CLASSES = {
+    BaseModelType.StableDiffusion1: {
+        ModelType.Pipeline: StableDiffusion1Model,
+        ModelType.Vae: VaeModel,
+        ModelType.Lora: LoRAModel,
+        ModelType.ControlNet: ControlNetModel,
+        ModelType.TextualInversion: TextualInversionModel,
+    },
+    BaseModelType.StableDiffusion2: {
+        ModelType.Pipeline: StableDiffusion2Model,
+        ModelType.Vae: VaeModel,
+        ModelType.Lora: LoRAModel,
+        ModelType.ControlNet: ControlNetModel,
+        ModelType.TextualInversion: TextualInversionModel,
+    },
+    # Disabled entry kept as a template for a future base model.
+    #BaseModelType.Kandinsky2_1: {
+    #    ModelType.Pipeline: Kandinsky2_1Model,
+    #    ModelType.MoVQ: MoVQModel,
+    #    ModelType.Lora: LoRAModel,
+    #    ModelType.ControlNet: ControlNetModel,
+    #    ModelType.TextualInversion: TextualInversionModel,
+    #},
+}
+
+def get_all_model_configs():
+    """Return every distinct config class declared by a registered model.
+
+    Walks MODEL_CLASSES, collects the values of each model class's
+    ``_get_configs()`` table, drops ``None`` and returns the result as a
+    list (in no particular order).
+    """
+    unique_configs = {
+        config
+        for registry in MODEL_CLASSES.values()
+        for model_class in registry.values()
+        for config in model_class._get_configs().values()
+    }
+    unique_configs.discard(None)
+    return list(unique_configs) # TODO: set, list or tuple
--- a/invokeai/backend/model_management/models/base.py
+++ b/invokeai/backend/model_management/models/base.py
@ -0,0 +1,418 @@
+import os
+import sys
+import typing
+import inspect
+from enum import Enum
+from abc import ABCMeta, abstractmethod
+import torch
+import safetensors.torch
+from diffusers import DiffusionPipeline, ConfigMixin
+
+from contextlib import suppress
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional, Type, Literal, TypeVar, Generic, Callable, Any, Union
+
+# Identifiers of the base model families; each member is also a plain
+# string ("sd-1", "sd-2"). The registry in models/__init__.py is keyed by
+# these members.
+class BaseModelType(str, Enum):
+    StableDiffusion1 = "sd-1"
+    StableDiffusion2 = "sd-2"
+    #Kandinsky2_1 = "kandinsky-2.1"
+
+# Top-level kinds of model; each member is also a plain string. Note that
+# TextualInversion serialises as "embedding", not "textual_inversion".
+class ModelType(str, Enum):
+    Pipeline = "pipeline"
+    Vae = "vae"
+    Lora = "lora"
+    ControlNet = "controlnet" # used by model_probe
+    TextualInversion = "embedding"
+
+# Names of the components (child models) of a diffusers pipeline.
+# DiffusersModel.get_model passes the member's value as the `subfolder`
+# argument when loading a child, so the values are presumably expected to
+# match the on-disk subfolder names -- confirm against the model layout.
+class SubModelType(str, Enum):
+    UNet = "unet"
+    TextEncoder = "text_encoder"
+    Tokenizer = "tokenizer"
+    Vae = "vae"
+    Scheduler = "scheduler"
+    SafetyChecker = "safety_checker"
+    #MoVQ = "movq"
+
+# Variants of a pipeline model. The stable diffusion probe_config methods
+# derive these from the UNet's in_channels: 4 -> Normal, 9 -> Inpaint and
+# (Stable Diffusion 2 only) 5 -> Depth.
+class ModelVariantType(str, Enum):
+    Normal = "normal"
+    Inpaint = "inpaint"
+    Depth = "depth"
+
+# Scheduler prediction types; the values mirror the strings compared
+# against a scheduler config's 'prediction_type' entry by the model probe
+# ("epsilon", "v_prediction").
+class SchedulerPredictionType(str, Enum):
+    Epsilon = "epsilon"
+    VPrediction = "v_prediction"
+    Sample = "sample"
+    
+# Error states that can be attached to a model config (see
+# ModelConfigBase.error).
+class ModelError(str, Enum):
+    NotFound = "not_found"
+
+# Fields shared by every model config. Concrete configs subclass this and
+# narrow `format` to a Literal (or a Union of Literals), which
+# ModelBase._get_configs uses to map a format name to its config class.
+class ModelConfigBase(BaseModel):
+    path: str # or Path
+    #name: str # not included as present in model key
+    description: Optional[str] = Field(None)
+    format: Optional[str] = Field(None)
+    default: Optional[bool] = Field(False)
+    # do not save to config
+    error: Optional[ModelError] = Field(None, exclude=True)
+
+    class Config:
+        # keep enum-typed fields as their plain values rather than enum members
+        use_enum_values = True
+
+
+class EmptyConfigLoader(ConfigMixin):
+    """ConfigMixin whose config file name is chosen per call."""
+
+    @classmethod
+    def load_config(cls, *args, **kwargs):
+        """Load a config after setting ``config_name`` from the keyword of that name.
+
+        The ``config_name`` keyword is required; it is removed from
+        ``kwargs`` and stored on the class before the remaining arguments
+        are forwarded to ``ConfigMixin.load_config``.
+        """
+        requested_name = kwargs.pop("config_name")
+        cls.config_name = requested_name
+        return super().load_config(*args, **kwargs)
+
+T_co = TypeVar('T_co', covariant=True)
+class classproperty(Generic[T_co]):
+    """Read-only descriptor that computes its value from the owning class.
+
+    The wrapped function receives the class, whether the attribute is read
+    through the class itself or through an instance.
+    """
+
+    def __init__(self, fget: Callable[[Any], T_co]) -> None:
+        self.fget = fget
+
+    def __set__(self, obj: Optional[Any], value: Any) -> None:
+        # assignment through an instance is always rejected
+        raise AttributeError('cannot set attribute')
+
+    def __get__(self, obj: Optional[Any], objtype: Type[Any]) -> T_co:
+        getter = self.fget
+        return getter(objtype)
+
+class ModelBase(metaclass=ABCMeta):
+    """Abstract base class of every model implementation.
+
+    Stores the model's path, base model and model type, and provides the
+    machinery that maps a config ``format`` value to the nested config
+    class declaring it. Subclasses supply ``detect_format``,
+    ``save_to_config``, ``get_size`` and ``get_model``.
+    """
+    #model_path: str
+    #base_model: BaseModelType
+    #model_type: ModelType
+
+    def __init__(
+        self,
+        model_path: str,
+        base_model: BaseModelType,
+        model_type: ModelType,
+    ):
+        self.model_path = model_path
+        self.base_model = base_model
+        self.model_type = model_type
+
+    def _hf_definition_to_type(self, subtypes: List[str]) -> Type:
+        """Resolve a list of names to an object by chained attribute lookup.
+
+        If the first name is "diffusers" or "transformers", lookup starts
+        at that module (taken from ``sys.modules``) and continues with the
+        remaining names; otherwise it starts at ``diffusers.pipelines`` and
+        uses all names.
+
+        Raises:
+            Exception: if fewer than two names are given.
+        """
+        if len(subtypes) < 2:
+            raise Exception("Invalid subfolder definition!")
+        if subtypes[0] in ["diffusers", "transformers"]:
+            res_type = sys.modules[subtypes[0]]
+            subtypes = subtypes[1:]
+
+        else:
+            res_type = sys.modules["diffusers"]
+            res_type = getattr(res_type, "pipelines")
+
+
+        for subtype in subtypes:
+            res_type = getattr(res_type, subtype)
+        return res_type
+
+    @classmethod
+    def _get_configs(cls):
+        """Return the mapping from format name to config class for ``cls``.
+
+        Every class attribute that is a ModelConfigBase subclass is
+        inspected; its ``format`` annotation must be a Literal or a Union
+        of Literals, and each literal value becomes a key of the result.
+        The table is built once and cached on the class.
+
+        Raises:
+            Exception: if a config class has no ``format`` annotation or
+                one of an unsupported form.
+        """
+        # Fast path: return the cached table; any failure to read it (e.g.
+        # not computed yet) falls through to building it.
+        # NOTE(review): `__configs` is name-mangled to `_ModelBase__configs`
+        # and read via normal attribute lookup, so a subclass could pick up
+        # a table cached on one of its parents -- confirm this is intended.
+        with suppress(Exception):
+            return cls.__configs
+        
+        configs = dict()
+        for name in dir(cls):
+            if name.startswith("__"):
+                continue
+
+            value = getattr(cls, name)
+            if not isinstance(value, type) or not issubclass(value, ModelConfigBase):
+                continue
+
+            fields = inspect.get_annotations(value)
+            if "format" not in fields:
+                raise Exception("Invalid config definition - format field not found")
+
+            format_type = typing.get_origin(fields["format"])
+            if format_type not in {None, Literal, Union}:
+                raise Exception(f"Invalid config definition - unknown format type: {fields['format']}")
+
+            if format_type is Union and not all(typing.get_origin(v) in {None, Literal} for v in fields["format"].__args__):
+                raise Exception(f"Invalid config definition - unknown format type: {fields['format']}")
+
+            # normalise to a tuple of annotations, one per accepted format
+            if format_type == Union:
+                f_fields = fields["format"].__args__
+            else:
+                f_fields = (fields["format"],)
+                    
+
+            for field in f_fields:
+                if field is None:
+                    format_name = None
+                else:
+                    # the first argument of Literal[...] is the format string
+                    format_name = field.__args__[0]
+
+                configs[format_name] = value # TODO: error when override(multiple)?
+
+
+        cls.__configs = configs
+        return cls.__configs
+
+    @classmethod
+    def create_config(cls, **kwargs) -> ModelConfigBase:
+        """Instantiate the config class registered for ``kwargs["format"]``.
+
+        Raises:
+            Exception: if ``format`` is not among the keyword arguments.
+            KeyError: if no config class declares the given format.
+        """
+        if "format" not in kwargs:
+            raise Exception("Field 'format' not found in model config")
+
+        configs = cls._get_configs()
+        return configs[kwargs["format"]](**kwargs)
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs) -> ModelConfigBase:
+        """Build a config for ``path`` using the format from ``detect_format``.
+
+        Extra keyword arguments are accepted but not used here.
+        """
+        return cls.create_config(
+            path=path,
+            format=cls.detect_format(path),
+        )
+
+    @classmethod
+    @abstractmethod
+    def detect_format(cls, path: str) -> str:
+        """Return the format name of the model stored at ``path``."""
+        raise NotImplementedError()
+
+    @classproperty
+    @abstractmethod
+    def save_to_config(cls) -> bool:
+        """Class-level boolean flag that each model class must define."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_size(self, child_type: Optional[SubModelType] = None) -> int:
+        """Return the size of the model, or of one child when given."""
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ) -> Any:
+        """Load and return the model, or one child when ``child_type`` is given."""
+        raise NotImplementedError()
+
+
+class DiffusersModel(ModelBase):
+    """Base class for models stored in the diffusers multi-folder layout.
+
+    On construction the pipeline config is loaded from ``model_path``. For
+    every entry whose value is a list, the component class is resolved and
+    a size estimate taken from the files on disk is recorded.
+    """
+    #child_types: Dict[str, Type]
+    #child_sizes: Dict[str, int]
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        """Read the pipeline config and index its child components.
+
+        Raises:
+            Exception: if the pipeline config cannot be loaded; the
+                underlying error is attached as ``__cause__``.
+        """
+        super().__init__(model_path, base_model, model_type)
+
+        self.child_types: Dict[str, Type] = dict()
+        self.child_sizes: Dict[str, int] = dict()
+
+        try:
+            config_data = DiffusionPipeline.load_config(self.model_path)
+        except Exception as e:
+            # chain the cause so the real reason is not lost
+            raise Exception("Invalid diffusers model! (model_index.json not found or invalid)") from e
+
+        config_data.pop("_ignore_files", None)
+
+        # retrieve all folder_names that contain relevant files
+        child_components = [k for k, v in config_data.items() if isinstance(v, list)]
+
+        for child_name in child_components:
+            child_type = self._hf_definition_to_type(config_data[child_name])
+            self.child_types[child_name] = child_type
+            self.child_sizes[child_name] = calc_model_size_by_fs(self.model_path, subfolder=child_name)
+
+
+    def get_size(self, child_type: Optional[SubModelType] = None):
+        """Return the size of one child, or the sum over all children."""
+        if child_type is None:
+            return sum(self.child_sizes.values())
+        else:
+            return self.child_sizes[child_type]
+
+
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ):
+        """Load one child component of the pipeline.
+
+        The "fp16" weight variant is tried first when ``torch_dtype`` is
+        float16 and second otherwise. After a successful load the child's
+        recorded size is replaced by one computed from the loaded model.
+
+        Returns:
+            The loaded child, or None if the pipeline has no such child.
+
+        Raises:
+            Exception: if ``child_type`` is None, or if no variant could be
+                loaded (the last load error is attached as ``__cause__``).
+        """
+        # return pipeline in different function to pass more arguments
+        if child_type is None:
+            raise Exception("Child model type can't be null on diffusers model")
+        if child_type not in self.child_types:
+            return None # TODO: or raise
+
+        if torch_dtype == torch.float16:
+            variants = ["fp16", None]
+        else:
+            variants = [None, "fp16"]
+
+        # TODO: better error handling(differentiate not found from others)
+        last_error = None
+        for variant in variants:
+            try:
+                # TODO: set cache_dir to /dev/null to be sure that cache not used?
+                model = self.child_types[child_type].from_pretrained(
+                    self.model_path,
+                    subfolder=child_type.value,
+                    torch_dtype=torch_dtype,
+                    variant=variant,
+                    local_files_only=True,
+                )
+                break
+            except Exception as e:
+                # remember why this variant failed, then try the next one
+                last_error = e
+        else:
+            raise Exception(
+                f"Failed to load {self.base_model}:{self.model_type}:{child_type} model"
+            ) from last_error
+
+        # calc more accurate size
+        self.child_sizes[child_type] = calc_model_size_by_data(model)
+        return model
+
+    #def convert_if_required(model_path: str, cache_path: str, config: Optional[dict]) -> str:
+
+
+
+def calc_model_size_by_fs(
+    model_path: str,
+    subfolder: Optional[str] = None,
+    variant: Optional[str] = None
+):
+    """Estimate a model's size in bytes from the files in its folder.
+
+    Args:
+        model_path: folder holding the model.
+        subfolder: optional subfolder of ``model_path`` to look in.
+        variant: None for the default weights, or "fp16" / "8bit" to
+            consider only the files tagged with that variant.
+
+    Returns:
+        The ``total_size`` recorded in a matching index file if one can be
+        read; otherwise the summed size of the weight files of the first
+        known format that is present; otherwise 0 (missing folder, or a
+        folder without weight files).
+
+    Raises:
+        NotImplementedError: if ``variant`` is not one of the known values.
+    """
+    # Imported here because this module's import block does not provide
+    # `json`; without it the index lookup below could never succeed.
+    import json
+
+    if subfolder is not None:
+        model_path = os.path.join(model_path, subfolder)
+
+    # this can happen when, for example, the safety checker
+    # is not downloaded.
+    if not os.path.exists(model_path):
+        return 0
+
+    all_files = [
+        f for f in os.listdir(model_path)
+        if os.path.isfile(os.path.join(model_path, f))
+    ]
+
+    fp16_files = {f for f in all_files if ".fp16." in f or ".fp16-" in f}
+    bit8_files = {f for f in all_files if ".8bit." in f or ".8bit-" in f}
+    other_files = set(all_files) - fp16_files - bit8_files
+
+    if variant is None:
+        files = other_files
+    elif variant == "fp16":
+        files = fp16_files
+    elif variant == "8bit":
+        files = bit8_files
+    else:
+        raise NotImplementedError(f"Unknown variant: {variant}")
+
+    # try read from index if exists
+    index_postfix = ".index.json"
+    if variant is not None:
+        index_postfix = f".index.{variant}.json"
+
+    for file in files:
+        if not file.endswith(index_postfix):
+            continue
+        try:
+            with open(os.path.join(model_path, file), "r") as f:
+                index_data = json.load(f)
+            return int(index_data["metadata"]["total_size"])
+        except (OSError, ValueError, KeyError, TypeError):
+            # unreadable or malformed index: fall back to summing file sizes
+            pass
+
+    # calculate files size if there is no index file
+    formats = (
+        (".safetensors",), # safetensors
+        (".bin",), # torch
+        (".onnx", ".pb"), # onnx
+        (".msgpack",), # flax
+        (".ckpt",), # tf
+        (".h5",), # tf2
+    )
+
+    for file_format in formats:
+        model_files = [f for f in files if f.endswith(file_format)]
+        if not model_files:
+            continue
+
+        # only the first format found is counted
+        return sum(
+            os.stat(os.path.join(model_path, model_file)).st_size
+            for model_file in model_files
+        )
+
+    #raise NotImplementedError(f"Unknown model structure! Files: {all_files}")
+    return 0 # scheduler/feature_extractor/tokenizer - models without loading to gpu
+
+
+def calc_model_size_by_data(model) -> int:
+    """Return the in-memory size in bytes of a loaded model.
+
+    Pipelines and torch modules are measured from their tensors; any other
+    object counts as 0.
+    """
+    if isinstance(model, DiffusionPipeline):
+        return _calc_pipeline_by_data(model)
+    if isinstance(model, torch.nn.Module):
+        return _calc_model_by_data(model)
+    return 0
+
+
+def _calc_pipeline_by_data(pipeline) -> int:
+    """Sum the sizes of every pipeline component that is a torch module."""
+    components = (getattr(pipeline, name) for name in pipeline.components.keys())
+    # isinstance() is False for None, so absent components are skipped too
+    return sum(
+        _calc_model_by_data(component)
+        for component in components
+        if isinstance(component, torch.nn.Module)
+    )
+    
+
+def _calc_model_by_data(model) -> int:
+    """Return the bytes occupied by a module's parameters and buffers."""
+    param_bytes = 0
+    for param in model.parameters():
+        param_bytes += param.nelement() * param.element_size()
+
+    buffer_bytes = 0
+    for buf in model.buffers():
+        buffer_bytes += buf.nelement() * buf.element_size()
+
+    return param_bytes + buffer_bytes # in bytes
+
+
+def _fast_safetensors_reader(path: str):
+    """Read only the header of a safetensors file.
+
+    The file starts with an 8-byte little-endian length followed by that
+    many bytes of JSON describing each tensor. No tensor data is read:
+    every entry becomes an empty tensor of the recorded shape and dtype on
+    the "meta" device.
+
+    Returns:
+        dict mapping tensor name to a meta tensor.
+
+    Raises:
+        Exception: if the header's metadata declares a non-pytorch format.
+        KeyError: if a tensor uses a dtype code not in the table below.
+    """
+    # Imported here because this module's import block does not provide
+    # `json`; without it this reader could never succeed.
+    import json
+
+    # built once rather than per tensor
+    dtype_by_code = {
+        "BOOL": torch.bool,
+        "U8": torch.uint8,
+        "I8": torch.int8,
+        "I16": torch.int16,
+        "I32": torch.int32,
+        "I64": torch.int64,
+        "F16": torch.float16,
+        "BF16": torch.bfloat16,
+        "F32": torch.float32,
+        "F64": torch.float64,
+    }
+    device = torch.device("meta")
+
+    with open(path, "rb") as f:
+        definition_len = int.from_bytes(f.read(8), 'little')
+        definition = json.loads(f.read(definition_len))
+
+    # "__metadata__" is file-level information, not a tensor entry
+    metadata = definition.pop("__metadata__", None)
+    if metadata is not None and metadata.get("format", "pt") not in {"pt", "torch", "pytorch"}:
+        raise Exception("Supported only pytorch safetensors files")
+
+    return {
+        key: torch.empty(info["shape"], dtype=dtype_by_code[info["dtype"]], device=device)
+        for key, info in definition.items()
+    }
+
+
+def read_checkpoint_meta(path: str):
+    """Load a checkpoint's tensors for inspection of names and shapes.
+
+    For ``.safetensors`` files only the header is parsed when possible,
+    with a full load onto the CPU as fallback. Any other file is loaded
+    with ``torch.load`` mapped to the "meta" device.
+    """
+    if path.endswith(".safetensors"):
+        try:
+            checkpoint = _fast_safetensors_reader(path)
+        except Exception:
+            # header-only read failed: fall back to loading the whole file
+            # TODO: create issue for support "meta"?
+            checkpoint = safetensors.torch.load_file(path, device="cpu")
+    else:
+        # SECURITY: torch.load unpickles the file, which can execute
+        # arbitrary code; only call this on checkpoints that are trusted
+        # or have been scanned beforehand.
+        checkpoint = torch.load(path, map_location=torch.device("meta"))
+    return checkpoint
+
+import warnings
+from diffusers import logging as diffusers_logging
+from transformers import logging as transformers_logging
+
+class SilenceWarnings(object):
+    """Context manager that mutes transformers/diffusers logs and warnings.
+
+    Inside the ``with`` block both libraries log errors only and Python
+    warnings are ignored. On exit the log verbosities captured when the
+    object was created are reinstated, and the warning filters are put
+    back exactly as they were on entry.
+    """
+    def __init__(self):
+        self.transformers_verbosity = transformers_logging.get_verbosity()
+        self.diffusers_verbosity = diffusers_logging.get_verbosity()
+        # one saved filter state per active `with` level
+        self._warnings_contexts = []
+
+    def __enter__(self):
+        transformers_logging.set_verbosity_error()
+        diffusers_logging.set_verbosity_error()
+        # snapshot the current filters so __exit__ can restore them instead
+        # of overwriting the caller's configuration with 'default'
+        saved_filters = warnings.catch_warnings()
+        saved_filters.__enter__()
+        self._warnings_contexts.append(saved_filters)
+        warnings.simplefilter('ignore')
+
+    def __exit__(self, type, value, traceback):
+        transformers_logging.set_verbosity(self.transformers_verbosity)
+        diffusers_logging.set_verbosity(self.diffusers_verbosity)
+        self._warnings_contexts.pop().__exit__(type, value, traceback)
--- a/invokeai/backend/model_management/models/controlnet.py
+++ b/invokeai/backend/model_management/models/controlnet.py
@ -0,0 +1,87 @@
+import os
+import torch
+from pathlib import Path
+from typing import Optional, Union, Literal
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    EmptyConfigLoader,
+    calc_model_size_by_fs,
+    calc_model_size_by_data,
+    classproperty,
+)
+
+class ControlNetModel(ModelBase):
+    """ControlNet model stored as a diffusers folder.
+
+    Checkpoint-format ControlNets are recognised by ``detect_format`` but
+    cannot be converted yet.
+    """
+    #model_class: Type
+    #model_size: int
+
+    class Config(ModelConfigBase):
+        # one config class serves both formats
+        format: Union[Literal["checkpoint"], Literal["diffusers"]]
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        """Validate the folder's config.json and record class and size.
+
+        Raises:
+            Exception: if config.json cannot be loaded, names a class other
+                than "ControlNetModel", or the class/size lookup fails. The
+                underlying error, if any, is attached as ``__cause__``.
+        """
+        assert model_type == ModelType.ControlNet
+        super().__init__(model_path, base_model, model_type)
+
+        try:
+            config = EmptyConfigLoader.load_config(self.model_path, config_name="config.json")
+        except Exception as e:
+            raise Exception("Invalid controlnet model! (config.json not found or invalid)") from e
+
+        model_class_name = config.get("_class_name", None)
+        if model_class_name not in {"ControlNetModel"}:
+            raise Exception(f"Invalid ControlNet model! Unknown _class_name: {model_class_name}")
+
+        try:
+            self.model_class = self._hf_definition_to_type(["diffusers", model_class_name])
+            self.model_size = calc_model_size_by_fs(self.model_path)
+        except Exception as e:
+            raise Exception("Invalid ControlNet model!") from e
+
+    def get_size(self, child_type: Optional[SubModelType] = None):
+        """Return the model size; a ControlNet has no child models."""
+        if child_type is not None:
+            raise Exception("There is no child models in controlnet model")
+        return self.model_size
+
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ):
+        """Load the model and refresh ``model_size`` from the loaded data."""
+        if child_type is not None:
+            raise Exception("There is no child models in controlnet model")
+
+        model = self.model_class.from_pretrained(
+            self.model_path,
+            torch_dtype=torch_dtype,
+        )
+        # calc more accurate size
+        self.model_size = calc_model_size_by_data(model)
+        return model
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return False
+
+    @classmethod
+    def detect_format(cls, path: str):
+        """Return "diffusers" for a directory and "checkpoint" otherwise."""
+        if os.path.isdir(path):
+            return "diffusers"
+        else:
+            return "checkpoint"
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase, # empty config or config of parent model
+        base_model: BaseModelType,
+    ) -> str:
+        """Return the path to load from; diffusers models need no conversion.
+
+        Raises:
+            NotImplementedError: for checkpoint-format models.
+        """
+        if cls.detect_format(model_path) != "diffusers":
+            # the exception name used to be misspelled, which turned this
+            # into a NameError at runtime
+            raise NotImplementedError("Checkpoint controlnet models currently unsupported")
+        else:
+            return model_path
--- a/invokeai/backend/model_management/models/lora.py
+++ b/invokeai/backend/model_management/models/lora.py
@ -0,0 +1,70 @@
+import torch
+from typing import Optional, Union, Literal
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    classproperty,
+)
+# TODO: naming
+from ..lora import LoRAModel as LoRAModelRaw
+
+class LoRAModel(ModelBase):
+    """LoRA model stored as a single file.
+
+    Folder ("diffusers") LoRAs are recognised by ``detect_format`` but are
+    not supported yet.
+    """
+    #model_size: int
+
+    class Config(ModelConfigBase):
+        # one config class serves both formats
+        format: Union[Literal["lycoris"], Literal["diffusers"]]
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        """Record the model's location and its on-disk size."""
+        # `os` is not imported at the top of this module, so bring it into
+        # scope here; without it construction fails with NameError.
+        import os
+
+        assert model_type == ModelType.Lora
+        super().__init__(model_path, base_model, model_type)
+
+        self.model_size = os.path.getsize(self.model_path)
+
+    def get_size(self, child_type: Optional[SubModelType] = None):
+        """Return the model size; a LoRA has no child models."""
+        if child_type is not None:
+            raise Exception("There is no child models in lora")
+        return self.model_size
+
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ):
+        """Load the LoRA and refresh ``model_size`` from the loaded data."""
+        if child_type is not None:
+            raise Exception("There is no child models in lora")
+
+        model = LoRAModelRaw.from_checkpoint(
+            file_path=self.model_path,
+            dtype=torch_dtype,
+        )
+
+        self.model_size = model.calc_size()
+        return model
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return False
+
+    @classmethod
+    def detect_format(cls, path: str):
+        """Return "diffusers" for a directory and "lycoris" otherwise."""
+        # see __init__: `os` is not available at module level here
+        import os
+
+        if os.path.isdir(path):
+            return "diffusers"
+        else:
+            return "lycoris"
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        """Return the path to load from; single-file LoRAs need no conversion.
+
+        Raises:
+            NotImplementedError: for folder-format (diffusers) LoRAs.
+        """
+        if cls.detect_format(model_path) == "diffusers":
+            # TODO: add diffusers lora when it stabilizes a bit
+            raise NotImplementedError("Diffusers lora not supported")
+        else:
+            return model_path
--- a/invokeai/backend/model_management/models/stable_diffusion.py
+++ b/invokeai/backend/model_management/models/stable_diffusion.py
@ -0,0 +1,312 @@
+import os
+import json
+from pydantic import Field
+from pathlib import Path
+from typing import Literal, Optional, Union
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    ModelVariantType,
+    DiffusersModel,
+    SchedulerPredictionType,
+    SilenceWarnings,
+    read_checkpoint_meta,
+    classproperty,
+)
+from invokeai.app.services.config import InvokeAIAppConfig
+from omegaconf import OmegaConf
+
+
+class StableDiffusion1Model(DiffusersModel):
+    """Stable Diffusion 1.x pipeline, as a diffusers folder or a checkpoint."""
+
+    class DiffusersConfig(ModelConfigBase):
+        # config of a model stored as a diffusers folder
+        format: Literal["diffusers"]
+        vae: Optional[str] = Field(None)
+        variant: ModelVariantType
+
+    class CheckpointConfig(ModelConfigBase):
+        # config of a model stored as a single checkpoint file; `config`
+        # optionally names the yaml file describing the checkpoint
+        format: Literal["checkpoint"]
+        vae: Optional[str] = Field(None)
+        config: Optional[str] = Field(None)
+        variant: ModelVariantType
+
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert base_model == BaseModelType.StableDiffusion1
+        assert model_type == ModelType.Pipeline
+        super().__init__(
+            model_path=model_path,
+            base_model=BaseModelType.StableDiffusion1,
+            model_type=ModelType.Pipeline,
+        )
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs):
+        """Build a config for the model at ``path``, detecting its variant.
+
+        The UNet's input channel count is read from the checkpoint's yaml
+        config (keyword ``config``) when given, from the checkpoint's
+        first convolution weight otherwise, or from ``unet/config.json``
+        for a diffusers folder. 9 channels mean an inpainting model and 4
+        a normal one.
+
+        Raises:
+            Exception: for a diffusers folder without unet/config.json, or
+                an unexpected channel count.
+            NotImplementedError: for an unknown format.
+        """
+        model_format = cls.detect_format(path)
+        ckpt_config_path = kwargs.get("config", None)
+        if model_format == "checkpoint":
+            if ckpt_config_path:
+                ckpt_config = OmegaConf.load(ckpt_config_path)
+                # the looked-up value must be kept: it drives the variant
+                # detection below
+                in_channels = ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
+
+            else:
+                checkpoint = read_checkpoint_meta(path)
+                checkpoint = checkpoint.get('state_dict', checkpoint)
+                in_channels = checkpoint["model.diffusion_model.input_blocks.0.0.weight"].shape[1]
+
+        elif model_format == "diffusers":
+            unet_config_path = os.path.join(path, "unet", "config.json")
+            if os.path.exists(unet_config_path):
+                with open(unet_config_path, "r") as f:
+                    unet_config = json.loads(f.read())
+                in_channels = unet_config['in_channels']
+
+            else:
+                raise Exception("Not supported stable diffusion diffusers format(possibly onnx?)")
+
+        else:
+            raise NotImplementedError(f"Unknown stable diffusion 1.* format: {model_format}")
+
+        if in_channels == 9:
+            variant = ModelVariantType.Inpaint
+        elif in_channels == 4:
+            variant = ModelVariantType.Normal
+        else:
+            raise Exception("Unkown stable diffusion 1.* model format")
+
+
+        return cls.create_config(
+            path=path,
+            format=model_format,
+
+            config=ckpt_config_path,
+            variant=variant,
+        )
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return True
+
+    @classmethod
+    def detect_format(cls, model_path: str):
+        """Return "diffusers" for a directory and "checkpoint" otherwise."""
+        if os.path.isdir(model_path):
+            return "diffusers"
+        else:
+            return "checkpoint"
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        """Return the path to load from, converting a checkpoint if needed.
+
+        Checkpoint configs are handed to ``_convert_ckpt_and_cache``
+        together with ``output_path``; any other config returns
+        ``model_path`` unchanged.
+        """
+        assert model_path == config.path
+
+        if isinstance(config, cls.CheckpointConfig):
+            return _convert_ckpt_and_cache(
+                version=BaseModelType.StableDiffusion1,
+                model_config=config,
+                output_path=output_path,
+            ) # TODO: args
+        else:
+            return model_path
+
+
+class StableDiffusion2Model(DiffusersModel):
+
+    # TODO: check that configs overwriten properly
+    # Config of a Stable Diffusion 2 model stored as a diffusers folder.
+    # Unlike the 1.x config it records the scheduler prediction type and
+    # the upcast_attention flag rather than a variant.
+    class DiffusersConfig(ModelConfigBase):
+        format: Literal["diffusers"]
+        vae: Optional[str] = Field(None)
+        prediction_type: SchedulerPredictionType
+        upcast_attention: bool
+
+    # Config of a Stable Diffusion 2 model stored as a single checkpoint
+    # file; `config` optionally names the yaml file describing the
+    # checkpoint.
+    class CheckpointConfig(ModelConfigBase):
+        format: Literal["checkpoint"]
+        vae: Optional[str] = Field(None)
+        config: Optional[str] = Field(None)
+        prediction_type: SchedulerPredictionType
+        upcast_attention: bool
+
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        """Create a Stable Diffusion 2 pipeline model for ``model_path``.
+
+        ``base_model`` and ``model_type`` are only sanity-checked; the
+        parent is always initialised with the fixed values for this class.
+        """
+        assert base_model == BaseModelType.StableDiffusion2
+        assert model_type == ModelType.Pipeline
+        super().__init__(model_path, BaseModelType.StableDiffusion2, ModelType.Pipeline)
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs):
+        """Build a config for the model at ``path``, detecting its variant.
+
+        The UNet's input channel count is read from the checkpoint's yaml
+        config (keyword ``config``) when given, from the checkpoint's
+        first convolution weight otherwise, or from ``unet/config.json``
+        for a diffusers folder. 9 channels mean inpainting, 5 depth and 4
+        normal. Normal models are given v-prediction with upcast attention;
+        the other variants epsilon prediction without it.
+
+        Raises:
+            Exception: for a diffusers folder without unet/config.json, or
+                an unexpected channel count.
+            NotImplementedError: for an unknown format.
+        """
+        model_format = cls.detect_format(path)
+        ckpt_config_path = kwargs.get("config", None)
+        if model_format == "checkpoint":
+            if ckpt_config_path:
+                ckpt_config = OmegaConf.load(ckpt_config_path)
+                # the looked-up value must be kept: it drives the variant
+                # detection below
+                in_channels = ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
+
+            else:
+                checkpoint = read_checkpoint_meta(path)
+                checkpoint = checkpoint.get('state_dict', checkpoint)
+                in_channels = checkpoint["model.diffusion_model.input_blocks.0.0.weight"].shape[1]
+
+        elif model_format == "diffusers":
+            unet_config_path = os.path.join(path, "unet", "config.json")
+            if os.path.exists(unet_config_path):
+                with open(unet_config_path, "r") as f:
+                    unet_config = json.loads(f.read())
+                in_channels = unet_config['in_channels']
+
+            else:
+                raise Exception("Not supported stable diffusion diffusers format(possibly onnx?)")
+
+        else:
+            raise NotImplementedError(f"Unknown stable diffusion 2.* format: {model_format}")
+
+        if in_channels == 9:
+            variant = ModelVariantType.Inpaint
+        elif in_channels == 5:
+            variant = ModelVariantType.Depth
+        elif in_channels == 4:
+            variant = ModelVariantType.Normal
+        else:
+            raise Exception("Unkown stable diffusion 2.* model format")
+
+        if variant == ModelVariantType.Normal:
+            prediction_type = SchedulerPredictionType.VPrediction
+            upcast_attention = True
+
+        else:
+            prediction_type = SchedulerPredictionType.Epsilon
+            upcast_attention = False
+
+        return cls.create_config(
+            path=path,
+            format=model_format,
+
+            config=ckpt_config_path,
+            variant=variant,
+            prediction_type=prediction_type,
+            upcast_attention=upcast_attention,
+        )
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return True
+
+    @classmethod
+    def detect_format(cls, model_path: str):
+        if os.path.isdir(model_path):
+            return "diffusers"
+        else:
+            return "checkpoint"
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        assert model_path == config.path
+
+        if isinstance(config, cls.CheckpointConfig):
+            return _convert_ckpt_and_cache(
+                version=BaseModelType.StableDiffusion2,
+                model_config=config,
+                output_path=output_path,
+            ) # TODO: args
+        else:
+            return model_path
+
+def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
+    ckpt_configs = {
+        BaseModelType.StableDiffusion1: {
+            ModelVariantType.Normal: "v1-inference.yaml",
+            ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
+        },
+        BaseModelType.StableDiffusion2: {
+            # code further will manually set upcast_attention and v_prediction
+            ModelVariantType.Normal: "v2-inference.yaml",
+            ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
+            ModelVariantType.Depth: "v2-midas-inference.yaml",
+        }
+    }
+
+    try:
+        # TODO: path
+        #model_config.config = app_config.config_dir / "stable-diffusion" / ckpt_configs[version][model_config.variant]
+        #return InvokeAIAppConfig.get_config().legacy_conf_dir / ckpt_configs[version][variant]
+        return InvokeAIAppConfig.get_config().root_dir / "configs" / "stable-diffusion" / ckpt_configs[version][variant]
+            
+    except:
+        return None
+
+
+# TODO: rework
+def _convert_ckpt_and_cache(
+    version: BaseModelType,
+    model_config: Union[StableDiffusion1Model.CheckpointConfig, StableDiffusion2Model.CheckpointConfig],
+    output_path: str,
+) -> str:
+    """
+    Convert the checkpoint model indicated in mconfig into a
+    diffusers, cache it to disk, and return Path to converted
+    file. If already on disk then just returns Path.
+    """
+    app_config = InvokeAIAppConfig.get_config()
+
+    if model_config.config is None:
+        model_config.config = _select_ckpt_config(version, model_config.variant)
+        if model_config.config is None:
+            raise Exception(f"Model variant {model_config.variant} not supported for {version}")
+
+
+    weights = app_config.root_dir / model_config.path
+    config_file = app_config.root_dir / model_config.config
+    output_path = Path(output_path)
+
+    if version == BaseModelType.StableDiffusion1:
+        upcast_attention = False
+        prediction_type = SchedulerPredictionType.Epsilon
+
+    elif version == BaseModelType.StableDiffusion2:
+        upcast_attention = config.upcast_attention
+        prediction_type = config.prediction_type
+
+    else:
+        raise Exception(f"Unknown model provided: {version}")
+
+
+    # return cached version if it exists
+    if output_path.exists():
+        return output_path
+
+    # TODO: I think that it more correctly to convert with embedded vae
+    #       as if user will delete custom vae he will got not embedded but also custom vae
+    #vae_ckpt_path, vae_model = self._get_vae_for_conversion(weights, mconfig)
+
+    # to avoid circular import errors
+    from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
+    with SilenceWarnings():        
+        convert_ckpt_to_diffusers(
+            weights,
+            output_path,
+            model_version=version,
+            model_variant=model_config.variant,
+            original_config_file=config_file,
+            extract_ema=True,
+            upcast_attention=upcast_attention,
+            prediction_type=prediction_type,
+            scan_needed=True,
+            model_root=app_config.models_path,
+        )
+    return output_path
--- a/invokeai/backend/model_management/models/textual_inversion.py
+++ b/invokeai/backend/model_management/models/textual_inversion.py
@ -0,0 +1,63 @@
+import torch
+from typing import Optional
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    classproperty,
+)
+# TODO: naming
+from ..lora import TextualInversionModel as TextualInversionModelRaw
+
+class TextualInversionModel(ModelBase):
+    #model_size: int
+
+    class Config(ModelConfigBase):
+        format: None
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert model_type == ModelType.TextualInversion
+        super().__init__(model_path, base_model, model_type)
+
+        self.model_size = os.path.getsize(self.model_path)
+
+    def get_size(self, child_type: Optional[SubModelType] = None):
+        if child_type is not None:
+            raise Exception("There is no child models in textual inversion")
+        return self.model_size
+
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ):
+        if child_type is not None:
+            raise Exception("There is no child models in textual inversion")
+
+        model = TextualInversionModelRaw.from_checkpoint(
+            file_path=self.model_path,
+            dtype=torch_dtype,
+        )
+
+        self.model_size = model.embedding.nelement() * model.embedding.element_size()
+        return model
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return False
+
+    @classmethod
+    def detect_format(cls, path: str):
+        return None
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        return model_path
--- a/invokeai/backend/model_management/models/vae.py
+++ b/invokeai/backend/model_management/models/vae.py
@ -0,0 +1,161 @@
+import os
+import torch
+from pathlib import Path
+from typing import Optional, Union, Literal
+from .base import (
+    ModelBase,
+    ModelConfigBase,
+    BaseModelType,
+    ModelType,
+    SubModelType,
+    ModelVariantType,
+    EmptyConfigLoader,
+    calc_model_size_by_fs,
+    calc_model_size_by_data,
+    classproperty,
+)
+from invokeai.app.services.config import InvokeAIAppConfig
+from diffusers.utils import is_safetensors_available
+from omegaconf import OmegaConf
+
+class VaeModel(ModelBase):
+    #vae_class: Type
+    #model_size: int
+
+    class Config(ModelConfigBase):
+        format: Union[Literal["checkpoint"], Literal["diffusers"]]
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert model_type == ModelType.Vae
+        super().__init__(model_path, base_model, model_type)
+
+        try:
+            config = EmptyConfigLoader.load_config(self.model_path, config_name="config.json")
+            #config = json.loads(os.path.join(self.model_path, "config.json"))
+        except:
+            raise Exception("Invalid vae model! (config.json not found or invalid)")
+
+        try:
+            vae_class_name = config.get("_class_name", "AutoencoderKL")
+            self.vae_class = self._hf_definition_to_type(["diffusers", vae_class_name])
+            self.model_size = calc_model_size_by_fs(self.model_path)
+        except:
+            raise Exception("Invalid vae model! (Unkown vae type)")
+
+    def get_size(self, child_type: Optional[SubModelType] = None):
+        if child_type is not None:
+            raise Exception("There is no child models in vae model")
+        return self.model_size
+
+    def get_model(
+        self,
+        torch_dtype: Optional[torch.dtype],
+        child_type: Optional[SubModelType] = None,
+    ):
+        if child_type is not None:
+            raise Exception("There is no child models in vae model")
+
+        model = self.vae_class.from_pretrained(
+            self.model_path,
+            torch_dtype=torch_dtype,
+        )
+        # calc more accurate size
+        self.model_size = calc_model_size_by_data(model)
+        return model
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return False
+
+    @classmethod
+    def detect_format(cls, path: str):
+        if os.path.isdir(path):
+            return "diffusers"
+        else:
+            return "checkpoint"
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase, # empty config or config of parent model
+        base_model: BaseModelType,
+    ) -> str:
+        if cls.detect_format(model_path) != "diffusers":
+            return _convert_vae_ckpt_and_cache(
+                weights_path=model_path,
+                output_path=output_path,
+                base_model=base_model,
+                model_config=config,
+            )
+        else:
+            return model_path
+
+# TODO: rework
+def _convert_vae_ckpt_and_cache(
+    weights_path: str,
+    output_path: str,
+    base_model: BaseModelType,
+    model_config: ModelConfigBase,
+) -> str:
+    """
+    Convert the VAE indicated in mconfig into a diffusers AutoencoderKL
+    object, cache it to disk, and return Path to converted
+    file. If already on disk then just returns Path.
+    """
+    app_config = InvokeAIAppConfig.get_config()
+    weights_path = app_config.root_dir / weights_path
+    output_path = Path(output_path)
+
+    """
+    this size used only in when tiling enabled to separate input in tiles
+    sizes in configs from stable diffusion githubs(1 and 2) set to 256
+    on huggingface it:
+    1.5 - 512
+    1.5-inpainting - 256
+    2-inpainting - 512
+    2-depth - 256
+    2-base - 512
+    2 - 768
+    2.1-base - 768
+    2.1 - 768
+    """
+    image_size = 512
+        
+    # return cached version if it exists
+    if output_path.exists():
+        return output_path
+
+    if base_model in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
+        from .stable_diffusion import _select_ckpt_config
+        # all sd models use same vae settings
+        config_file = _select_ckpt_config(base_model, ModelVariantType.Normal)
+
+    else:
+        raise Exception(f"Vae conversion not supported for model type: {base_model}")
+
+    # this avoids circular import error
+    from ..convert_ckpt_to_diffusers import convert_ldm_vae_to_diffusers
+    if weights_path.suffix == '.safetensors':
+        checkpoint = safetensors.torch.load_file(weights_path, device="cpu")
+    else:
+        checkpoint = torch.load(weights_path, map_location="cpu")
+
+    # sometimes weights are hidden under "state_dict", and sometimes not
+    if "state_dict" in checkpoint:
+        checkpoint = checkpoint["state_dict"]
+
+    config = OmegaConf.load(config_file)
+
+    vae_model = convert_ldm_vae_to_diffusers(
+        checkpoint = checkpoint,
+        vae_config = config,
+        image_size = image_size,
+        model_root = app_config.models_path,
+    )
+    vae_model.save_pretrained(
+        output_path,
+        safe_serialization=is_safetensors_available()
+    )
+    return output_path
--- a/invokeai/backend/prompting/conditioning.py
+++ b/invokeai/backend/prompting/conditioning.py
@ -7,6 +7,7 @@ get_uc_and_c_and_ec()           get the conditioned and unconditioned latent, an

 """
 import re
+import torch
 from typing import Optional, Union

 from compel import Compel
--- a/invokeai/backend/restoration/base.py
+++ b/invokeai/backend/restoration/base.py
@ -5,7 +5,7 @@ class Restoration:
        pass

    def load_face_restore_models(
-        self, gfpgan_model_path="./models/gfpgan/GFPGANv1.4.pth"
+        self, gfpgan_model_path="./models/core/face_restoration/gfpgan/GFPGANv1.4.pth"
    ):
        # Load GFPGAN
        gfpgan = self.load_gfpgan(gfpgan_model_path)
--- a/invokeai/backend/restoration/codeformer.py
+++ b/invokeai/backend/restoration/codeformer.py
@ -15,7 +15,7 @@ pretrained_model_url = (

 class CodeFormerRestoration:
    def __init__(
-        self, codeformer_dir="models/codeformer", codeformer_model_path="codeformer.pth"
+        self, codeformer_dir="./models/core/face_restoration/codeformer", codeformer_model_path="codeformer.pth"
    ) -> None:

        self.globals = InvokeAIAppConfig.get_config()
@ -24,7 +24,7 @@ class CodeFormerRestoration:
        self.codeformer_model_exists = self.model_path.exists()

        if not self.codeformer_model_exists:
-            logger.error("NOT FOUND: CodeFormer model not found at " + self.model_path)
+            logger.error(f"NOT FOUND: CodeFormer model not found at {self.model_path}")
        sys.path.append(os.path.abspath(codeformer_dir))

    def process(self, image, strength, device, seed=None, fidelity=0.75):
@ -71,7 +71,7 @@ class CodeFormerRestoration:
                upscale_factor=1,
                use_parse=True,
                device=device,
-                model_rootpath = self.globals.root_dir / "gfpgan" / "weights"
+                model_rootpath = self.globals.model_path / 'core/face_restoration/gfpgan/weights'
            )
            face_helper.clean_all()
            face_helper.read_image(bgr_image_array)
--- a/invokeai/backend/restoration/gfpgan.py
+++ b/invokeai/backend/restoration/gfpgan.py
@ -18,7 +18,7 @@ class GFPGAN:
        self.gfpgan_model_exists = os.path.isfile(self.model_path)

        if not self.gfpgan_model_exists:
-            logger.error("NOT FOUND: GFPGAN model not found at " + self.model_path)
+            logger.error(f"NOT FOUND: GFPGAN model not found at {self.model_path}")
            return None

    def model_exists(self):
--- a/invokeai/backend/restoration/realesrgan.py
+++ b/invokeai/backend/restoration/realesrgan.py
@ -30,8 +30,8 @@ class ESRGAN:
            upscale=4,
            act_type="prelu",
        )
-        model_path = config.root_dir / "models/realesrgan/realesr-general-x4v3.pth"
-        wdn_model_path = config.root_dir / "models/realesrgan/realesr-general-wdn-x4v3.pth"
+        model_path = config.models_path / "core/upscaling/realesrgan/realesr-general-x4v3.pth"
+        wdn_model_path = config.models_path / "core/upscaling/realesrgan/realesr-general-wdn-x4v3.pth"
        scale = 4

        bg_upsampler = RealESRGANer(
--- a/invokeai/backend/safety_checker.py
+++ b/invokeai/backend/safety_checker.py
@ -30,18 +30,10 @@ class SafetyChecker(object):
        self.device = device

        try:
-            safety_model_id = "CompVis/stable-diffusion-safety-checker"
-            safety_model_path = config.cache_dir
-            self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
-                safety_model_id,
-                local_files_only=True,
-                cache_dir=safety_model_path,
-            )
-            self.safety_feature_extractor = AutoFeatureExtractor.from_pretrained(
-                safety_model_id,
-                local_files_only=True,
-                cache_dir=safety_model_path,
-            )
+            safety_model_id = config.models_path / 'core/convert/stable-diffusion-safety-checker'
+            feature_extractor_id = config.models_path / 'core/convert/stable-diffusion-safety-checker-extractor'
+            self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
+            self.safety_feature_extractor = AutoFeatureExtractor.from_pretrained(feature_extractor_id)
        except Exception:
            logger.error(
                "An error was encountered while installing the safety checker:"
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@ -1026,8 +1026,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):

    # Copied from diffusers pipeline_stable_diffusion_controlnet.py
    # Returns torch.Tensor of shape (batch_size, 3, height, width)
+    @staticmethod
    def prepare_control_image(
-        self,
        image,
        # FIXME: need to fix hardwiring of width and height, change to basing on latents dimensions?
        # latents,
--- a/invokeai/backend/stable_diffusion/offloading.py
+++ b/invokeai/backend/stable_diffusion/offloading.py
@ -157,7 +157,7 @@ class LazilyLoadedModelGroup(ModelGroup):
    def offload_current(self):
        module = self._current_model_ref()
        if module is not NO_MODEL:
-            module.to(device=OFFLOAD_DEVICE)
+            module.to(OFFLOAD_DEVICE)
        self.clear_current_model()

    def _load(self, module: torch.nn.Module) -> torch.nn.Module:
@ -228,7 +228,7 @@ class FullyLoadedModelGroup(ModelGroup):
    def install(self, *models: torch.nn.Module):
        for model in models:
            self._models.add(model)
-            model.to(device=self.execution_device)
+            model.to(self.execution_device)

    def uninstall(self, *models: torch.nn.Module):
        for model in models:
@ -238,11 +238,11 @@ class FullyLoadedModelGroup(ModelGroup):
        self.uninstall(*self._models)

    def load(self, model):
-        model.to(device=self.execution_device)
+        model.to(self.execution_device)

    def offload_current(self):
        for model in self._models:
-            model.to(device=OFFLOAD_DEVICE)
+            model.to(OFFLOAD_DEVICE)

    def ready(self):
        for model in self._models:
@ -252,7 +252,7 @@ class FullyLoadedModelGroup(ModelGroup):
        self.execution_device = device
        for model in self._models:
            if model.device != OFFLOAD_DEVICE:
-                model.to(device=device)
+                model.to(device)

    def device_for(self, model):
        if model not in self:
--- a/invokeai/backend/util/logging.py
+++ b/invokeai/backend/util/logging.py
@ -358,7 +358,6 @@ class InvokeAILogger(object):
                
            elif handler_name=='syslog':
                ch = cls._parse_syslog_args(args)
-                ch.setFormatter(InvokeAISyslogFormatter())
                handlers.append(ch)
                
            elif handler_name=='file':
@ -367,7 +366,8 @@ class InvokeAILogger(object):
                handlers.append(ch)
                
            elif handler_name=='http':
-                handlers.append(cls._parse_http_args(args))
+                ch = cls._parse_http_args(args)
+                handlers.append(ch)
        return handlers

    @staticmethod
--- a/invokeai/backend/web/invoke_ai_web_server.py
+++ b/invokeai/backend/web/invoke_ai_web_server.py
@ -1277,13 +1277,14 @@ class InvokeAIWebServer:
                eventlet.sleep(0)

                parsed_prompt, _ = get_prompt_structure(generation_parameters["prompt"])
-                tokens = (
-                    None
-                    if type(parsed_prompt) is Blend
-                    else get_tokens_for_prompt_object(
-                        self.generate.model.tokenizer, parsed_prompt
+                with self.generate.model_context as model:
+                    tokens = (
+                        None
+                        if type(parsed_prompt) is Blend
+                        else get_tokens_for_prompt_object(
+                                model.tokenizer, parsed_prompt
+                        )
                    )
-                )
                attention_maps_image_base64_url = (
                    None
                    if attention_maps_image is None
--- a/invokeai/frontend/web/dist/index.html
+++ b/invokeai/frontend/web/dist/index.html
@ -12,7 +12,7 @@
        margin: 0;
      }
    </style>
-    <script type="module" crossorigin src="./assets/index-b060dbab.js"></script>
+    <script type="module" crossorigin src="./assets/index-8a3e9251.js"></script>
  </head>

  <body dir="ltr">
--- a/invokeai/frontend/web/dist/locales/en.json
+++ b/invokeai/frontend/web/dist/locales/en.json
@ -506,8 +506,8 @@
            "isScheduled": "Canceling",
            "setType": "Set cancel type"
        },
-        "promptPlaceholder": "Type prompt here. [negative tokens], (upweight)++, (downweight)--, swap and blend are available (see docs)",
-        "negativePrompts": "Negative Prompts",
+        "positivePromptPlaceholder": "Positive Prompt",
+        "negativePromptPlaceholder": "Negative Prompt",
        "sendTo": "Send to",
        "sendToImg2Img": "Send to Image to Image",
        "sendToUnifiedCanvas": "Send To Unified Canvas",
--- a/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/InputFieldComponent.tsx
@ -7,6 +7,9 @@ import EnumInputFieldComponent from './fields/EnumInputFieldComponent';
 import ImageInputFieldComponent from './fields/ImageInputFieldComponent';
 import LatentsInputFieldComponent from './fields/LatentsInputFieldComponent';
 import ConditioningInputFieldComponent from './fields/ConditioningInputFieldComponent';
+import UNetInputFieldComponent from './fields/UNetInputFieldComponent';
+import ClipInputFieldComponent from './fields/ClipInputFieldComponent';
+import VaeInputFieldComponent from './fields/VaeInputFieldComponent';
 import ControlInputFieldComponent from './fields/ControlInputFieldComponent';
 import ModelInputFieldComponent from './fields/ModelInputFieldComponent';
 import NumberInputFieldComponent from './fields/NumberInputFieldComponent';
@ -98,6 +101,36 @@ const InputFieldComponent = (props: InputFieldComponentProps) => {
    );
  }

+  if (type === 'unet' && template.type === 'unet') {
+    return (
+      <UNetInputFieldComponent
+        nodeId={nodeId}
+        field={field}
+        template={template}
+      />
+    );
+  }
+
+  if (type === 'clip' && template.type === 'clip') {
+    return (
+      <ClipInputFieldComponent
+        nodeId={nodeId}
+        field={field}
+        template={template}
+      />
+    );
+  }
+
+  if (type === 'vae' && template.type === 'vae') {
+    return (
+      <VaeInputFieldComponent
+        nodeId={nodeId}
+        field={field}
+        template={template}
+      />
+    );
+  }
+
  if (type === 'control' && template.type === 'control') {
    return (
      <ControlInputFieldComponent
--- a/invokeai/frontend/web/src/features/nodes/components/fields/ClipInputFieldComponent.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/fields/ClipInputFieldComponent.tsx
@ -0,0 +1,16 @@
+import {
+  ClipInputFieldTemplate,
+  ClipInputFieldValue,
+} from 'features/nodes/types/types';
+import { memo } from 'react';
+import { FieldComponentProps } from './types';
+
+/**
+ * Input-field component for `clip` fields. It currently renders nothing.
+ */
+const ClipInputFieldComponent = ({
+  nodeId,
+  field,
+}: FieldComponentProps<ClipInputFieldValue, ClipInputFieldTemplate>) => {
+  return null;
+};
+
+export default memo(ClipInputFieldComponent);
--- a/invokeai/frontend/web/src/features/nodes/components/fields/UNetInputFieldComponent.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/fields/UNetInputFieldComponent.tsx
@ -0,0 +1,16 @@
+import {
+  UNetInputFieldTemplate,
+  UNetInputFieldValue,
+} from 'features/nodes/types/types';
+import { memo } from 'react';
+import { FieldComponentProps } from './types';
+
+/**
+ * Input-field component for `unet` fields. It currently renders nothing.
+ */
+const UNetInputFieldComponent = ({
+  nodeId,
+  field,
+}: FieldComponentProps<UNetInputFieldValue, UNetInputFieldTemplate>) => {
+  return null;
+};
+
+export default memo(UNetInputFieldComponent);
--- a/invokeai/frontend/web/src/features/nodes/components/fields/VaeInputFieldComponent.tsx
+++ b/invokeai/frontend/web/src/features/nodes/components/fields/VaeInputFieldComponent.tsx
@ -0,0 +1,16 @@
+import {
+  VaeInputFieldTemplate,
+  VaeInputFieldValue,
+} from 'features/nodes/types/types';
+import { memo } from 'react';
+import { FieldComponentProps } from './types';
+
+/**
+ * Input-field component for `vae` fields. It currently renders nothing.
+ */
+const VaeInputFieldComponent = ({
+  nodeId,
+  field,
+}: FieldComponentProps<VaeInputFieldValue, VaeInputFieldTemplate>) => {
+  return null;
+};
+
+export default memo(VaeInputFieldComponent);
--- a/invokeai/frontend/web/src/features/nodes/types/constants.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts
@ -12,6 +12,9 @@ export const FIELD_TYPE_MAP: Record<string, FieldType> = {
  ImageField: 'image',
  LatentsField: 'latents',
  ConditioningField: 'conditioning',
+  UNetField: 'unet',
+  ClipField: 'clip',
+  VaeField: 'vae',
  model: 'model',
  array: 'array',
  item: 'item',
@ -79,6 +82,24 @@ export const FIELDS: Record<FieldType, FieldUIConfig> = {
    title: 'Conditioning',
    description: 'Conditioning may be passed between nodes.',
  },
+  unet: {
+    color: 'red',
+    colorCssVar: getColorTokenCssVariable('red'),
+    title: 'UNet',
+    description: 'UNet submodel.',
+  },
+  clip: {
+    color: 'green',
+    colorCssVar: getColorTokenCssVariable('green'),
+    title: 'Clip',
+    description: 'Tokenizer and text_encoder submodels.',
+  },
+  vae: {
+    color: 'blue',
+    colorCssVar: getColorTokenCssVariable('blue'),
+    title: 'Vae',
+    description: 'Vae submodel.',
+  },
  control: {
    color: 'cyan',
    colorCssVar: getColorTokenCssVariable('cyan'), // TODO: no free color left
--- a/invokeai/frontend/web/src/features/nodes/types/types.ts
+++ b/invokeai/frontend/web/src/features/nodes/types/types.ts
@ -61,6 +61,9 @@ export type FieldType =
  | 'image'
  | 'latents'
  | 'conditioning'
+  | 'unet'
+  | 'clip'
+  | 'vae'
  | 'control'
  | 'model'
  | 'array'
@ -83,6 +86,9 @@ export type InputFieldValue =
  | ImageInputFieldValue
  | LatentsInputFieldValue
  | ConditioningInputFieldValue
+  | UNetInputFieldValue
+  | ClipInputFieldValue
+  | VaeInputFieldValue
  | ControlInputFieldValue
  | EnumInputFieldValue
  | ModelInputFieldValue
@ -104,6 +110,9 @@ export type InputFieldTemplate =
  | ImageInputFieldTemplate
  | LatentsInputFieldTemplate
  | ConditioningInputFieldTemplate
+  | UNetInputFieldTemplate
+  | ClipInputFieldTemplate
+  | VaeInputFieldTemplate
  | ControlInputFieldTemplate
  | EnumInputFieldTemplate
  | ModelInputFieldTemplate
@ -188,6 +197,21 @@ export type ControlInputFieldValue = FieldValueBase & {
  value?: undefined;
 };

+/** Value of a `unet` input field; `value` is typed as always undefined. */
+export type UNetInputFieldValue = FieldValueBase & {
+  type: 'unet';
+  value?: undefined;
+};
+
+/** Value of a `clip` input field; `value` is typed as always undefined. */
+export type ClipInputFieldValue = FieldValueBase & {
+  type: 'clip';
+  value?: undefined;
+};
+
+/** Value of a `vae` input field; `value` is typed as always undefined. */
+export type VaeInputFieldValue = FieldValueBase & {
+  type: 'vae';
+  value?: undefined;
+};
+
 export type ImageInputFieldValue = FieldValueBase & {
  type: 'image';
  value?: ImageDTO;
--- a/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/fieldTemplateBuilders.ts
@ -10,6 +10,9 @@ import {
  IntegerInputFieldTemplate,
  LatentsInputFieldTemplate,
  ConditioningInputFieldTemplate,
+  UNetInputFieldTemplate,
+  ClipInputFieldTemplate,
+  VaeInputFieldTemplate,
  ControlInputFieldTemplate,
  StringInputFieldTemplate,
  ModelInputFieldTemplate,
@ -216,6 +219,51 @@ const buildConditioningInputFieldTemplate = ({
  return template;
 };

+/**
+ * Builds the template for a `unet` input field: always required and
+ * supplied through a connection.
+ */
+const buildUNetInputFieldTemplate = ({
+  schemaObject,
+  baseField,
+}: BuildInputFieldArg): UNetInputFieldTemplate => ({
+  ...baseField,
+  type: 'unet',
+  inputRequirement: 'always',
+  inputKind: 'connection',
+  default: schemaObject.default ?? undefined,
+});
+
+/**
+ * Builds the template for a `clip` input field: always required and
+ * supplied through a connection.
+ */
+const buildClipInputFieldTemplate = ({
+  schemaObject,
+  baseField,
+}: BuildInputFieldArg): ClipInputFieldTemplate => ({
+  ...baseField,
+  type: 'clip',
+  inputRequirement: 'always',
+  inputKind: 'connection',
+  default: schemaObject.default ?? undefined,
+});
+
+/**
+ * Builds the template for a `vae` input field: always required and
+ * supplied through a connection.
+ */
+const buildVaeInputFieldTemplate = ({
+  schemaObject,
+  baseField,
+}: BuildInputFieldArg): VaeInputFieldTemplate => ({
+  ...baseField,
+  type: 'vae',
+  inputRequirement: 'always',
+  inputKind: 'connection',
+  default: schemaObject.default ?? undefined,
+});
+
 const buildControlInputFieldTemplate = ({
  schemaObject,
  baseField,
@ -358,6 +406,15 @@ export const buildInputFieldTemplate = (
  if (['conditioning'].includes(fieldType)) {
    return buildConditioningInputFieldTemplate({ schemaObject, baseField });
  }
+  if (['unet'].includes(fieldType)) {
+    return buildUNetInputFieldTemplate({ schemaObject, baseField });
+  }
+  if (['clip'].includes(fieldType)) {
+    return buildClipInputFieldTemplate({ schemaObject, baseField });
+  }
+  if (['vae'].includes(fieldType)) {
+    return buildVaeInputFieldTemplate({ schemaObject, baseField });
+  }
  if (['control'].includes(fieldType)) {
    return buildControlInputFieldTemplate({ schemaObject, baseField });
  }
--- a/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/fieldValueBuilders.ts
@ -52,6 +52,18 @@ export const buildInputFieldValue = (
      fieldValue.value = undefined;
    }

+    if (template.type === 'unet') {
+      fieldValue.value = undefined;
+    }
+
+    if (template.type === 'clip') {
+      fieldValue.value = undefined;
+    }
+
+    if (template.type === 'vae') {
+      fieldValue.value = undefined;
+    }
+
    if (template.type === 'control') {
      fieldValue.value = undefined;
    }
--- a/invokeai/frontend/web/src/services/thunks/model.ts
+++ b/invokeai/frontend/web/src/services/thunks/model.ts
@ -14,7 +14,7 @@ export const receivedModels = createAppAsyncThunk(
    const response = await ModelsService.listModels();

    const deserializedModels = reduce(
-      response.models,
+      response.models['sd-1']['pipeline'],
      (modelsAccumulator, model, modelName) => {
        modelsAccumulator[modelName] = { ...model, name: modelName };

@ -23,7 +23,10 @@ export const receivedModels = createAppAsyncThunk(
      {} as Record<string, Model>
    );

-    models.info({ response }, `Received ${size(response.models)} models`);
+    models.info(
+      { response },
+      `Received ${size(response.models['sd-1']['pipeline'])} models`
+    );

    return deserializedModels;
  }
--- a/invokeai/frontend/web/stats.html
+++ b/invokeai/frontend/web/stats.html
--- a/pyproject.toml
+++ b/pyproject.toml
@ -67,6 +67,7 @@ dependencies = [
  "picklescan",
  "pillow",
  "prompt-toolkit",
+  "pympler==1.0.1",
  "pypatchmatch",
  'pyperclip',
  "pyreadline3",
@ -82,7 +83,7 @@ dependencies = [
  "torch~=2.0.0",
  "torchvision>=0.14.1",
  "torchmetrics",
-  "transformers~=4.26",
+  "transformers~=4.30",
  "uvicorn[standard]==0.21.1",
  "windows-curses; sys_platform=='win32'",
 ]
--- a/scripts/migrate_models_to_3.0.py
+++ b/scripts/migrate_models_to_3.0.py
@ -0,0 +1,278 @@
+'''
+Migrate the models directory and models.yaml file from an existing
+InvokeAI 2.3 installation to 3.0.0.
+'''
+
+import io
+import os
+import argparse
+import shutil
+import yaml
+
+import transformers
+import diffusers
+import warnings
+from pathlib import Path
+from omegaconf import OmegaConf
+from diffusers import StableDiffusionPipeline, AutoencoderKL
+from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+from transformers import (
+    CLIPTextModel,
+    CLIPTokenizer,
+    AutoFeatureExtractor,
+    BertTokenizerFast,
+)
+
+import invokeai.backend.util.logging as logger
+from invokeai.backend.model_management.model_probe import (
+    ModelProbe, ModelType, BaseModelType
+    )
+
+warnings.filterwarnings("ignore")
+transformers.logging.set_verbosity_error()
+diffusers.logging.set_verbosity_error()
+
+def create_directory_structure(dest: Path):
+    '''
+    Create the empty skeleton of the new models directory under `dest`.
+
+    One `<base>/<type>` subdirectory is made for every combination of the
+    two base models and the five model types listed below, followed by a
+    top-level `core` directory. Directories that already exist are left
+    untouched.
+    '''
+    bases = (BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2)
+    kinds = (
+        ModelType.Pipeline,
+        ModelType.Vae,
+        ModelType.Lora,
+        ModelType.ControlNet,
+        ModelType.TextualInversion,
+    )
+    for base in bases:
+        for kind in kinds:
+            (dest / base.value / kind.value).mkdir(parents=True, exist_ok=True)
+    (dest / 'core').mkdir(parents=True, exist_ok=True)
+
+def copy_file(src: Path, dest: Path):
+    '''
+    Copy the file `src` to `dest`, logging the attempt.
+
+    Best-effort: any exception raised by the copy is logged as an error
+    and swallowed, so the caller is never told about a failure.
+    '''
+    logger.info(f'Copying {src} to {dest}')
+    try:
+        shutil.copy(src, dest)
+    except Exception as err:
+        logger.error(f'COPY FAILED: {err}')
+
+def copy_dir(src: Path, dest: Path):
+    '''
+    Recursively copy the directory `src` to `dest`, logging the attempt.
+
+    Best-effort: any exception raised by the copy (shutil.copytree raises
+    when `dest` already exists, for instance) is logged as an error and
+    swallowed, so the caller is never told about a failure.
+    '''
+    logger.info(f'Copying {src} to {dest}')
+    try:
+        shutil.copytree(src, dest)
+    except Exception as err:
+        logger.error(f'COPY FAILED: {err}')
+
+def migrate_models(src_dir: Path, dest_dir: Path):
+    '''
+    Walk `src_dir` and copy every entry the model probe recognizes into
+    `<dest_dir>/<base type>/<model type>/<entry name>`.
+
+    Both files and directories are probed. Entries for which the probe
+    returns nothing are skipped. A failure on one entry is logged as an
+    error and the walk carries on; KeyboardInterrupt is allowed through.
+    '''
+    def _migrate_entry(parent, name, copier):
+        # Probe one file or directory and, when it is recognized, hand it
+        # to `copier` (copy_file or copy_dir).
+        try:
+            candidate = Path(parent, name)
+            probed = ModelProbe().heuristic_probe(candidate)
+            if probed:
+                target = Path(dest_dir, probed.base_type.value, probed.model_type.value, name)
+                copier(candidate, target)
+        except KeyboardInterrupt:
+            raise
+        except Exception as err:
+            logger.error(str(err))
+
+    for root, dirs, files in os.walk(src_dir):
+        for filename in files:
+            # hack - raw learned_embeds.bin files are never copied on their
+            # own; they only travel as part of a directory copy
+            if filename != 'learned_embeds.bin':
+                _migrate_entry(root, filename, copy_file)
+        for dirname in dirs:
+            _migrate_entry(root, dirname, copy_dir)
+
+def migrate_support_models(dest_directory: Path):
+    '''
+    Copy the support model directories found under `./models` (relative to
+    the current working directory) into the `core` area of `dest_directory`.
+
+    Source directories that do not exist are silently skipped.
+    '''
+    # (directory under ./models, location under dest_directory), in copy order
+    relocations = (
+        ('clipseg', 'core/misc/clipseg'),
+        ('realesrgan', 'core/upscaling/realesrgan'),
+        ('codeformer', 'core/face_restoration/codeformer'),
+        ('gfpgan', 'core/face_restoration/gfpgan'),
+    )
+    for name, target in relocations:
+        source = Path('./models', name)
+        if source.exists():
+            copy_dir(source, dest_directory / target)
+
+def migrate_conversion_models(dest_directory: Path):
+    '''
+    Save the tokenizers, text encoders, VAE and safety checker needed by
+    the conversion script into `<dest_directory>/core/convert`.
+
+    Each component is loaded with `from_pretrained()` using `./models/hub`
+    as the cache directory and written out again with `save_pretrained()`.
+    The first failure is logged as an error and ends the function, so any
+    components after it are not saved. KeyboardInterrupt is allowed through.
+    '''
+    # NOTE: local_files_only is not set for these loads (the option is
+    # commented out in the code this was derived from).
+    hub_args = {'cache_dir': Path('./models/hub')}
+    try:
+        logger.info('Migrating core tokenizers and text encoders')
+        convert_dir = dest_directory / 'core' / 'convert'
+
+        # bert
+        bert_tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased", **hub_args)
+        bert_tokenizer.save_pretrained(convert_dir / 'bert-base-uncased', safe_serialization=True)
+
+        # sd-1: tokenizer and text encoder are saved into the same directory
+        sd1_repo = 'openai/clip-vit-large-patch14'
+        sd1_dir = convert_dir / 'clip-vit-large-patch14'
+        sd1_tokenizer = CLIPTokenizer.from_pretrained(sd1_repo, **hub_args)
+        sd1_tokenizer.save_pretrained(sd1_dir, safe_serialization=True)
+        sd1_encoder = CLIPTextModel.from_pretrained(sd1_repo, **hub_args)
+        sd1_encoder.save_pretrained(sd1_dir, safe_serialization=True)
+
+        # sd-2: tokenizer and text encoder go into separate subfolders
+        sd2_repo = "stabilityai/stable-diffusion-2"
+        sd2_dir = convert_dir / 'stable-diffusion-2-clip'
+        sd2_tokenizer = CLIPTokenizer.from_pretrained(sd2_repo, subfolder="tokenizer", **hub_args)
+        sd2_tokenizer.save_pretrained(sd2_dir / 'tokenizer', safe_serialization=True)
+        sd2_encoder = CLIPTextModel.from_pretrained(sd2_repo, subfolder="text_encoder", **hub_args)
+        sd2_encoder.save_pretrained(sd2_dir / 'text_encoder', safe_serialization=True)
+
+        # VAE
+        logger.info('Migrating stable diffusion VAE')
+        vae = AutoencoderKL.from_pretrained('stabilityai/sd-vae-ft-mse', **hub_args)
+        vae.save_pretrained(convert_dir / 'sd-vae-ft-mse', safe_serialization=True)
+
+        # safety checking: feature extractor and checker share one directory
+        logger.info('Migrating safety checker')
+        checker_repo = "CompVis/stable-diffusion-safety-checker"
+        checker_dir = convert_dir / 'stable-diffusion-safety-checker'
+        feature_extractor = AutoFeatureExtractor.from_pretrained(checker_repo, **hub_args)
+        feature_extractor.save_pretrained(checker_dir, safe_serialization=True)
+        safety_checker = StableDiffusionSafetyChecker.from_pretrained(checker_repo, **hub_args)
+        safety_checker.save_pretrained(checker_dir, safe_serialization=True)
+    except KeyboardInterrupt:
+        raise
+    except Exception as err:
+        logger.error(str(err))
+
+def migrate_tuning_models(dest: Path):
+    '''
+    Migrate the models found in the `embeddings`, `loras` and `controlnets`
+    directories of the current working directory into `dest`.
+
+    A directory that is missing is reported at info level and skipped.
+    '''
+    for subdir in ('embeddings', 'loras', 'controlnets'):
+        source = Path('.', subdir)
+        if source.is_dir():
+            logger.info(f'Scanning {subdir}')
+            migrate_models(source, dest)
+        else:
+            logger.info(f'{subdir} directory not found; skipping')
+
+def migrate_pipelines(dest_dir: Path, dest_yaml: io.TextIOBase):
+    '''
+    Re-save every diffusers pipeline cached under `./models/hub` into
+    `dest_dir` and append a stanza describing it to `dest_yaml`.
+
+    A cache folder counts as a pipeline when one of its snapshots contains
+    a `model_index.json`. Each entry under the folder's `refs` directory is
+    treated as a revision and migrated separately, to
+    `<dest_dir>/<base type>/<model type>/<repo name>-<revision>`.
+
+    :param dest_dir: root of the new models directory
+    :param dest_yaml: open, writable text stream for the new models.yaml;
+        it is flushed after every stanza
+
+    A revision that fails at any step is skipped with a warning that
+    includes the reason. KeyboardInterrupt is allowed through.
+    '''
+    cache = Path('./models/hub')
+    kwargs = dict(
+        cache_dir = cache,
+        local_files_only = True,
+        safety_checker = None,
+    )
+    for model in cache.glob('models--*'):
+        if not any(model.glob('snapshots/**/model_index.json')):
+            continue
+        # Cache folders are named "models--<owner>--<repo>". Split at most
+        # twice so a repo name that itself contains "--" stays intact, and
+        # skip (rather than crash on) a name that does not have three parts.
+        name_parts = model.name.split('--', 2)
+        if len(name_parts) != 3:
+            logger.warning(f'Unexpected cache directory name {model.name}. Skipping.')
+            continue
+        _, owner, repo_name = name_parts
+        repo_id = f'{owner}/{repo_name}'
+        revisions = [x.name for x in model.glob('refs/*')]
+        for revision in revisions:
+            logger.info(f'Migrating {repo_id}, revision {revision}')
+            try:
+                pipeline = StableDiffusionPipeline.from_pretrained(
+                    repo_id,
+                    revision=revision,
+                    **kwargs)
+                info = ModelProbe().heuristic_probe(pipeline)
+                if not info:
+                    continue
+                dest = Path(dest_dir, info.base_type.value, info.model_type.value, f'{repo_name}-{revision}')
+                pipeline.save_pretrained(dest, safe_serialization=True)
+                # the path recorded in the yaml is expressed relative to a
+                # directory named "models", not to dest_dir itself
+                rel_path = Path('models',dest.relative_to(dest_dir))
+                stanza = {
+                    f'{info.base_type.value}/{info.model_type.value}/{repo_name}-{revision}':
+                    {
+                        'name': repo_name,
+                        'path': str(rel_path),
+                        'description': f'diffusers model {repo_id}',
+                        'format': 'diffusers',
+                        'image_size': info.image_size,
+                        'base': info.base_type.value,
+                        'variant': info.variant_type.value,
+                        'prediction_type': info.prediction_type.value,
+                    }
+                }
+                print(yaml.dump(stanza),file=dest_yaml,end="")
+                dest_yaml.flush()
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.warning(f'Could not load the "{revision}" version of {repo_id}. Skipping.')
+                # report the cause too; this handler also covers failures
+                # while probing, saving or writing the stanza
+                logger.warning(f'Reason: {e}')
+
+def migrate_checkpoints(dest_dir: Path, dest_yaml: io.TextIOBase):
+    '''
+    Append a stanza to `dest_yaml` for every checkpoint model listed in the
+    old `./configs/models.yaml`.
+
+    Only entries whose `format` is `ckpt` are considered. When the weights
+    file lives inside the old `models` directory it is copied into
+    `dest_dir` and the recorded path is rewritten to the new location;
+    otherwise the original weights path is recorded unchanged.
+
+    A failure on one entry is logged as an error and the loop moves on.
+    KeyboardInterrupt is allowed through.
+    '''
+    old_conf = OmegaConf.load('./configs/models.yaml')
+    orig_models_dir = Path.cwd() / 'models'
+    for model_name, old_stanza in old_conf.items():
+        if old_stanza.get('format') != 'ckpt':
+            continue
+        try:
+            logger.info(f'Migrating checkpoint model {model_name}')
+            # the weights path is resolved against the parent of the old
+            # models directory, i.e. the current working directory
+            weights = orig_models_dir.parent / old_stanza['weights']
+            config = old_stanza['config']
+            info = ModelProbe().heuristic_probe(weights)
+            if not info:
+                continue
+            base = info.base_type.value
+            kind = info.model_type.value
+
+            # weights inside the old models directory are copied into the
+            # new one and referenced by their new relative path
+            if weights.is_relative_to(orig_models_dir):
+                copy_file(weights, Path(dest_dir, base, kind, weights.name))
+                weights = Path('models', base, kind, weights.name)
+
+            new_stanza = {
+                f'{base}/{kind}/{model_name}':
+                {
+                    'name': model_name,
+                    'path': str(weights),
+                    'description': f'checkpoint model {model_name}',
+                    'format': 'checkpoint',
+                    'image_size': info.image_size,
+                    'base': info.base_type.value,
+                    'variant': info.variant_type.value,
+                    'config': config
+                }
+            }
+            print(yaml.dump(new_stanza), file=dest_yaml, end="")
+            dest_yaml.flush()
+        except KeyboardInterrupt:
+            raise
+        except Exception as err:
+            logger.error(str(err))
+    
+def main():
+    '''
+    Command-line entry point for the 2.3 -> 3.0.0 models migration.
+
+    Parses the arguments, validates the root directory, changes the working
+    directory to it, then writes the new models.yaml and populates the new
+    models directory. The destination paths are resolved before the
+    directory change, so relative destinations refer to the directory the
+    script was started from.
+    '''
+    parser = argparse.ArgumentParser(description="Model directory migrator")
+    parser.add_argument('root_directory',
+                        help='Root directory (containing "models", "embeddings", "controlnets" and "loras")'
+                        )
+    parser.add_argument('--dest-directory',
+                        default='./models-3.0',
+                        help='Destination for new models directory',
+                        )
+    parser.add_argument('--dest-yaml',
+                        default='./models.yaml-3.0',
+                        help='Destination for new models.yaml file',
+                        )
+    args = parser.parse_args()
+    root_directory = Path(args.root_directory)
+    # Explicit checks instead of assert: assert statements are removed when
+    # Python runs with -O, which would let a bad path through.
+    if not root_directory.is_dir():
+        parser.error(f"{root_directory} is not a valid directory")
+    if not (root_directory / 'models').is_dir():
+        parser.error(f"{root_directory} does not contain a 'models' subdirectory")
+
+    dest_directory = Path(args.dest_directory).resolve()
+    dest_yaml = Path(args.dest_yaml).resolve()
+
+    # the migrate_* helpers use paths relative to the root directory
+    os.chdir(root_directory)
+    with open(dest_yaml,'w') as yaml_file:
+        print(yaml.dump({'__metadata__':
+                         {'version':'3.0.0'}
+                         }
+                        ),file=yaml_file,end=""
+              )
+        create_directory_structure(dest_directory)
+        migrate_support_models(dest_directory)
+        migrate_conversion_models(dest_directory)
+        migrate_tuning_models(dest_directory)
+        migrate_pipelines(dest_directory,yaml_file)
+        migrate_checkpoints(dest_directory,yaml_file)
+
+if __name__ == '__main__':
+    main()
+
--- a/scripts/scan_models_directory.py
+++ b/scripts/scan_models_directory.py
@ -0,0 +1,59 @@
+#!/usr/bin/env python
+
+'''
+Scan the models directory and print out a new models.yaml
+'''
+
+import os
+import sys
+import argparse
+
+from pathlib import Path
+from omegaconf import OmegaConf
+
+def main():
+    '''
+    Walk a models directory and print a new models.yaml to standard output.
+
+    Every directory at least two levels below the models root contributes
+    entries: its first two path components are taken as the base and the
+    model type, and each subdirectory and file in it becomes one stanza
+    keyed `<base>/<model type>/<name>`. Unless --all-models is given, only
+    directories whose model type is `diffusers` are included.
+
+    The models root is the path component named `models` when there is one;
+    otherwise it is the directory given on the command line, so directories
+    with other names (e.g. `models-3.0`) can be scanned too.
+    '''
+    parser = argparse.ArgumentParser(description="Model directory scanner")
+    parser.add_argument('models_directory')
+    parser.add_argument('--all-models',
+                        default=False,
+                        action='store_true',
+                        help='If true, then generates stanzas for all models; otherwise just diffusers'
+                        )
+
+    args = parser.parse_args()
+    directory = args.models_directory
+
+    conf = OmegaConf.create()
+    conf['_version'] = '3.0.0'
+
+    for root, dirs, files in os.walk(directory):
+        # Path.parts splits on the platform's separator, unlike split('/')
+        parts = Path(root).parts
+        if 'models' in parts:
+            subpaths = parts[parts.index('models')+1:]
+        else:
+            # no component is literally "models": measure depth from the
+            # directory being scanned instead of raising ValueError
+            subpaths = Path(root).relative_to(directory).parts
+        if len(subpaths) < 2:
+            continue
+        base, model_type, *_ = subpaths
+
+        if not (args.all_models or model_type=='diffusers'):
+            continue
+
+        for d in dirs:
+            conf[f'{base}/{model_type}/{d}'] = dict(
+                path = os.path.join(root,d),
+                description = f'{model_type} model {d}',
+                format = 'folder',
+                base = base,
+            )
+
+        for f in files:
+            basename = Path(f).stem
+            # file extension without the leading dot
+            suffix = Path(f).suffix[1:]
+            conf[f'{base}/{model_type}/{basename}'] = dict(
+                path = os.path.join(root,f),
+                description = f'{model_type} model {basename}',
+                format = suffix,
+                base = base,
+            )
+
+    OmegaConf.save(config=dict(sorted(conf.items())), f=sys.stdout)
+    
+
+if __name__ == '__main__':
+    main()