[mm] Do not write diffuser model to disk when convert_cache set to zero (#6072)

* pass model config to _load_model

* make conversion work again

* do not write diffusers to disk when convert_cache set to 0

* adding same model to cache twice is a no-op, not an assertion error

* fix issues identified by psychedelicious during pr review

* following conversion, avoid redundant read of cached submodels

* fix error introduced while merging

---------

Co-authored-by: Lincoln Stein <lstein@gmail.com>
Author: Lincoln Stein
Date: 2024-03-29 16:11:08 -04:00
Committed by: GitHub
Parent: 0ac1c0f339
Commit: 3d6d89feb4
14 changed files with 147 additions and 133 deletions
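The key behavioral change appears in the _do_convert() hunk below: the converted diffusers pipeline is written to the on-disk convert cache only when that cache has a non-zero maximum size; otherwise _convert_model() receives output_path=None and the result lives solely in the RAM cache. A minimal, self-contained sketch of that decision follows; the helper name and example paths are illustrative and not part of the InvokeAI API:

    from pathlib import Path
    from typing import Optional


    def conversion_output_path(cache_path: Path, convert_cache_max_size_gb: float) -> Optional[Path]:
        """Illustrative helper mirroring the new line in _do_convert():

            cache_path if self.convert_cache.max_size > 0 else None

        Returning None means "keep the converted pipeline in RAM only".
        """
        return cache_path if convert_cache_max_size_gb > 0 else None


    # With the convert cache size set to 0 (the convert_cache setting), nothing is written to disk.
    assert conversion_output_path(Path("/some/convert-cache/abc123"), 0) is None
    # With a non-zero convert cache, the converted diffusers folder is persisted for reuse.
    assert conversion_output_path(Path("/some/convert-cache/abc123"), 4) is not None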


@@ -3,14 +3,13 @@
 from logging import Logger
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Optional
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.model_manager import (
     AnyModel,
     AnyModelConfig,
     InvalidModelConfigException,
-    ModelRepoVariant,
     SubModelType,
 )
 from invokeai.backend.model_manager.config import DiffusersConfigBase, ModelType
@@ -54,51 +53,43 @@ class ModelLoader(ModelLoaderBase):
         if model_config.type is ModelType.Main and not submodel_type:
             raise InvalidModelConfigException("submodel_type is required when loading a main model")
-        model_path, model_config, submodel_type = self._get_model_path(model_config, submodel_type)
+        model_path = self._get_model_path(model_config)
         if not model_path.exists():
             raise InvalidModelConfigException(f"Files for model '{model_config.name}' not found at {model_path}")
-        model_path = self._convert_if_needed(model_config, model_path, submodel_type)
-        locker = self._load_if_needed(model_config, model_path, submodel_type)
+        with skip_torch_weight_init():
+            locker = self._convert_and_load(model_config, model_path, submodel_type)
         return LoadedModel(config=model_config, _locker=locker)
-    def _get_model_path(
-        self, config: AnyModelConfig, submodel_type: Optional[SubModelType] = None
-    ) -> Tuple[Path, AnyModelConfig, Optional[SubModelType]]:
+    @property
+    def convert_cache(self) -> ModelConvertCacheBase:
+        """Return the convert cache associated with this loader."""
+        return self._convert_cache
+    @property
+    def ram_cache(self) -> ModelCacheBase[AnyModel]:
+        """Return the ram cache associated with this loader."""
+        return self._ram_cache
+    def _get_model_path(self, config: AnyModelConfig) -> Path:
         model_base = self._app_config.models_path
-        result = (model_base / config.path).resolve(), config, submodel_type
-        return result
+        return (model_base / config.path).resolve()
-    def _convert_if_needed(
-        self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None
-    ) -> Path:
-        cache_path: Path = self._convert_cache.cache_path(config.key)
-        if not self._needs_conversion(config, model_path, cache_path):
-            return cache_path if cache_path.exists() else model_path
-        self._convert_cache.make_room(self.get_size_fs(config, model_path, submodel_type))
-        return self._convert_model(config, model_path, cache_path)
-    def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
-        return False
-    def _load_if_needed(
+    def _convert_and_load(
         self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None
     ) -> ModelLockerBase:
         # TO DO: This is not thread safe!
         try:
             return self._ram_cache.get(config.key, submodel_type)
         except IndexError:
             pass
-        model_variant = getattr(config, "repo_variant", None)
-        self._ram_cache.make_room(self.get_size_fs(config, model_path, submodel_type))
-        # This is where the model is actually loaded!
-        with skip_torch_weight_init():
-            loaded_model = self._load_model(model_path, model_variant=model_variant, submodel_type=submodel_type)
+        cache_path: Path = self._convert_cache.cache_path(config.key)
+        if self._needs_conversion(config, model_path, cache_path):
+            loaded_model = self._do_convert(config, model_path, cache_path, submodel_type)
+        else:
+            config.path = str(cache_path) if cache_path.exists() else str(self._get_model_path(config))
+            loaded_model = self._load_model(config, submodel_type)
         self._ram_cache.put(
             config.key,
@@ -123,15 +114,34 @@ class ModelLoader(ModelLoaderBase):
             variant=config.repo_variant if isinstance(config, DiffusersConfigBase) else None,
         )
+    def _do_convert(
+        self, config: AnyModelConfig, model_path: Path, cache_path: Path, submodel_type: Optional[SubModelType] = None
+    ) -> AnyModel:
+        self.convert_cache.make_room(calc_model_size_by_fs(model_path))
+        pipeline = self._convert_model(config, model_path, cache_path if self.convert_cache.max_size > 0 else None)
+        if submodel_type:
+            # Proactively load the various submodels into the RAM cache so that we don't have to re-convert
+            # the entire pipeline every time a new submodel is needed.
+            for subtype in SubModelType:
+                if subtype == submodel_type:
+                    continue
+                if submodel := getattr(pipeline, subtype.value, None):
+                    self._ram_cache.put(
+                        config.key, submodel_type=subtype, model=submodel, size=calc_model_size_by_data(submodel)
+                    )
+        return getattr(pipeline, submodel_type.value) if submodel_type else pipeline
+    def _needs_conversion(self, config: AnyModelConfig, model_path: Path, dest_path: Path) -> bool:
+        return False
     # This needs to be implemented in subclasses that handle checkpoints
-    def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
+    def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None) -> AnyModel:
         raise NotImplementedError
     # This needs to be implemented in the subclass
     def _load_model(
         self,
-        model_path: Path,
-        model_variant: Optional[ModelRepoVariant] = None,
+        config: AnyModelConfig,
         submodel_type: Optional[SubModelType] = None,
     ) -> AnyModel:
         raise NotImplementedError
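For loader subclasses, the last hunk changes the two hooks they must implement: _load_model() now receives the full model config instead of a bare path plus repo variant, and _convert_model() must tolerate output_path=None, returning the conversion result in memory only. A hedged sketch of that contract, using only the signatures shown above; the class name and comments are illustrative and not taken from the InvokeAI code base:

    from pathlib import Path
    from typing import Optional

    from invokeai.backend.model_manager import AnyModel, AnyModelConfig, SubModelType


    class SketchLoader:  # would subclass ModelLoader in the real code base
        def _load_model(
            self,
            config: AnyModelConfig,  # full config replaces the old model_path/model_variant pair
            submodel_type: Optional[SubModelType] = None,
        ) -> AnyModel:
            # Subclasses now read the path (config.path) and, for diffusers models,
            # the repo variant (config.repo_variant) from the config itself.
            raise NotImplementedError

        def _convert_model(
            self, config: AnyModelConfig, model_path: Path, output_path: Optional[Path] = None
        ) -> AnyModel:
            # output_path may be None when the convert cache is disabled; implementations
            # should return the converted model in memory and write to disk only when a
            # destination path is provided.
            raise NotImplementedError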