feat(mm): remove autoimport; revise startup model scanning

These two changes are interrelated.

## Autoimport

The autoimport feature can be easily replicated using the scan folder tab in the model manager. Removing the implicit autoimport reduces surface area and unifies all model installation into the UI.

This functionality is removed, and the `autoimport_dir` config setting is removed.

## Startup model dir scanning

We scanned the invoke-managed models dir on startup and took certain actions:

- Register orphaned model files
- Remove model records from the db when the model path doesn't exist

### Orphaned model files

We should never have orphaned model files during normal use - we manage the models directory, and we only delete files when the user requests it.

During testing or development, when a fresh DB or memory DB is used, we could end up with orphaned models that should be registered.

Instead of always scanning for orphaned models and registering them, we now only do the scan if the new `scan_models_on_startup` config flag is set.

The description for this setting indicates it is intended for testing only.

### Remove records for missing model files

This functionality could unexpectedly wipe models from the db. For example, if your models dir was on external media, and that media was inaccessible during startup, the scan would see all your models as missing and delete them from the db.

The "proactive" scan is removed. Instead, we will scan for missing models and log a warning if we find a model whose path doesn't exist. There is no possibility of data loss.
2024-08-30 20:32:17 +00:00 · 2024-03-27 15:24:18 +11:00 · 2024-03-27 15:24:18 +11:00 · 73c326680a
commit 73c326680a
parent 2f6cce48af
4 changed files with 48 additions and 112 deletions
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@ -592,25 +592,6 @@ async def prune_model_install_jobs() -> Response:
    return Response(status_code=204)


-@model_manager_router.patch(
-    "/sync",
-    operation_id="sync_models_to_config",
-    responses={
-        204: {"description": "Model config record database resynced with files on disk"},
-        400: {"description": "Bad request"},
-    },
-)
-async def sync_models_to_config() -> Response:
-    """
-    Traverse the models and autoimport directories.
-
-    Model files without a corresponding
-    record in the database are added. Orphan records without a models file are deleted.
-    """
-    ApiDependencies.invoker.services.model_manager.install.sync_to_config()
-    return Response(status_code=204)
-
-
@model_manager_router.put(
    "/convert/{key}",
    operation_id="convert_model",
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@ -83,7 +83,6 @@ class InvokeAIAppConfig(BaseSettings):
        ssl_keyfile: SSL key file for HTTPS. See https://www.uvicorn.org/settings/#https.
        log_tokenization: Enable logging of parsed prompt tokens.
        patchmatch: Enable patchmatch inpaint code.
-        autoimport_dir: Path to a directory of models files to be imported on startup.
        models_dir: Path to the models directory.
        convert_cache_dir: Path to the converted models cache directory. When loading a non-diffusers model, it will be converted and store on disk at this location.
        legacy_conf_dir: Path to directory of legacy checkpoint config files.
@ -117,6 +116,7 @@ class InvokeAIAppConfig(BaseSettings):
        node_cache_size: How many cached nodes to keep in memory.
        hashing_algorithm: Model hashing algorthim for model installs. 'blake3_multi' is best for SSDs. 'blake3_single' is best for spinning disk HDDs. 'random' disables hashing, instead assigning a UUID to models. Useful when using a memory db to reduce model installation time, or if you don't care about storing stable hashes for models. Alternatively, any other hashlib algorithm is accepted, though these are not nearly as performant as blake3.<br>Valid values: `blake3_multi`, `blake3_single`, `random`, `md5`, `sha1`, `sha224`, `sha256`, `sha384`, `sha512`, `blake2b`, `blake2s`, `sha3_224`, `sha3_256`, `sha3_384`, `sha3_512`, `shake_128`, `shake_256`
        remote_api_tokens: List of regular expression and token pairs used when downloading models from URLs. The download URL is tested against the regex, and if it matches, the token is provided in as a Bearer token.
+        scan_models_on_startup: Scan the models directory on startup, registering orphaned models. This is typically only used in conjunction with `use_memory_db` for testing purposes.
    """

    _root: Optional[Path] = PrivateAttr(default=None)
@ -144,7 +144,6 @@ class InvokeAIAppConfig(BaseSettings):
    patchmatch:                    bool = Field(default=True,               description="Enable patchmatch inpaint code.")

    # PATHS
-    autoimport_dir:                Path = Field(default=Path("autoimport"), description="Path to a directory of models files to be imported on startup.")
    models_dir:                    Path = Field(default=Path("models"),     description="Path to the models directory.")
    convert_cache_dir:             Path = Field(default=Path("models/.cache"), description="Path to the converted models cache directory. When loading a non-diffusers model, it will be converted and store on disk at this location.")
    legacy_conf_dir:               Path = Field(default=Path("configs"), description="Path to directory of legacy checkpoint config files.")
@ -193,6 +192,7 @@ class InvokeAIAppConfig(BaseSettings):
    # MODEL INSTALL
    hashing_algorithm: HASHING_ALGORITHMS = Field(default="blake3_single",  description="Model hashing algorthim for model installs. 'blake3_multi' is best for SSDs. 'blake3_single' is best for spinning disk HDDs. 'random' disables hashing, instead assigning a UUID to models. Useful when using a memory db to reduce model installation time, or if you don't care about storing stable hashes for models. Alternatively, any other hashlib algorithm is accepted, though these are not nearly as performant as blake3.")
    remote_api_tokens: Optional[list[URLRegexTokenPair]] = Field(default=None, description="List of regular expression and token pairs used when downloading models from URLs. The download URL is tested against the regex, and if it matches, the token is provided in as a Bearer token.")
+    scan_models_on_startup:        bool = Field(default=False,              description="Scan the models directory on startup, registering orphaned models. This is typically only used in conjunction with `use_memory_db` for testing purposes.")

    # fmt: on

@ -275,11 +275,6 @@ class InvokeAIAppConfig(BaseSettings):
        assert resolved_path is not None
        return resolved_path

-    @property
-    def autoimport_path(self) -> Path:
-        """Path to the autoimports directory, resolved to an absolute path.."""
-        return self._resolve(self.autoimport_dir)
-
    @property
    def outputs_path(self) -> Optional[Path]:
        """Path to the outputs directory, resolved to an absolute path.."""
@ -423,7 +418,6 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
    else:
        # Attempt to load as a v4 config file
        try:
-            # Meta is not included in the model fields, so we need to validate it separately
            config = InvokeAIAppConfig.model_validate(loaded_config_dict)
            assert (
                config.schema_version == CONFIG_SCHEMA_VERSION
--- a/invokeai/app/services/model_install/model_install_base.py
+++ b/invokeai/app/services/model_install/model_install_base.py
@ -454,20 +454,6 @@ class ModelInstallServiceBase(ABC):
        will block indefinitely until the installs complete.
        """

-    @abstractmethod
-    def scan_directory(self, scan_dir: Path, install: bool = False) -> List[str]:
-        """
-        Recursively scan directory for new models and register or install them.
-
-        :param scan_dir: Path to the directory to scan.
-        :param install: Install if True, otherwise register in place.
-        :returns list of IDs: Returns list of IDs of models registered/installed
-        """
-
-    @abstractmethod
-    def sync_to_config(self) -> None:
-        """Synchronize models on disk to those in the model record database."""
-
    @abstractmethod
    def sync_model_path(self, key: str) -> AnyModelConfig:
        """
--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@ -10,7 +10,7 @@ from pathlib import Path
 from queue import Empty, Queue
 from shutil import copyfile, copytree, move, rmtree
 from tempfile import mkdtemp
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Optional, Union

 import yaml
 from huggingface_hub import HfFolder
@ -25,12 +25,10 @@ from invokeai.app.services.model_records import DuplicateModelException, ModelRe
 from invokeai.app.services.model_records.model_records_base import ModelRecordChanges
 from invokeai.backend.model_manager.config import (
    AnyModelConfig,
-    BaseModelType,
    CheckpointConfigBase,
    InvalidModelConfigException,
    ModelRepoVariant,
    ModelSourceType,
-    ModelType,
 )
 from invokeai.backend.model_manager.metadata import (
    AnyModelRepoMetadata,
@ -42,7 +40,7 @@ from invokeai.backend.model_manager.metadata import (
 from invokeai.backend.model_manager.metadata.metadata_base import HuggingFaceMetadata
 from invokeai.backend.model_manager.probe import ModelProbe
 from invokeai.backend.model_manager.search import ModelSearch
-from invokeai.backend.util import Chdir, InvokeAILogger
+from invokeai.backend.util import InvokeAILogger
 from invokeai.backend.util.devices import choose_precision, choose_torch_device

 from .model_install_base import (
@ -84,8 +82,6 @@ class ModelInstallService(ModelInstallServiceBase):
        self._logger = InvokeAILogger.get_logger(name=self.__class__.__name__)
        self._install_jobs: List[ModelInstallJob] = []
        self._install_queue: Queue[ModelInstallJob] = Queue()
-        self._cached_model_paths: Set[Path] = set()
-        self._models_installed: Set[str] = set()
        self._lock = threading.Lock()
        self._stop_event = threading.Event()
        self._downloads_changed_event = threading.Event()
@ -131,7 +127,16 @@ class ModelInstallService(ModelInstallServiceBase):
            self._start_installer_thread()
            self._remove_dangling_install_dirs()
            self._migrate_yaml()
-            self.sync_to_config()
+            # In normal use, we do not want to scan the models directory - it should never have orphaned models.
+            # We should only do the scan when the flag is set (which should only be set when testing).
+            if self.app_config.scan_models_on_startup:
+                self._register_orphaned_models()
+
+            # Check all models' paths and confirm they exist. A model could be missing if it was installed on a volume
+            # that isn't currently mounted. In this case, we don't want to delete the model from the database, but we do
+            # want to alert the user.
+            for model in self._scan_for_missing_models():
+                self._logger.warning(f"Missing model file: {model.name} at {model.path}")

    def stop(self, invoker: Optional[Invoker] = None) -> None:
        """Stop the installer thread; after this the object can be deleted and garbage collected."""
@ -306,15 +311,6 @@ class ModelInstallService(ModelInstallServiceBase):
        unfinished_jobs = [x for x in self._install_jobs if not x.in_terminal_state]
        self._install_jobs = unfinished_jobs

-    def sync_to_config(self) -> None:
-        """Synchronize models on disk to those in the config record store database."""
-        self._scan_models_directory()
-        if self._app_config.autoimport_path:
-            self._logger.info("Scanning autoimport directory for new models")
-            installed = self.scan_directory(self._app_config.autoimport_path)
-            self._logger.info(f"{len(installed)} new models registered")
-        self._logger.info("Model installer (re)initialized")
-
    def _migrate_yaml(self) -> None:
        db_models = self.record_store.all_models()

@ -366,14 +362,6 @@ class ModelInstallService(ModelInstallServiceBase):
        # Unset the path - we are done with it either way
        self._app_config.legacy_models_yaml_path = None

-    def scan_directory(self, scan_dir: Path, install: bool = False) -> List[str]:  # noqa D102
-        self._cached_model_paths = {Path(x.path).resolve() for x in self.record_store.all_models()}
-        callback = self._scan_install if install else self._scan_register
-        search = ModelSearch(on_model_found=callback)
-        self._models_installed.clear()
-        search.search(scan_dir)
-        return list(self._models_installed)
-
    def unregister(self, key: str) -> None:  # noqa D102
        self.record_store.del_model(key)

@ -509,34 +497,44 @@ class ModelInstallService(ModelInstallServiceBase):
            self._logger.info(f"Removing dangling temporary directory {tmpdir}")
            rmtree(tmpdir)

-    def _scan_models_directory(self) -> None:
+    def _scan_for_missing_models(self) -> list[AnyModelConfig]:
+        """Scan the models directory for missing models and return a list of them."""
+        missing_models: list[AnyModelConfig] = []
+        for x in self.record_store.all_models():
+            if not Path(x.path).resolve().exists():
+                missing_models.append(x)
+        return missing_models
+
+    def _register_orphaned_models(self) -> None:
+        """Scan the invoke-managed models directory for orphaned models and registers them.
+
+        This is typically only used during testing with a new DB or when using the memory DB, because those are the
+        only situations in which we may have orphaned models in the models directory.
        """
-        Scan the models directory for new and missing models.

-        New models will be added to the storage backend. Missing models
-        will be deleted.
-        """
-        defunct_models = set()
-        installed = set()
+        installed_model_paths = {Path(x.path).resolve() for x in self.record_store.all_models()}

-        with Chdir(self._app_config.models_path):
-            self._logger.info("Checking for models that have been moved or deleted from disk")
-            for model_config in self.record_store.all_models():
-                path = Path(model_config.path)
-                if not path.exists():
-                    self._logger.info(f"{model_config.name}: path {path.as_posix()} no longer exists. Unregistering")
-                    defunct_models.add(model_config.key)
-            for key in defunct_models:
-                self.unregister(key)
+        # The bool returned by this callback determines if the model is added to the list of models found by the search
+        def on_model_found(model_path: Path) -> bool:
+            resolved_path = model_path.resolve()
+            # Already registered models should be in the list of found models, but not re-registered.
+            if resolved_path in installed_model_paths:
+                return True
+            # Skip core models entirely - these aren't registered with the model manager.
+            if str(resolved_path).startswith(str(self.app_config.models_path / "core")):
+                return False
+            try:
+                model_id = self.register_path(model_path)
+                self._logger.info(f"Registered {model_path.name} with id {model_id}")
+            except DuplicateModelException:
+                # In case a duplicate models sneaks by, we will ignore this error - we "found" the model
+                pass
+            return True

-            self._logger.info(f"Scanning {self._app_config.models_path} for new and orphaned models")
-            for cur_base_model in BaseModelType:
-                for cur_model_type in ModelType:
-                    models_dir = self._app_config.models_path / Path(cur_base_model.value, cur_model_type.value)
-                    if not models_dir.exists():
-                        continue
-                    installed.update(self.scan_directory(models_dir))
-            self._logger.info(f"{len(installed)} new models registered; {len(defunct_models)} unregistered")
+        self._logger.info(f"Scanning {self._app_config.models_path} for orphaned models")
+        search = ModelSearch(on_model_found=on_model_found)
+        found_models = search.search(self._app_config.models_path)
+        self._logger.info(f"{len(found_models)} new models registered")

    def sync_model_path(self, key: str) -> AnyModelConfig:
        """
@ -567,29 +565,6 @@ class ModelInstallService(ModelInstallServiceBase):
        self.record_store.update_model(key, ModelRecordChanges(path=model.path))
        return model

-    def _scan_register(self, model: Path) -> bool:
-        if model.resolve() in self._cached_model_paths:
-            return True
-        try:
-            id = self.register_path(model)
-            self.sync_model_path(id)  # possibly move it to right place in `models`
-            self._logger.info(f"Registered {model.name} with id {id}")
-            self._models_installed.add(id)
-        except DuplicateModelException:
-            pass
-        return True
-
-    def _scan_install(self, model: Path) -> bool:
-        if model in self._cached_model_paths:
-            return True
-        try:
-            id = self.install_path(model)
-            self._logger.info(f"Installed {model} with id {id}")
-            self._models_installed.add(id)
-        except DuplicateModelException:
-            pass
-        return True
-
    def _copy_model(self, old_path: Path, new_path: Path) -> Path:
        if old_path == new_path:
            return old_path