feat(mm): remove autoimport; revise startup model scanning

These two changes are interrelated.

## Autoimport

The autoimport feature can be easily replicated using the scan folder tab in the model manager. Removing the implicit autoimport reduces surface area and unifies all model installation into the UI.

This functionality is removed, and the `autoimport_dir` config setting is removed.

## Startup model dir scanning

We scanned the invoke-managed models dir on startup and took certain actions:

- Register orphaned model files
- Remove model records from the db when the model path doesn't exist

### Orphaned model files

We should never have orphaned model files during normal use - we manage the models directory, and we only delete files when the user requests it.

During testing or development, when a fresh DB or memory DB is used, we could end up with orphaned models that should be registered.

Instead of always scanning for orphaned models and registering them, we now only do the scan if the new `scan_models_on_startup` config flag is set.

The description for this setting indicates it is intended for use for testing only.

### Remove records for missing model files

This functionality could unexpectedly wipe models from the db.

For example, if your models dir was on external media, and that media was inaccessible during startup, the scan would see all your models as missing and delete them from the db.

The "proactive" scan is removed. Instead, we will scan for missing models and log a warning if we find a model whose path doesn't exist. No possibility for data loss.
This commit is contained in:
psychedelicious 2024-03-27 15:24:18 +11:00
parent 2f6cce48af
commit 73c326680a
4 changed files with 48 additions and 112 deletions

View File

@ -592,25 +592,6 @@ async def prune_model_install_jobs() -> Response:
return Response(status_code=204)
@model_manager_router.patch(
"/sync",
operation_id="sync_models_to_config",
responses={
204: {"description": "Model config record database resynced with files on disk"},
400: {"description": "Bad request"},
},
)
async def sync_models_to_config() -> Response:
"""
Traverse the models and autoimport directories.
Model files without a corresponding
record in the database are added. Orphan records without a models file are deleted.
"""
ApiDependencies.invoker.services.model_manager.install.sync_to_config()
return Response(status_code=204)
@model_manager_router.put(
"/convert/{key}",
operation_id="convert_model",

View File

@ -83,7 +83,6 @@ class InvokeAIAppConfig(BaseSettings):
ssl_keyfile: SSL key file for HTTPS. See https://www.uvicorn.org/settings/#https.
log_tokenization: Enable logging of parsed prompt tokens.
patchmatch: Enable patchmatch inpaint code.
autoimport_dir: Path to a directory of models files to be imported on startup.
models_dir: Path to the models directory.
convert_cache_dir: Path to the converted models cache directory. When loading a non-diffusers model, it will be converted and store on disk at this location.
legacy_conf_dir: Path to directory of legacy checkpoint config files.
@ -117,6 +116,7 @@ class InvokeAIAppConfig(BaseSettings):
node_cache_size: How many cached nodes to keep in memory.
hashing_algorithm: Model hashing algorthim for model installs. 'blake3_multi' is best for SSDs. 'blake3_single' is best for spinning disk HDDs. 'random' disables hashing, instead assigning a UUID to models. Useful when using a memory db to reduce model installation time, or if you don't care about storing stable hashes for models. Alternatively, any other hashlib algorithm is accepted, though these are not nearly as performant as blake3.<br>Valid values: `blake3_multi`, `blake3_single`, `random`, `md5`, `sha1`, `sha224`, `sha256`, `sha384`, `sha512`, `blake2b`, `blake2s`, `sha3_224`, `sha3_256`, `sha3_384`, `sha3_512`, `shake_128`, `shake_256`
remote_api_tokens: List of regular expression and token pairs used when downloading models from URLs. The download URL is tested against the regex, and if it matches, the token is provided in as a Bearer token.
scan_models_on_startup: Scan the models directory on startup, registering orphaned models. This is typically only used in conjunction with `use_memory_db` for testing purposes.
"""
_root: Optional[Path] = PrivateAttr(default=None)
@ -144,7 +144,6 @@ class InvokeAIAppConfig(BaseSettings):
patchmatch: bool = Field(default=True, description="Enable patchmatch inpaint code.")
# PATHS
autoimport_dir: Path = Field(default=Path("autoimport"), description="Path to a directory of models files to be imported on startup.")
models_dir: Path = Field(default=Path("models"), description="Path to the models directory.")
convert_cache_dir: Path = Field(default=Path("models/.cache"), description="Path to the converted models cache directory. When loading a non-diffusers model, it will be converted and store on disk at this location.")
legacy_conf_dir: Path = Field(default=Path("configs"), description="Path to directory of legacy checkpoint config files.")
@ -193,6 +192,7 @@ class InvokeAIAppConfig(BaseSettings):
# MODEL INSTALL
hashing_algorithm: HASHING_ALGORITHMS = Field(default="blake3_single", description="Model hashing algorthim for model installs. 'blake3_multi' is best for SSDs. 'blake3_single' is best for spinning disk HDDs. 'random' disables hashing, instead assigning a UUID to models. Useful when using a memory db to reduce model installation time, or if you don't care about storing stable hashes for models. Alternatively, any other hashlib algorithm is accepted, though these are not nearly as performant as blake3.")
remote_api_tokens: Optional[list[URLRegexTokenPair]] = Field(default=None, description="List of regular expression and token pairs used when downloading models from URLs. The download URL is tested against the regex, and if it matches, the token is provided in as a Bearer token.")
scan_models_on_startup: bool = Field(default=False, description="Scan the models directory on startup, registering orphaned models. This is typically only used in conjunction with `use_memory_db` for testing purposes.")
# fmt: on
@ -275,11 +275,6 @@ class InvokeAIAppConfig(BaseSettings):
assert resolved_path is not None
return resolved_path
@property
def autoimport_path(self) -> Path:
"""Path to the autoimports directory, resolved to an absolute path.."""
return self._resolve(self.autoimport_dir)
@property
def outputs_path(self) -> Optional[Path]:
"""Path to the outputs directory, resolved to an absolute path.."""
@ -423,7 +418,6 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
else:
# Attempt to load as a v4 config file
try:
# Meta is not included in the model fields, so we need to validate it separately
config = InvokeAIAppConfig.model_validate(loaded_config_dict)
assert (
config.schema_version == CONFIG_SCHEMA_VERSION

View File

@ -454,20 +454,6 @@ class ModelInstallServiceBase(ABC):
will block indefinitely until the installs complete.
"""
@abstractmethod
def scan_directory(self, scan_dir: Path, install: bool = False) -> List[str]:
"""
Recursively scan directory for new models and register or install them.
:param scan_dir: Path to the directory to scan.
:param install: Install if True, otherwise register in place.
:returns list of IDs: Returns list of IDs of models registered/installed
"""
@abstractmethod
def sync_to_config(self) -> None:
"""Synchronize models on disk to those in the model record database."""
@abstractmethod
def sync_model_path(self, key: str) -> AnyModelConfig:
"""

View File

@ -10,7 +10,7 @@ from pathlib import Path
from queue import Empty, Queue
from shutil import copyfile, copytree, move, rmtree
from tempfile import mkdtemp
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Dict, List, Optional, Union
import yaml
from huggingface_hub import HfFolder
@ -25,12 +25,10 @@ from invokeai.app.services.model_records import DuplicateModelException, ModelRe
from invokeai.app.services.model_records.model_records_base import ModelRecordChanges
from invokeai.backend.model_manager.config import (
AnyModelConfig,
BaseModelType,
CheckpointConfigBase,
InvalidModelConfigException,
ModelRepoVariant,
ModelSourceType,
ModelType,
)
from invokeai.backend.model_manager.metadata import (
AnyModelRepoMetadata,
@ -42,7 +40,7 @@ from invokeai.backend.model_manager.metadata import (
from invokeai.backend.model_manager.metadata.metadata_base import HuggingFaceMetadata
from invokeai.backend.model_manager.probe import ModelProbe
from invokeai.backend.model_manager.search import ModelSearch
from invokeai.backend.util import Chdir, InvokeAILogger
from invokeai.backend.util import InvokeAILogger
from invokeai.backend.util.devices import choose_precision, choose_torch_device
from .model_install_base import (
@ -84,8 +82,6 @@ class ModelInstallService(ModelInstallServiceBase):
self._logger = InvokeAILogger.get_logger(name=self.__class__.__name__)
self._install_jobs: List[ModelInstallJob] = []
self._install_queue: Queue[ModelInstallJob] = Queue()
self._cached_model_paths: Set[Path] = set()
self._models_installed: Set[str] = set()
self._lock = threading.Lock()
self._stop_event = threading.Event()
self._downloads_changed_event = threading.Event()
@ -131,7 +127,16 @@ class ModelInstallService(ModelInstallServiceBase):
self._start_installer_thread()
self._remove_dangling_install_dirs()
self._migrate_yaml()
self.sync_to_config()
# In normal use, we do not want to scan the models directory - it should never have orphaned models.
# We should only do the scan when the flag is set (which should only be set when testing).
if self.app_config.scan_models_on_startup:
self._register_orphaned_models()
# Check all models' paths and confirm they exist. A model could be missing if it was installed on a volume
# that isn't currently mounted. In this case, we don't want to delete the model from the database, but we do
# want to alert the user.
for model in self._scan_for_missing_models():
self._logger.warning(f"Missing model file: {model.name} at {model.path}")
def stop(self, invoker: Optional[Invoker] = None) -> None:
"""Stop the installer thread; after this the object can be deleted and garbage collected."""
@ -306,15 +311,6 @@ class ModelInstallService(ModelInstallServiceBase):
unfinished_jobs = [x for x in self._install_jobs if not x.in_terminal_state]
self._install_jobs = unfinished_jobs
def sync_to_config(self) -> None:
"""Synchronize models on disk to those in the config record store database."""
self._scan_models_directory()
if self._app_config.autoimport_path:
self._logger.info("Scanning autoimport directory for new models")
installed = self.scan_directory(self._app_config.autoimport_path)
self._logger.info(f"{len(installed)} new models registered")
self._logger.info("Model installer (re)initialized")
def _migrate_yaml(self) -> None:
db_models = self.record_store.all_models()
@ -366,14 +362,6 @@ class ModelInstallService(ModelInstallServiceBase):
# Unset the path - we are done with it either way
self._app_config.legacy_models_yaml_path = None
def scan_directory(self, scan_dir: Path, install: bool = False) -> List[str]: # noqa D102
self._cached_model_paths = {Path(x.path).resolve() for x in self.record_store.all_models()}
callback = self._scan_install if install else self._scan_register
search = ModelSearch(on_model_found=callback)
self._models_installed.clear()
search.search(scan_dir)
return list(self._models_installed)
def unregister(self, key: str) -> None: # noqa D102
self.record_store.del_model(key)
@ -509,34 +497,44 @@ class ModelInstallService(ModelInstallServiceBase):
self._logger.info(f"Removing dangling temporary directory {tmpdir}")
rmtree(tmpdir)
def _scan_models_directory(self) -> None:
def _scan_for_missing_models(self) -> list[AnyModelConfig]:
"""Scan the models directory for missing models and return a list of them."""
missing_models: list[AnyModelConfig] = []
for x in self.record_store.all_models():
if not Path(x.path).resolve().exists():
missing_models.append(x)
return missing_models
def _register_orphaned_models(self) -> None:
"""Scan the invoke-managed models directory for orphaned models and registers them.
This is typically only used during testing with a new DB or when using the memory DB, because those are the
only situations in which we may have orphaned models in the models directory.
"""
Scan the models directory for new and missing models.
New models will be added to the storage backend. Missing models
will be deleted.
"""
defunct_models = set()
installed = set()
installed_model_paths = {Path(x.path).resolve() for x in self.record_store.all_models()}
with Chdir(self._app_config.models_path):
self._logger.info("Checking for models that have been moved or deleted from disk")
for model_config in self.record_store.all_models():
path = Path(model_config.path)
if not path.exists():
self._logger.info(f"{model_config.name}: path {path.as_posix()} no longer exists. Unregistering")
defunct_models.add(model_config.key)
for key in defunct_models:
self.unregister(key)
# The bool returned by this callback determines if the model is added to the list of models found by the search
def on_model_found(model_path: Path) -> bool:
resolved_path = model_path.resolve()
# Already registered models should be in the list of found models, but not re-registered.
if resolved_path in installed_model_paths:
return True
# Skip core models entirely - these aren't registered with the model manager.
if str(resolved_path).startswith(str(self.app_config.models_path / "core")):
return False
try:
model_id = self.register_path(model_path)
self._logger.info(f"Registered {model_path.name} with id {model_id}")
except DuplicateModelException:
# In case a duplicate models sneaks by, we will ignore this error - we "found" the model
pass
return True
self._logger.info(f"Scanning {self._app_config.models_path} for new and orphaned models")
for cur_base_model in BaseModelType:
for cur_model_type in ModelType:
models_dir = self._app_config.models_path / Path(cur_base_model.value, cur_model_type.value)
if not models_dir.exists():
continue
installed.update(self.scan_directory(models_dir))
self._logger.info(f"{len(installed)} new models registered; {len(defunct_models)} unregistered")
self._logger.info(f"Scanning {self._app_config.models_path} for orphaned models")
search = ModelSearch(on_model_found=on_model_found)
found_models = search.search(self._app_config.models_path)
self._logger.info(f"{len(found_models)} new models registered")
def sync_model_path(self, key: str) -> AnyModelConfig:
"""
@ -567,29 +565,6 @@ class ModelInstallService(ModelInstallServiceBase):
self.record_store.update_model(key, ModelRecordChanges(path=model.path))
return model
def _scan_register(self, model: Path) -> bool:
if model.resolve() in self._cached_model_paths:
return True
try:
id = self.register_path(model)
self.sync_model_path(id) # possibly move it to right place in `models`
self._logger.info(f"Registered {model.name} with id {id}")
self._models_installed.add(id)
except DuplicateModelException:
pass
return True
def _scan_install(self, model: Path) -> bool:
if model in self._cached_model_paths:
return True
try:
id = self.install_path(model)
self._logger.info(f"Installed {model} with id {id}")
self._models_installed.add(id)
except DuplicateModelException:
pass
return True
def _copy_model(self, old_path: Path, new_path: Path) -> Path:
if old_path == new_path:
return old_path