# InvokeAI/invokeai/backend/model_manager/load/load_base.py

# Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team
"""
Base class for model loading in InvokeAI.
"""

from abc import ABC, abstractmethod
from contextlib import contextmanager
from dataclasses import dataclass
from logging import Logger
from pathlib import Path
from typing import Any, Dict, Generator, Optional, Tuple

import torch

from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.model_manager.config import (
    AnyModel,
    AnyModelConfig,
    SubModelType,
)
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase


@dataclass
class LoadedModelWithoutConfig:
    """
    Context manager object that mediates transfer from RAM<->VRAM.

    All of the actual work is delegated to the wrapped locker; this class
    only sequences its `lock()` / `unlock()` calls. It can be used through
    two distinct APIs:

    1. Older API (deprecated):
    Use the LoadedModel object directly as a context manager.
    It will move the model into VRAM (on CUDA devices), and
    return the model in a form suitable for passing to torch.
    Example:
    ```
    loaded_model = loader.get_model_by_key('f13dd932', SubModelType('vae'))
    with loaded_model as vae:
        image = vae.decode(latents)[0]
    ```

    2. Newer API (recommended):
    Call the LoadedModel's `model_on_device()` method in a
    context. It yields a 2-tuple: first a copy of the model's
    state dict held in CPU RAM, then a copy of the model in VRAM.
    The state dict is there so that LoRAs and other model patchers
    can restore the model to its unpatched state without expensive
    copy and restore operations.

    Example:
    ```
    loaded_model = loader.get_model_by_key('f13dd932', SubModelType('vae'))
    with loaded_model.model_on_device() as (state_dict, vae):
        image = vae.decode(latents)[0]
    ```

    Treat the state_dict as read-only and never modify it. Some
    loadable models have no state_dict at all; for those the first
    element of the tuple is None.
    """

    # Locker that performs the lock/unlock and exposes the underlying model.
    _locker: ModelLockerBase

    @property
    def model(self) -> AnyModel:
        """Return the locker's model as-is; no lock is taken."""
        return self._locker.model

    def __enter__(self) -> AnyModel:
        """Lock the model and return it (deprecated context-manager API)."""
        # The value returned by lock() is intentionally not used here; the
        # model is handed back through the `model` property instead.
        self._locker.lock()
        return self.model

    def __exit__(self, *args: Any, **kwargs: Any) -> None:
        """Unlock the model; any exception information is ignored and not suppressed."""
        self._locker.unlock()

    @contextmanager
    def model_on_device(self) -> Generator[Tuple[Optional[Dict[str, torch.Tensor]], AnyModel], None, None]:
        """
        Lock the model and yield `(state_dict, model)` for the duration of the context.

        The model yielded is whatever the locker's `lock()` returns, and the
        state dict is whatever its `get_state_dict()` returns (None when the
        model has none). The lock is released when the context exits, even if
        the body raises.
        """
        # Acquire outside the try block: if locking itself fails there is
        # nothing to unlock.
        model_on_device = self._locker.lock()
        try:
            yield self._locker.get_state_dict(), model_on_device
        finally:
            self._locker.unlock()


@dataclass
class LoadedModel(LoadedModelWithoutConfig):
    """
    Context manager object that mediates transfer from RAM<->VRAM.

    Extends LoadedModelWithoutConfig with an optional `config` field; all
    locking and context-manager behavior is inherited unchanged.
    """

    # Configuration of the loaded model; None when no configuration was supplied.
    config: Optional[AnyModelConfig] = None


# TODO(MM2):
# Some "intermediary" subclasses in the ModelLoaderBase class hierarchy define methods that their subclasses don't
# know about. I think the problem may be related to this class being an ABC.
#
# For example, GenericDiffusersLoader defines `get_hf_load_class()`, and StableDiffusionDiffusersModel attempts to
# call it. However, the method is not defined in the ABC, so it is not guaranteed to be implemented.


class ModelLoaderBase(ABC):
    """
    Abstract interface for objects that load models into RAM/VRAM.

    Every member is abstract, so a concrete loader must implement
    `__init__`, `load_model`, `get_size_fs` and the `ram_cache` property
    before it can be instantiated.
    """

    @abstractmethod
    def __init__(self, app_config: InvokeAIAppConfig, logger: Logger, ram_cache: ModelCacheBase[AnyModel]):
        """
        Initialize the loader.

        :param app_config: Application configuration object.
        :param logger: Logger instance.
        :param ram_cache: Model cache for this loader.
        """

    @abstractmethod
    def load_model(self, model_config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> LoadedModel:
        """
        Return a loaded model given its configuration.

        Given a model identified in the model configuration backend,
        return a LoadedModel object that can be used to retrieve the model.

        :param model_config: Model configuration, as returned by ModelConfigRecordStore
        :param submodel_type: optional SubModelType value indicating the portion of
               the model to retrieve (e.g. the VAE)
        """

    @abstractmethod
    def get_size_fs(
        self,
        config: AnyModelConfig,
        model_path: Path,
        submodel_type: Optional[SubModelType] = None,
    ) -> int:
        """
        Return the size of the model in bytes, calculated before loading.

        :param config: Model configuration.
        :param model_path: Filesystem path of the model.
        :param submodel_type: optional SubModelType value restricting the
               calculation to one portion of the model
        """

    @property
    @abstractmethod
    def ram_cache(self) -> ModelCacheBase[AnyModel]:
        """Return the RAM cache associated with this loader."""
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`# Copyright (c) 2024, Lincoln D. Stein and the InvokeAI Development Team`
			`"""`
			`Base class for model loading in InvokeAI.`
			`"""`

			`from abc import ABC, abstractmethod`
LoRA patching optimization (#6439) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes added during penultimate review --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-06 13:53:35 +00:00			`from contextlib import contextmanager`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`from dataclasses import dataclass`
			`from logging import Logger`
			`from pathlib import Path`
LoRA patching optimization (#6439) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes added during penultimate review --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-06 13:53:35 +00:00			`from typing import Any, Dict, Generator, Optional, Tuple`

			`import torch`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
			`from invokeai.app.services.config import InvokeAIAppConfig`
make model manager v2 ready for PR review - Replace legacy model manager service with the v2 manager. - Update invocations to use new load interface. - Fixed many but not all type checking errors in the invocations. Most were unrelated to model manager - Updated routes. All the new routes live under the route tag `model_manager_v2`. To avoid confusion with the old routes, they have the URL prefix `/api/v2/models`. The old routes have been de-registered. - Added a pytest for the loader. - Updated documentation in contributing/MODEL_MANAGER.md 2024-02-10 23:09:45 +00:00			`from invokeai.backend.model_manager.config import (`
			`AnyModel,`
			`AnyModelConfig,`
			`SubModelType,`
			`)`
loaders for main, controlnet, ip-adapter, clipvision and t2i 2024-02-04 22:23:10 +00:00			`from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase`

add ram cache module and support files 2024-02-01 04:37:59 +00:00
			`@dataclass`
add support for generic loading of diffusers directories 2024-06-04 00:31:05 +00:00			`class LoadedModelWithoutConfig:`
LoRA patching optimization (#6439) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes added during penultimate review --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-06 13:53:35 +00:00			`"""`
			`Context manager object that mediates transfer from RAM<->VRAM.`

			`This is a context manager object that has two distinct APIs:`

			`1. Older API (deprecated):`
			`Use the LoadedModel object directly as a context manager.`
			`It will move the model into VRAM (on CUDA devices), and`
			`return the model in a form suitable for passing to torch.`
			`Example:`
			```
			`loaded_model_= loader.get_model_by_key('f13dd932', SubModelType('vae'))`
			`with loaded_model as vae:`
			`image = vae.decode(latents)[0]`
			```

			`2. Newer API (recommended):`
			Call the LoadedModel's `model_on_device()` method in a
			`context. It returns a tuple consisting of a copy of`
			`the model's state dict in CPU RAM followed by a copy`
			`of the model in VRAM. The state dict is provided to allow`
			`LoRAs and other model patchers to return the model to`
			`its unpatched state without expensive copy and restore`
			`operations.`

			`Example:`
			```
			`loaded_model_= loader.get_model_by_key('f13dd932', SubModelType('vae'))`
			`with loaded_model.model_on_device() as (state_dict, vae):`
			`image = vae.decode(latents)[0]`
			```

			`The state_dict should be treated as a read-only object and`
			`never modified. Also be aware that some loadable models do`
			`not have a state_dict, in which case this value will be None.`
			`"""`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
Fix issues identified during PR review by RyanjDick and brandonrising - ModelMetadataStoreService is now injected into ModelRecordStoreService (these two services are really joined at the hip, and should someday be merged) - ModelRecordStoreService is now injected into ModelManagerService - Reduced timeout value for the various installer and download wait*() methods - Introduced a Mock modelmanager for testing - Removed bare print() statement with _logger in the install helper backend. - Removed unused code from model loader init file - Made `locker` a private variable in the `LoadedModel` object. - Fixed up model merge frontend (will be deprecated anyway!) 2024-02-16 03:41:29 +00:00			`_locker: ModelLockerBase`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
make model manager v2 ready for PR review - Replace legacy model manager service with the v2 manager. - Update invocations to use new load interface. - Fixed many but not all type checking errors in the invocations. Most were unrelated to model manager - Updated routes. All the new routes live under the route tag `model_manager_v2`. To avoid confusion with the old routes, they have the URL prefix `/api/v2/models`. The old routes have been de-registered. - Added a pytest for the loader. - Updated documentation in contributing/MODEL_MANAGER.md 2024-02-10 23:09:45 +00:00			`def __enter__(self) -> AnyModel:`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`"""Context entry."""`
Fix issues identified during PR review by RyanjDick and brandonrising - ModelMetadataStoreService is now injected into ModelRecordStoreService (these two services are really joined at the hip, and should someday be merged) - ModelRecordStoreService is now injected into ModelManagerService - Reduced timeout value for the various installer and download wait*() methods - Introduced a Mock modelmanager for testing - Removed bare print() statement with _logger in the install helper backend. - Removed unused code from model loader init file - Made `locker` a private variable in the `LoadedModel` object. - Fixed up model merge frontend (will be deprecated anyway!) 2024-02-16 03:41:29 +00:00			`self._locker.lock()`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`return self.model`

			`def __exit__(self, *args: Any, **kwargs: Any) -> None:`
			`"""Context exit."""`
Fix issues identified during PR review by RyanjDick and brandonrising - ModelMetadataStoreService is now injected into ModelRecordStoreService (these two services are really joined at the hip, and should someday be merged) - ModelRecordStoreService is now injected into ModelManagerService - Reduced timeout value for the various installer and download wait*() methods - Introduced a Mock modelmanager for testing - Removed bare print() statement with _logger in the install helper backend. - Removed unused code from model loader init file - Made `locker` a private variable in the `LoadedModel` object. - Fixed up model merge frontend (will be deprecated anyway!) 2024-02-16 03:41:29 +00:00			`self._locker.unlock()`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
LoRA patching optimization (#6439) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes added during penultimate review --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-06 13:53:35 +00:00			`@contextmanager`
			`def model_on_device(self) -> Generator[Tuple[Optional[Dict[str, torch.Tensor]], AnyModel], None, None]:`
			`"""Return a tuple consisting of the model's state dict (if it exists) and the locked model on execution device."""`
			`locked_model = self._locker.lock()`
			`try:`
			`state_dict = self._locker.get_state_dict()`
			`yield (state_dict, locked_model)`
			`finally:`
			`self._locker.unlock()`

add ram cache module and support files 2024-02-01 04:37:59 +00:00			`@property`
			`def model(self) -> AnyModel:`
			`"""Return the model without locking it."""`
Fix issues identified during PR review by RyanjDick and brandonrising - ModelMetadataStoreService is now injected into ModelRecordStoreService (these two services are really joined at the hip, and should someday be merged) - ModelRecordStoreService is now injected into ModelManagerService - Reduced timeout value for the various installer and download wait*() methods - Introduced a Mock modelmanager for testing - Removed bare print() statement with _logger in the install helper backend. - Removed unused code from model loader init file - Made `locker` a private variable in the `LoadedModel` object. - Fixed up model merge frontend (will be deprecated anyway!) 2024-02-16 03:41:29 +00:00			`return self._locker.model`
add ram cache module and support files 2024-02-01 04:37:59 +00:00

add support for generic loading of diffusers directories 2024-06-04 00:31:05 +00:00			`@dataclass`
			`class LoadedModel(LoadedModelWithoutConfig):`
			`"""Context manager object that mediates transfer from RAM<->VRAM."""`

			`config: Optional[AnyModelConfig] = None`


final tidying before marking PR as ready for review - Replace AnyModelLoader with ModelLoaderRegistry - Fix type check errors in multiple files - Remove apparently unneeded `get_model_config_enum()` method from model manager - Remove last vestiges of old model manager - Updated tests and documentation resolve conflict with seamless.py 2024-02-18 06:27:42 +00:00			`# TODO(MM2):`
			`# Some "intermediary" subclasses in the ModelLoaderBase class hierarchy define methods that their subclasses don't`
			`# know about. I think the problem may be related to this class being an ABC.`
			`#`
			# For example, GenericDiffusersLoader defines `get_hf_load_class()`, and StableDiffusionDiffusersModel attempts to
			`# call it. However, the method is not defined in the ABC, so it is not guaranteed to be implemented.`


add ram cache module and support files 2024-02-01 04:37:59 +00:00			`class ModelLoaderBase(ABC):`
			`"""Abstract base class for loading models into RAM/VRAM."""`

			`@abstractmethod`
			`def __init__(`
			`self,`
			`app_config: InvokeAIAppConfig,`
			`logger: Logger,`
loaders for main, controlnet, ip-adapter, clipvision and t2i 2024-02-04 22:23:10 +00:00			`ram_cache: ModelCacheBase[AnyModel],`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`):`
			`"""Initialize the loader."""`
			`pass`

			`@abstractmethod`
final tidying before marking PR as ready for review - Replace AnyModelLoader with ModelLoaderRegistry - Fix type check errors in multiple files - Remove apparently unneeded `get_model_config_enum()` method from model manager - Remove last vestiges of old model manager - Updated tests and documentation resolve conflict with seamless.py 2024-02-18 06:27:42 +00:00			`def load_model(self, model_config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> LoadedModel:`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`"""`
model loading and conversion implemented for vaes 2024-02-04 03:55:09 +00:00			`Return a model given its configuration.`
add ram cache module and support files 2024-02-01 04:37:59 +00:00
model loading and conversion implemented for vaes 2024-02-04 03:55:09 +00:00			`Given a model identified in the model configuration backend,`
add ram cache module and support files 2024-02-01 04:37:59 +00:00			`return a ModelInfo object that can be used to retrieve the model.`

			`:param model_config: Model configuration, as returned by ModelConfigRecordStore`
			`:param submodel_type: an ModelType enum indicating the portion of`
			`the model to retrieve (e.g. ModelType.Vae)`
			`"""`
			`pass`

			`@abstractmethod`
			`def get_size_fs(`
			`self, config: AnyModelConfig, model_path: Path, submodel_type: Optional[SubModelType] = None`
			`) -> int:`
			`"""Return size in bytes of the model, calculated before loading."""`
			`pass`
[mm] Do not write diffuser model to disk when convert_cache set to zero (#6072) * pass model config to _load_model * make conversion work again * do not write diffusers to disk when convert_cache set to 0 * adding same model to cache twice is a no-op, not an assertion error * fix issues identified by psychedelicious during pr review * following conversion, avoid redundant read of cached submodels * fix error introduced while merging --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> 2024-03-29 20:11:08 +00:00
			`@property`
			`@abstractmethod`
			`def ram_cache(self) -> ModelCacheBase[AnyModel]:`
			`"""Return the ram cache associated with this loader."""`
			`pass`