Merge remote-tracking branch 'origin/main' into lstein/feat/simple-mm2-api

2024-08-30 20:32:17 +00:00 · 2024-06-07 14:23:41 +10:00
parent dc134935c8 6d067e56f2
commit fde58ce0a3
42 changed files with 1659 additions and 828 deletions
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py
@ -30,6 +30,11 @@ class ModelLockerBase(ABC):
        """Unlock the contained model, and remove it from VRAM."""
        pass

+    @abstractmethod
+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the cached model's state dict, or None if it has none."""
+        pass
+
    @property
    @abstractmethod
    def model(self) -> AnyModel:
@ -56,6 +61,11 @@ class CacheRecord(Generic[T]):
    and then injected into the model. When the model is finished, the VRAM
    copy of the state dict is deleted, and the RAM version is reinjected
    into the model.
+
+    The state_dict should be treated as a read-only attribute. Do not attempt
+    to patch or otherwise modify it. Instead, patch the copy of the state_dict
+    after it is loaded into the execution device (e.g. CUDA), using the
+    `model_on_device()` context manager of `LoadedModel`.
    """

    key: str
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@ -2,6 +2,8 @@
 Base class and implementation of a class that moves models in and out of VRAM.
 """

+from typing import Dict, Optional
+
 import torch

 from invokeai.backend.model_manager import AnyModel
@ -27,16 +29,18 @@ class ModelLocker(ModelLockerBase):
        """Return the model without moving it around."""
        return self._cache_entry.model

+    def get_state_dict(self) -> Optional[Dict[str, torch.Tensor]]:
+        """Return the cache entry's state dict (None if absent) without moving the model."""
+        return self._cache_entry.state_dict
+
    def lock(self) -> AnyModel:
        """Move the model into the execution device (GPU) and lock it."""
        self._cache_entry.lock()
        try:
            if self._cache.lazy_offloading:
                self._cache.offload_unlocked_models(self._cache_entry.size)
-
            self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
            self._cache_entry.loaded = True
-
            self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
            self._cache.print_cuda_stats()
        except torch.cuda.OutOfMemoryError: