support VRAM caching of dict models that lack to()

Lincoln Stein
2024-04-28 13:41:06 -04:00
parent a26667d3ca
commit 7c39929758
5 changed files with 12 additions and 20 deletions
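For context, a "dict model" here is something like a raw state dict: the mapping itself has no to() method, so the cache previously could not move it onto the GPU, even though each of its values can be moved individually. A minimal illustration in plain PyTorch (not InvokeAI code):

```python
import torch

# A model stored as a plain dict of tensors (e.g. a raw state dict) has no
# .to() method of its own, but each value does.
state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}

print(hasattr(state_dict, "to"))                            # False
print(all(hasattr(v, "to") for v in state_dict.values()))   # True
```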


@@ -252,23 +252,22 @@ class ModelCache(ModelCacheBase[AnyModel]):
         May raise a torch.cuda.OutOfMemoryError
         """
         # These attributes are not in the base ModelMixin class but in various derived classes.
         # Some models don't have these attributes, in which case they run in RAM/CPU.
         self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
-        if not (hasattr(cache_entry.model, "device") and hasattr(cache_entry.model, "to")):
-            return
+        model = cache_entry.model
-        source_device = cache_entry.model.device
         # Note: We compare device types only so that 'cuda' == 'cuda:0'.
         # This would need to be revised to support multi-GPU.
+        source_device = model.device if hasattr(model, "device") else self.storage_device
         if torch.device(source_device).type == torch.device(target_device).type:
             return
         start_model_to_time = time.time()
         snapshot_before = self._capture_memory_snapshot()
         try:
-            cache_entry.model.to(target_device)
+            if hasattr(model, "to"):
+                model.to(target_device)
+            elif isinstance(model, dict):
+                for _, v in model.items():
+                    if hasattr(v, "to"):
+                        v.to(target_device)
         except Exception as e:  # blow away cache entry
             self._delete_cache_entry(cache_entry)
             raise e
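A standalone sketch of the new move logic, with move_to_device as a hypothetical helper standing in for ModelCache.move_model_to_device (the real method also handles logging, timing, memory snapshots, and cache-entry eviction on failure):

```python
from typing import Any

import torch


def move_to_device(model: Any, target_device: torch.device) -> None:
    """Hypothetical helper mirroring the dict-aware branch added above."""
    if hasattr(model, "to"):
        # nn.Module and most diffusers/transformers models move in place.
        model.to(target_device)
    elif isinstance(model, dict):
        # Dict models are moved value by value; this matches the commit and is
        # sufficient when the values (e.g. submodules) move themselves in place.
        # For plain tensors one would reassign, since Tensor.to() returns a copy.
        for _, v in model.items():
            if hasattr(v, "to"):
                v.to(target_device)


# Usage: a dict of modules can now be pushed to the execution device.
parts = {"down": torch.nn.Linear(4, 4), "up": torch.nn.Linear(4, 4)}
move_to_device(parts, torch.device("cpu"))
```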


@@ -29,10 +29,6 @@ class ModelLocker(ModelLockerBase):
     def lock(self) -> AnyModel:
         """Move the model into the execution device (GPU) and lock it."""
-        if not hasattr(self.model, "to"):
-            return self.model
-        # NOTE that the model has to have the to() method in order for this code to move it into GPU!
         self._cache_entry.lock()
         try:
             if self._cache.lazy_offloading:
@@ -55,9 +51,6 @@ class ModelLocker(ModelLockerBase):
     def unlock(self) -> None:
         """Call upon exit from context."""
-        if not hasattr(self.model, "to"):
-            return
         self._cache_entry.unlock()
         if not self._cache.lazy_offloading:
             self._cache.offload_unlocked_models(self._cache_entry.size)
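With those early returns gone, lock() and unlock() no longer special-case models that lack to(); device placement is left entirely to the cache, which now tolerates dict models. A rough, self-contained sketch of that division of labor (DummyCache and DummyLocker are illustrative names, not InvokeAI's API):

```python
from typing import Any

import torch


class DummyCache:
    """Illustrative stand-in: the real ModelCache handles models without to()."""

    execution_device = torch.device("cpu")

    def move_model_to_device(self, model: Any, device: torch.device) -> None:
        ...  # dict-aware move logic lives here (see the sketch above)


class DummyLocker:
    """Illustrative stand-in for ModelLocker after this commit."""

    def __init__(self, cache: DummyCache, model: Any) -> None:
        self._cache = cache
        self.model = model

    def lock(self) -> Any:
        # No early return for models lacking to(): moving the model is
        # delegated to the cache unconditionally.
        self._cache.move_model_to_device(self.model, self._cache.execution_device)
        return self.model

    def unlock(self) -> None:
        # Likewise, unlock no longer short-circuits; offloading decisions
        # stay with the cache.
        pass
```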