From b3abc7252d59bfd7d327fa8b416e6742834de085 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Sat, 24 Feb 2024 12:10:52 -0500
Subject: [PATCH] recover gracefully from VRAM out of memory errors

---
 invokeai/backend/model_management/model_cache.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 2a7f4b5a95..a9ec051f3e 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -287,6 +287,14 @@ class ModelCache(object):
         if torch.device(source_device).type == torch.device(target_device).type:
             return
 
+        if target_device.type == "cuda":
+            vram_device = (
+                target_device if target_device.index is not None else torch.device(str(target_device), index=0)
+            )
+            free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
+            if cache_entry.size > free_mem:
+                raise torch.cuda.OutOfMemoryError
+
         start_model_to_time = time.time()
         snapshot_before = self._capture_memory_snapshot()
         cache_entry.model.to(target_device)
@@ -356,6 +364,10 @@ class ModelCache(object):
                     self.cache.logger.debug(f"Locking {self.key} in {self.cache.execution_device}")
                     self.cache._print_cuda_stats()
 
+                except torch.cuda.OutOfMemoryError:
+                    self.cache.logger.warning("Out of GPU memory encountered.")
+                    self.cache_entry.unlock()
+                    raise
                 except Exception:
                     self.cache_entry.unlock()
                     raise
@@ -524,7 +536,6 @@ class ModelCache(object):
                 break
             if not cache_entry.locked and cache_entry.loaded:
                 self._move_model_to_device(model_key, self.storage_device)
-
                 vram_in_use = torch.cuda.memory_allocated()
                 self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB")
 
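
For context, the pattern this patch applies is a preflight check: query free VRAM with torch.cuda.mem_get_info() and raise torch.cuda.OutOfMemoryError before attempting the move, so the caller can unlock the cache entry, log a warning, and re-raise. The standalone sketch below only illustrates that pattern under stated assumptions; the function name ensure_vram_available and its parameters are hypothetical and are not identifiers from the InvokeAI codebase, and torch.cuda.OutOfMemoryError requires a reasonably recent PyTorch release.

import torch

def ensure_vram_available(model_size_bytes: int, target_device: torch.device) -> None:
    # Hypothetical helper (not from InvokeAI): refuse to move a model onto a CUDA
    # device whose free VRAM is smaller than the model, raising the same error type
    # the patch uses so callers can release their locks and re-raise.
    if target_device.type != "cuda":
        return
    # mem_get_info() needs a concrete device; fall back to GPU 0 if no index was given.
    vram_device = target_device if target_device.index is not None else torch.device("cuda", 0)
    free_mem, _total = torch.cuda.mem_get_info(vram_device)
    if model_size_bytes > free_mem:
        raise torch.cuda.OutOfMemoryError(
            f"model needs {model_size_bytes} bytes, only {free_mem} bytes of VRAM free"
        )

if __name__ == "__main__" and torch.cuda.is_available():
    try:
        ensure_vram_available(2 * 1024**3, torch.device("cuda"))  # pretend a 2 GB model is being loaded
    except torch.cuda.OutOfMemoryError:
        print("Out of GPU memory encountered.")  # a real caller would also unlock its cache entry here
        raise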