diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index bcd61a77f8..b4db4cf5ea 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -265,6 +265,44 @@ class ModelCache(object):
 
         return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
 
+    def _move_model_to_device(self, key, target_device):
+        cache_entry = self._cached_models[key]
+
+        source_device = cache_entry.model.device
+        if source_device == target_device:
+            return
+
+        start_model_to_time = time.time()
+        snapshot_before = MemorySnapshot.capture()
+        cache_entry.model.to(target_device)
+        snapshot_after = MemorySnapshot.capture()
+        end_model_to_time = time.time()
+        self.logger.debug(
+            f"Moved model '{key}' from {source_device} to"
+            f" {target_device} in {(end_model_to_time-start_model_to_time):.2f}s."
+            f" Estimated model size: {(cache_entry.size/GIG):.2f} GB."
+            f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
+        )
+
+        # If the estimated model size does not match the change in VRAM, log a warning.
+        if (
+            snapshot_before.vram is not None
+            and snapshot_after.vram is not None
+            and not math.isclose(
+                abs(snapshot_before.vram - snapshot_after.vram),
+                cache_entry.size,
+                rel_tol=0.1,
+                abs_tol=10 * MB,
+            )
+        ):
+            self.logger.warning(
+                f"Moving model '{key}' from {source_device} to"
+                f" {target_device} caused an unexpected change in VRAM usage. The model's"
+                " estimated size may be incorrect. Estimated model size:"
+                f" {(cache_entry.size/GIG):.2f} GB."
+                f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
+            )
+
     class ModelLocker(object):
         def __init__(self, cache, key, model, gpu_load, size_needed):
             """
@@ -294,32 +332,7 @@ class ModelCache(object):
                     if self.cache.lazy_offloading:
                         self.cache._offload_unlocked_models(self.size_needed)
 
-                    if self.model.device != self.cache.execution_device:
-                        start_model_to_time = time.time()
-                        snapshot_before = MemorySnapshot.capture()
-                        self.model.to(self.cache.execution_device)  # move into GPU
-                        snapshot_after = MemorySnapshot.capture()
-                        end_model_to_time = time.time()
-                        self.cache.logger.debug(
-                            f"Moved model '{self.key}' from {self.cache.storage_device} to"
-                            f" {self.cache.execution_device} in {(end_model_to_time-start_model_to_time):.2f}s."
-                            f" Estimated model size: {(self.cache_entry.size/GIG):.2f} GB."
-                            f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                        )
-
-                        if not math.isclose(
-                            abs((snapshot_before.vram or 0) - (snapshot_after.vram or 0)),
-                            self.cache_entry.size,
-                            rel_tol=0.1,
-                            abs_tol=10 * MB,
-                        ):
-                            self.cache.logger.warning(
-                                f"Moving '{self.key}' from {self.cache.storage_device} to"
-                                f" {self.cache.execution_device} caused an unexpected change in VRAM usage. The model's"
-                                " estimated size may be incorrect. Estimated model size:"
-                                f" {(self.cache_entry.size/GIG):.2f} GB."
-                                f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                            )
+                    self.cache._move_model_to_device(self.key, self.cache.execution_device)
 
                     self.cache.logger.debug(f"Locking {self.key} in {self.cache.execution_device}")
                     self.cache._print_cuda_stats()
@@ -332,7 +345,7 @@ class ModelCache(object):
             # in the event that the caller wants the model in RAM, we
             # move it into CPU if it is in GPU and not locked
             elif self.cache_entry.loaded and not self.cache_entry.locked:
-                self.model.to(self.cache.storage_device)
+                self.cache._move_model_to_device(self.key, self.cache.storage_device)
 
             return self.model
 
@@ -472,33 +485,9 @@ class ModelCache(object):
             if vram_in_use <= reserved:
                 break
             if not cache_entry.locked and cache_entry.loaded:
-                start_model_to_time = time.time()
-                snapshot_before = MemorySnapshot.capture()
-                cache_entry.model.to(self.storage_device)
-                snapshot_after = MemorySnapshot.capture()
-                end_model_to_time = time.time()
-                self.logger.debug(
-                    f"Moved model '{model_key}' from {self.execution_device} to {self.storage_device} in"
-                    f" {(end_model_to_time-start_model_to_time):.2f}s. Estimated model size:"
-                    f" {(cache_entry.size/GIG):.2f} GB."
-                    f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                )
+                self._move_model_to_device(model_key, self.storage_device)
 
-                if not math.isclose(
-                    abs((snapshot_before.vram or 0) - (snapshot_after.vram or 0)),
-                    cache_entry.size,
-                    rel_tol=0.1,
-                    abs_tol=10 * MB,
-                ):
-                    self.logger.warning(
-                        f"Moving '{model_key}' from {self.execution_device} to"
-                        f" {self.storage_device} caused an unexpected change in VRAM usage. The model's"
-                        " estimated size may be incorrect. Estimated model size:"
-                        f" {(cache_entry.size/GIG):.2f} GB."
-                        f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                    )
-
-                vram_in_use = snapshot_after.vram or 0
+                vram_in_use = torch.cuda.memory_allocated()
                 self.logger.debug(f"{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB")
 
         gc.collect()
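For reference, the plausibility check that the consolidated _move_model_to_device helper performs compares the observed VRAM delta against the estimated model size with math.isclose(..., rel_tol=0.1, abs_tol=10 * MB). Below is a minimal standalone sketch of that comparison; the estimated_size_is_plausible helper, the byte constants, and the example numbers are illustrative only and are not part of the patch.

import math

# Illustrative byte-size constants mirroring the MB/GIG units used in model_cache.py.
MB = 2**20
GIG = 2**30


def estimated_size_is_plausible(vram_before: int, vram_after: int, estimated_size: int) -> bool:
    """Return True if the observed VRAM change is within 10% (or 10 MB) of the estimated model size."""
    observed_delta = abs(vram_before - vram_after)
    return math.isclose(observed_delta, estimated_size, rel_tol=0.1, abs_tol=10 * MB)


# A 2.0 GB estimate with a 1.9 GB observed delta passes the check;
# a 1.0 GB observed delta would take the warning path instead.
print(estimated_size_is_plausible(0, int(1.9 * GIG), 2 * GIG))  # True
print(estimated_size_is_plausible(0, int(1.0 * GIG), 2 * GIG))  # False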