From 123f2b2dbccddce1fc5210e0ef06bf32f1026d22 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Fri, 29 Sep 2023 11:29:48 -0400
Subject: [PATCH] Update cache model size estimates based on changes in VRAM
 when moving models to/from CUDA.

---
 .../backend/model_management/model_cache.py | 55 +++++++++++++------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 12d718ff86..419c2c8ac2 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -236,9 +236,10 @@ class ModelCache(object):
                 f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
             )
 
-            if not math.isclose(
-                self_reported_model_size_before_load, self_reported_model_size_after_load, abs_tol=10 * MB
-            ):
+            # We only log a warning for over-reported (not under-reported) model sizes before load. There is a known
+            # issue where models report their fp32 size before load, and are then loaded as fp16. Once this issue is
+            # addressed, it would make sense to log a warning for both over-reported and under-reported model sizes.
+            if (self_reported_model_size_after_load - self_reported_model_size_before_load) > 10 * MB:
                 self.logger.warning(
                     f"Model '{key}' mis-reported its size before load. Self-reported size before/after load:"
                     f" {(self_reported_model_size_before_load/GIG):.2f}GB /"
@@ -286,24 +287,42 @@ class ModelCache(object):
                 f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
             )
 
-            # If the estimated model size does not match the change in VRAM, log a warning.
-            if (
-                snapshot_before.vram is not None
-                and snapshot_after.vram is not None
-                and not math.isclose(
-                    abs(snapshot_before.vram - snapshot_after.vram),
+            if snapshot_before.vram is not None and snapshot_after.vram is not None:
+                vram_change = abs(snapshot_before.vram - snapshot_after.vram)
+
+                # If the estimated model size does not match the change in VRAM, log a warning.
+                if not math.isclose(
+                    vram_change,
                     cache_entry.size,
                     rel_tol=0.1,
                     abs_tol=10 * MB,
-                )
-            ):
-                self.logger.warning(
-                    f"Moving model '{key}' from {source_device} to"
-                    f" {target_device} caused an unexpected change in VRAM usage. The model's"
-                    " estimated size may be incorrect. Estimated model size:"
-                    f" {(cache_entry.size/GIG):.2f} GB."
-                    f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                )
+                ):
+                    self.logger.warning(
+                        f"Moving model '{key}' from {source_device} to"
+                        f" {target_device} caused an unexpected change in VRAM usage. The model's"
+                        " estimated size may be incorrect. Estimated model size:"
+                        f" {(cache_entry.size/GIG):.2f} GB."
+                        f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
+                    )
+
+                # Now, we will update our size estimate for `cache_entry` based on the change in VRAM usage. We only use the
+                # change in VRAM usage, not the change in RAM usage, because it is a more accurate measurement. The VRAM
+                # usage measurement only includes the memory used by PyTorch tensors, whereas the RAM usage measurement is
+                # of total process memory and is influenced by other factors.
+
+                # We want to err on the side of over-estimating the model's size, so we only update our estimate if the new
+                # information suggests that the model is larger than we previously thought.
+                if vram_change > cache_entry.size:
+                    self.logger.info(
+                        f"Updating the cache size estimate for model '{key}'. {(cache_entry.size/GIG):.2f}GB ->"
+                        f" {(vram_change/GIG):.2f}GB."
+                    )
+                    cache_entry.size = vram_change
+
+                    self.logger.info(
+                        "Clearing models from cache, if necessary, after updating a model's size estimate."
+                    )
+                    self._make_cache_room(0)
 
     class ModelLocker(object):
         def __init__(self, cache, key, model, gpu_load, size_needed):
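
Reviewer note (not part of the patch): below is a minimal standalone sketch, assuming PyTorch with a CUDA device is available, of the measurement idea this change relies on: take the change in allocated VRAM around the device move and only ever grow the cached size estimate. The function name vram_based_size_estimate is hypothetical; the patch itself works from snapshot_before.vram and snapshot_after.vram captured by the cache's snapshot helpers.

import torch


def vram_based_size_estimate(model: torch.nn.Module, current_estimate: int) -> int:
    """Hypothetical sketch: estimate a model's size in bytes from the change in
    allocated VRAM when moving it to CUDA, never shrinking the existing estimate."""
    torch.cuda.synchronize()
    vram_before = torch.cuda.memory_allocated()  # bytes held by PyTorch tensors only
    model.to("cuda")
    torch.cuda.synchronize()
    vram_change = abs(torch.cuda.memory_allocated() - vram_before)
    # Err on the side of over-estimating: only update if the model looks larger than we thought.
    return max(current_estimate, vram_change)

As the new comments in the patch explain, the VRAM delta is preferred over the RAM delta because it reflects only PyTorch tensor allocations rather than total process memory.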