From 123f2b2dbccddce1fc5210e0ef06bf32f1026d22 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Fri, 29 Sep 2023 11:29:48 -0400
Subject: [PATCH] Update cache model size estimates based on changes in VRAM
 when moving models to/from CUDA.

---
 .../backend/model_management/model_cache.py | 55 +++++++++++++------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 12d718ff86..419c2c8ac2 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -236,9 +236,10 @@ class ModelCache(object):
                 f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
             )
 
-            if not math.isclose(
-                self_reported_model_size_before_load, self_reported_model_size_after_load, abs_tol=10 * MB
-            ):
+            # We only log a warning for over-reported (not under-reported) model sizes before load. There is a known
+            # issue where models report their fp32 size before load, and are then loaded as fp16. Once this issue is
+            # addressed, it would make sense to log a warning for both over-reported and under-reported model sizes.
+            if (self_reported_model_size_after_load - self_reported_model_size_before_load) > 10 * MB:
                 self.logger.warning(
                     f"Model '{key}' mis-reported its size before load. Self-reported size before/after load:"
                     f" {(self_reported_model_size_before_load/GIG):.2f}GB /"
@@ -286,24 +287,42 @@ class ModelCache(object):
                 f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
             )
 
-            # If the estimated model size does not match the change in VRAM, log a warning.
-            if (
-                snapshot_before.vram is not None
-                and snapshot_after.vram is not None
-                and not math.isclose(
-                    abs(snapshot_before.vram - snapshot_after.vram),
+            if snapshot_before.vram is not None and snapshot_after.vram is not None:
+                vram_change = abs(snapshot_before.vram - snapshot_after.vram)
+
+                # If the estimated model size does not match the change in VRAM, log a warning.
+                if not math.isclose(
+                    vram_change,
                     cache_entry.size,
                     rel_tol=0.1,
                     abs_tol=10 * MB,
-                )
-            ):
-                self.logger.warning(
-                    f"Moving model '{key}' from {source_device} to"
-                    f" {target_device} caused an unexpected change in VRAM usage. The model's"
-                    " estimated size may be incorrect. Estimated model size:"
-                    f" {(cache_entry.size/GIG):.2f} GB."
-                    f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
-                )
+                ):
+                    self.logger.warning(
+                        f"Moving model '{key}' from {source_device} to"
+                        f" {target_device} caused an unexpected change in VRAM usage. The model's"
+                        " estimated size may be incorrect. Estimated model size:"
+                        f" {(cache_entry.size/GIG):.2f} GB."
+                        f" {get_pretty_snapshot_diff(snapshot_before, snapshot_after)}."
+                    )
+
+                # Now, we will update our size estimate for `cache_entry` based on the change in VRAM usage. We only use the
+                # change in VRAM usage, not the change in RAM usage, because it is a more accurate measurement. The VRAM
+                # usage measurement only includes the memory used by PyTorch tensors, whereas the RAM usage measurement is
+                # of total process memory and is influenced by other factors.
+
+                # We want to err on the side of over-estimating the model's size, so we only update our estimate if the new
+                # information suggests that the model is larger than we previously thought.
+                if vram_change > cache_entry.size:
+                    self.logger.info(
+                        f"Updating the cache size estimate for model '{key}'. {(cache_entry.size/GIG):.2f}GB ->"
+                        f" {(vram_change/GIG):.2f}GB."
+                    )
+                    cache_entry.size = vram_change
+
+                    self.logger.info(
+                        "Clearing models from cache, if necessary, after updating a model's size estimate."
+                    )
+                    self._make_cache_room(0)
 
     class ModelLocker(object):
         def __init__(self, cache, key, model, gpu_load, size_needed):
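
Reviewer note (not part of the patch): below is a minimal standalone sketch, assuming PyTorch with a CUDA device is available, of the measurement idea this change relies on: take the change in allocated VRAM around the device move and only ever grow the cached size estimate. The function name vram_based_size_estimate is hypothetical; the patch itself works from snapshot_before.vram and snapshot_after.vram captured by the cache's snapshot helpers.

import torch


def vram_based_size_estimate(model: torch.nn.Module, current_estimate: int) -> int:
    """Hypothetical sketch: estimate a model's size in bytes from the change in
    allocated VRAM when moving it to CUDA, never shrinking the existing estimate."""
    torch.cuda.synchronize()
    vram_before = torch.cuda.memory_allocated()  # bytes held by PyTorch tensors only
    model.to("cuda")
    torch.cuda.synchronize()
    vram_change = abs(torch.cuda.memory_allocated() - vram_before)
    # Err on the side of over-estimating: only update if the model looks larger than we thought.
    return max(current_estimate, vram_change)

As the new comments in the patch explain, the VRAM delta is preferred over the RAM delta because it reflects only PyTorch tensor allocations rather than total process memory.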