From bda0000acd1551b27c9b146e213bfcc1cd146104 Mon Sep 17 00:00:00 2001
From: Sergey Borisov
Date: Tue, 18 Jul 2023 23:21:18 +0300
Subject: [PATCH] Clean up VRAM after model offloading; tweak to clean up
 local variable references on RAM offload

---
 .../backend/model_management/model_cache.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 7c3d43e3e2..5ca17f00fc 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -328,6 +328,25 @@ class ModelCache(object):
 
             refs = sys.getrefcount(cache_entry.model)
 
+            # manually clear local variable references from just-finished function calls;
+            # for some reason Python doesn't collect them immediately, even with gc.collect()
+            if refs > 2:
+                while True:
+                    cleared = False
+                    for referrer in gc.get_referrers(cache_entry.model):
+                        if type(referrer).__name__ == "frame":
+                            # RuntimeError: cannot clear an executing frame
+                            with suppress(RuntimeError):
+                                referrer.clear()
+                                cleared = True
+                            #break
+
+                    # repeat if the referrers changed (due to a frame clear), else exit the loop
+                    if cleared:
+                        gc.collect()
+                    else:
+                        break
+
             device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
             self.logger.debug(f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}, refs: {refs}")
 
@@ -363,6 +382,9 @@ class ModelCache(object):
                 self.logger.debug(f'GPU VRAM freed: {(mem.vram_used/GIG):.2f} GB')
                 vram_in_use += mem.vram_used # note vram_used is negative
             self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
+
+        gc.collect()
+        torch.cuda.empty_cache()
 
     def _local_model_hash(self, model_path: Union[str, Path]) -> str:
         sha = hashlib.sha256()
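
For readers unfamiliar with the frame-clearing trick in the first hunk, below is a minimal standalone sketch of the same idea. The helper name clear_frame_references is hypothetical and not part of the patch or of the InvokeAI codebase; it only illustrates the technique: walk gc.get_referrers(obj), clear any finished frame objects that still hold a local reference to obj, and repeat until no further frame can be cleared.

import gc
from contextlib import suppress

def clear_frame_references(obj) -> bool:
    """Clear finished frames that still hold local references to ``obj``.

    Hypothetical helper mirroring the loop added in the patch above.
    Returns True if at least one frame was cleared.
    """
    freed_any = False
    while True:
        cleared = False
        for referrer in gc.get_referrers(obj):
            if type(referrer).__name__ == "frame":
                # frame.clear() raises RuntimeError for frames that are still executing
                with suppress(RuntimeError):
                    referrer.clear()
                    cleared = True
        if not cleared:
            break
        freed_any = True
        # the referrer set may have changed after clearing, so collect and rescan
        gc.collect()
    return freed_any

In the patch itself this loop runs only when sys.getrefcount reports unexpected extra references to a cached model, and the later gc.collect() plus torch.cuda.empty_cache() then let PyTorch return the freed VRAM to the device.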