diff --git a/invokeai/app/services/model_manager/model_manager_default.py b/invokeai/app/services/model_manager/model_manager_default.py
index b160ff6fed..de6e5f09d8 100644
--- a/invokeai/app/services/model_manager/model_manager_default.py
+++ b/invokeai/app/services/model_manager/model_manager_default.py
@@ -80,6 +80,7 @@ class ModelManagerService(ModelManagerServiceBase):
         ram_cache = ModelCache(
             max_cache_size=app_config.ram,
             max_vram_cache_size=app_config.vram,
+            lazy_offloading=app_config.lazy_offload,
             logger=logger,
             execution_device=execution_device,
         )
diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
index b8312f619a..4d5d09864e 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -421,13 +421,20 @@ class ModelCache(ModelCacheBase[AnyModel]):
 
         self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
 
+    def _free_vram(self, device: torch.device) -> int:
+        vram_device = (  # mem_get_info() needs an indexed device
+            device if device.index is not None else torch.device(str(device), index=0)
+        )
+        free_mem, _ = torch.cuda.mem_get_info(vram_device)
+        for _, cache_entry in self._cached_models.items():
+            if cache_entry.loaded and not cache_entry.locked:
+                free_mem += cache_entry.size
+        return free_mem
+
     def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
         if target_device.type != "cuda":
             return
-        vram_device = (  # mem_get_info() needs an indexed device
-            target_device if target_device.index is not None else torch.device(str(target_device), index=0)
-        )
-        free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
+        free_mem = self._free_vram(target_device)
         if needed_size > free_mem:
             needed_gb = round(needed_size / GIG, 2)
             free_gb = round(free_mem / GIG, 2)
diff --git a/invokeai/backend/model_manager/load/model_cache/model_locker.py b/invokeai/backend/model_manager/load/model_cache/model_locker.py
index 81dca346e5..ea38d3773c 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_locker.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -33,14 +33,13 @@ class ModelLocker(ModelLockerBase):
             return self.model
 
         # NOTE that the model has to have the to() method in order for this code to move it into GPU!
-        self._cache_entry.lock()
-
         try:
             if self._cache.lazy_offloading:
                 self._cache.offload_unlocked_models(self._cache_entry.size)
 
             self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
             self._cache_entry.loaded = True
+            self._cache_entry.lock()
 
             self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
             self._cache.print_cuda_stats()
@@ -51,6 +50,7 @@ class ModelLocker(ModelLockerBase):
         except Exception:
             self._cache_entry.unlock()
             raise
+
        return self.model
 
     def unlock(self) -> None:
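
Note: below is a minimal standalone sketch (not part of the diff) of the accounting that the new _free_vram() introduces: the free VRAM reported by torch.cuda.mem_get_info() plus the sizes of loaded-but-unlocked cache entries, since those can be offloaded before the incoming model is moved onto the GPU. FakeCacheEntry and effective_free_vram are hypothetical names used only for illustration.

# Illustrative sketch of the free-VRAM accounting; names are hypothetical.
from dataclasses import dataclass


@dataclass
class FakeCacheEntry:
    size: int      # bytes occupied on the execution device
    loaded: bool   # currently resident in VRAM
    locked: bool   # in active use; cannot be offloaded


def effective_free_vram(reported_free: int, entries: list[FakeCacheEntry]) -> int:
    """Free VRAM plus everything that could be offloaded to make room."""
    reclaimable = sum(e.size for e in entries if e.loaded and not e.locked)
    return reported_free + reclaimable


entries = [
    FakeCacheEntry(size=2 * 2**30, loaded=True, locked=False),   # offloadable
    FakeCacheEntry(size=4 * 2**30, loaded=True, locked=True),    # pinned by an active generation
    FakeCacheEntry(size=1 * 2**30, loaded=False, locked=False),  # already offloaded to RAM
]

# With 3 GiB actually free, the check now sees 3 + 2 = 5 GiB of usable headroom.
print(effective_free_vram(3 * 2**30, entries) / 2**30)  # 5.0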