mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
adjust free vram calculation for models that will be removed by lazy offloading (#6150)
Co-authored-by: Lincoln Stein <lstein@gmail.com>
This commit is contained in:
parent 3006285d13
commit 812f10730f
@@ -80,6 +80,7 @@ class ModelManagerService(ModelManagerServiceBase):
         ram_cache = ModelCache(
             max_cache_size=app_config.ram,
             max_vram_cache_size=app_config.vram,
             lazy_offloading=app_config.lazy_offload,
             logger=logger,
+            execution_device=execution_device,
         )
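The hunk above wires the app's RAM/VRAM budgets, the lazy-offload flag, and an explicit execution device into the model cache. Below is a minimal sketch of how such settings might be assembled; the helper names, the GB units, and the CUDA-then-MPS-then-CPU fallback order are assumptions for illustration, not taken from this commit.

import torch


def pick_execution_device() -> torch.device:
    """Fall back from CUDA to MPS to CPU (assumed order, for illustration)."""
    if torch.cuda.is_available():
        return torch.device("cuda", 0)
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


def cache_settings(ram_gb: float, vram_gb: float, lazy_offload: bool) -> dict:
    """Bundle keyword arguments like those the hunk above passes to ModelCache."""
    return {
        "max_cache_size": ram_gb,         # CPU-side cache budget (assumed to be GB)
        "max_vram_cache_size": vram_gb,   # GPU-side cache budget (assumed to be GB)
        "lazy_offloading": lazy_offload,  # offload models only when room is needed
        "execution_device": pick_execution_device(),
    }


print(cache_settings(ram_gb=7.5, vram_gb=4.0, lazy_offload=True))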
@@ -421,13 +421,20 @@ class ModelCache(ModelCacheBase[AnyModel]):

         self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")

+    def _free_vram(self, device: torch.device) -> int:
+        vram_device = (  # mem_get_info() needs an indexed device
+            device if device.index is not None else torch.device(str(device), index=0)
+        )
+        free_mem, _ = torch.cuda.mem_get_info(vram_device)
+        for _, cache_entry in self._cached_models.items():
+            if cache_entry.loaded and not cache_entry.locked:
+                free_mem += cache_entry.size
+        return free_mem
+
     def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
         if target_device.type != "cuda":
             return
-        vram_device = (  # mem_get_info() needs an indexed device
-            target_device if target_device.index is not None else torch.device(str(target_device), index=0)
-        )
-        free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
+        free_mem = self._free_vram(target_device)
         if needed_size > free_mem:
             needed_gb = round(needed_size / GIG, 2)
             free_gb = round(free_mem / GIG, 2)
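The idea behind the new _free_vram() is that the VRAM reported free by the driver understates what is actually available: models that are loaded but not locked can be evicted by lazy offloading the moment room is needed, so their sizes count toward free memory. The following is a self-contained sketch of that calculation; the CacheEntry dataclass and free_vram_bytes name are illustrative stand-ins, not InvokeAI's API.

from dataclasses import dataclass
from typing import Dict

import torch


@dataclass
class CacheEntry:
    size: int     # bytes occupied on the execution device
    loaded: bool  # currently resident in VRAM
    locked: bool  # pinned by an active invocation; lazy offloading must skip it


def free_vram_bytes(device: torch.device, cached: Dict[str, CacheEntry]) -> int:
    """Free VRAM, counting loaded-but-unlocked models as reclaimable."""
    # mem_get_info() needs an indexed device, e.g. cuda:0 rather than bare cuda.
    indexed = device if device.index is not None else torch.device(str(device), index=0)
    free_mem, _total = torch.cuda.mem_get_info(indexed)
    for entry in cached.values():
        if entry.loaded and not entry.locked:
            free_mem += entry.size  # lazy offloading can evict this model on demand
    return free_mem


if torch.cuda.is_available():
    cache = {
        "unet": CacheEntry(size=2 * 1024**3, loaded=True, locked=False),
        "vae": CacheEntry(size=300 * 1024**2, loaded=True, locked=True),
    }
    print(free_vram_bytes(torch.device("cuda"), cache))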
@@ -33,14 +33,13 @@ class ModelLocker(ModelLockerBase):
             return self.model

         # NOTE that the model has to have the to() method in order for this code to move it into GPU!
+        self._cache_entry.lock()

         try:
             if self._cache.lazy_offloading:
                 self._cache.offload_unlocked_models(self._cache_entry.size)

             self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
             self._cache_entry.loaded = True
-            self._cache_entry.lock()
-
             self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
             self._cache.print_cuda_stats()
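Moving self._cache_entry.lock() ahead of the try block likely matters because offload_unlocked_models() and the new _free_vram() both treat unlocked models as evictable; pinning the entry before making room keeps it from being counted or offloaded while it is being brought onto the GPU. A toy, self-contained illustration of this lock-before-try pattern follows; ToyEntry, ToyCache, and ToyLocker are hypothetical stand-ins, not InvokeAI's classes, and the unlock-on-failure mirrors the except-handler change in the next hunk.

class ToyEntry:
    def __init__(self, model: object, size: int) -> None:
        self.model, self.size = model, size
        self.loaded = False
        self.locked = False

    def lock(self) -> None:
        self.locked = True

    def unlock(self) -> None:
        self.locked = False


class ToyCache:
    lazy_offloading = True
    execution_device = "cuda:0"

    def offload_unlocked_models(self, size_needed: int) -> None:
        print(f"would free ~{size_needed} bytes by offloading unlocked models")

    def move_model_to_device(self, entry: ToyEntry, device: str) -> None:
        print(f"moving model to {device}")


class ToyLocker:
    def __init__(self, cache: ToyCache, entry: ToyEntry) -> None:
        self._cache, self._entry = cache, entry

    def lock(self) -> object:
        self._entry.lock()  # pin first so the entry is never seen as evictable
        try:
            if self._cache.lazy_offloading:
                self._cache.offload_unlocked_models(self._entry.size)
            self._cache.move_model_to_device(self._entry, self._cache.execution_device)
            self._entry.loaded = True
        except Exception:
            self._entry.unlock()  # do not leave the entry pinned after a failure
            raise
        return self._entry.model

    def unlock(self) -> None:
        self._entry.unlock()


locker = ToyLocker(ToyCache(), ToyEntry(model=object(), size=2 * 1024**3))
locker.lock()
locker.unlock()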
@@ -51,6 +50,7 @@ class ModelLocker(ModelLockerBase):
         except Exception:
+            self._cache_entry.unlock()
             raise

         return self.model

     def unlock(self) -> None:
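Callers of a locker like this must pair every lock() with an unlock(). One way to make that pairing automatic is a small context manager; this wrapper is a hypothetical convenience, not part of this commit.

from contextlib import contextmanager
from typing import Any, Iterator


@contextmanager
def locked_model(locker: Any) -> Iterator[Any]:
    model = locker.lock()  # moves the model to the execution device and pins it
    try:
        yield model
    finally:
        locker.unlock()  # always release, even if the body raises


# Hypothetical usage:
# with locked_model(some_locker) as model:
#     run_inference(model)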