adjust free vram calculation for models that will be removed by lazy offloading (#6150)

Co-authored-by: Lincoln Stein <lstein@gmail.com>
Authored by Lincoln Stein on 2024-04-04 22:51:12 -04:00, committed by GitHub
parent 3006285d13
commit 812f10730f
3 changed files with 14 additions and 6 deletions
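
The gist of the fix: when lazy offloading is enabled, models that are loaded in VRAM but not locked will be offloaded on demand, so their memory is effectively reclaimable and should count toward the free-VRAM check. A minimal sketch of the adjusted calculation, using an illustrative CacheEntry stand-in rather than the project's actual cache-record type:

from dataclasses import dataclass

@dataclass
class CacheEntry:  # illustrative stand-in for a cached-model record, not the project's type
    size: int      # bytes the model occupies on the GPU
    loaded: bool   # currently resident in VRAM
    locked: bool   # currently in use, so not evictable

def effective_free_vram(reported_free: int, entries: list[CacheEntry]) -> int:
    """Free VRAM as the cache now sees it: driver-reported free bytes plus the
    bytes held by loaded-but-unlocked models, which lazy offloading can evict."""
    reclaimable = sum(e.size for e in entries if e.loaded and not e.locked)
    return reported_free + reclaimable

GIG = 2**30
# 2 GiB reported free plus one idle 4 GiB model that can be offloaded -> 6.0 GiB
print(effective_free_vram(2 * GIG, [CacheEntry(4 * GIG, True, False)]) / GIG)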


@@ -80,6 +80,7 @@ class ModelManagerService(ModelManagerServiceBase):
         ram_cache = ModelCache(
             max_cache_size=app_config.ram,
             max_vram_cache_size=app_config.vram,
+            lazy_offloading=app_config.lazy_offload,
             logger=logger,
             execution_device=execution_device,
         )
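
For reference, a hedged sketch of the constructor side of this hunk; the ModelCache signature below is abbreviated and its default values are assumptions, not the project's exact API. The point is that app_config.lazy_offload now reaches the cache explicitly instead of the cache silently using its own default:

import logging
from typing import Optional

import torch

class ModelCache:
    """Sketch only: abbreviated constructor mirroring the kwargs visible above."""

    def __init__(
        self,
        max_cache_size: float,
        max_vram_cache_size: float,
        lazy_offloading: bool = True,  # assumed default; the hunk now passes app_config.lazy_offload
        execution_device: torch.device = torch.device("cuda"),
        logger: Optional[logging.Logger] = None,
    ) -> None:
        self._lazy_offloading = lazy_offloading
        self._execution_device = execution_device
        self._logger = logger or logging.getLogger(__name__)

    @property
    def lazy_offloading(self) -> bool:
        # Read by the model locker to decide whether to offload unlocked models on demand.
        return self._lazy_offloading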


@@ -421,13 +421,20 @@ class ModelCache(ModelCacheBase[AnyModel]):
         self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")

+    def _free_vram(self, device: torch.device) -> int:
+        vram_device = (  # mem_get_info() needs an indexed device
+            device if device.index is not None else torch.device(str(device), index=0)
+        )
+        free_mem, _ = torch.cuda.mem_get_info(vram_device)
+        for _, cache_entry in self._cached_models.items():
+            if cache_entry.loaded and not cache_entry.locked:
+                free_mem += cache_entry.size
+        return free_mem
+
     def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
         if target_device.type != "cuda":
             return
-        vram_device = (  # mem_get_info() needs an indexed device
-            target_device if target_device.index is not None else torch.device(str(target_device), index=0)
-        )
-        free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
+        free_mem = self._free_vram(target_device)
         if needed_size > free_mem:
             needed_gb = round(needed_size / GIG, 2)
             free_gb = round(free_mem / GIG, 2)
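
On the indexed-device detail noted in the new _free_vram() helper: torch.cuda.mem_get_info() wants a concrete device index, so a bare "cuda" device is normalized to index 0 before querying. A small standalone illustration, guarded so it is a no-op without CUDA; the helper name here is made up for the example:

import torch

def reported_free_vram(device: torch.device) -> int:
    """Driver-reported free bytes on `device` (helper name is illustrative).

    mem_get_info() needs a concrete index, so an index-less "cuda" device is
    normalized to cuda:0 first, as the new _free_vram() does.
    """
    if device.type != "cuda" or not torch.cuda.is_available():
        return 0  # nothing to query on a CPU-only machine
    indexed = device if device.index is not None else torch.device(device.type, 0)
    free_bytes, _total_bytes = torch.cuda.mem_get_info(indexed)
    return free_bytes

print(reported_free_vram(torch.device("cuda")))  # prints 0 when CUDA is unavailable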


@@ -33,14 +33,13 @@ class ModelLocker(ModelLockerBase):
             return self.model

         # NOTE that the model has to have the to() method in order for this code to move it into GPU!
+        self._cache_entry.lock()
         try:
             if self._cache.lazy_offloading:
                 self._cache.offload_unlocked_models(self._cache_entry.size)
             self._cache.move_model_to_device(self._cache_entry, self._cache.execution_device)
             self._cache_entry.loaded = True
-            self._cache_entry.lock()
             self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
             self._cache.print_cuda_stats()

@@ -51,6 +50,7 @@ class ModelLocker(ModelLockerBase):
         except Exception:
             self._cache_entry.unlock()
             raise
         return self.model

     def unlock(self) -> None:
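
The locker change moves self._cache_entry.lock() ahead of the try block, so the lazy offload pass (which only evicts unlocked entries) cannot evict the model it is about to serve, and the except handler's unlock() always pairs with a lock that was actually taken. A minimal sketch of that ordering with hypothetical stand-in names:

class CacheEntryStub:
    """Illustrative stand-in with a simple lock count (not the project's class)."""

    def __init__(self) -> None:
        self._locks = 0

    def lock(self) -> None:
        self._locks += 1

    def unlock(self) -> None:
        self._locks -= 1
        assert self._locks >= 0, "unlock() without a matching lock()"

    @property
    def locked(self) -> bool:
        return self._locks > 0

def lock_for_execution(entry: CacheEntryStub, move_to_gpu) -> None:
    entry.lock()       # taken first: an offload pass that skips locked entries leaves this model alone
    try:
        move_to_gpu()  # may raise, e.g. on CUDA out-of-memory
    except Exception:
        entry.unlock()  # the lock above guarantees this never goes negative
        raise

# Usage: the entry stays locked on success and is cleanly unlocked on failure.
entry = CacheEntryStub()
lock_for_execution(entry, lambda: None)
assert entry.locked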