Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
don't unload models from GPU until the space is needed

parent 11d78ad75f
commit d6cb0e54b3
@@ -201,14 +201,22 @@ class ModelCache(object):
             self._cache_stack.remove(key)
         self._cache_stack.append(key)
 
-        return self.ModelLocker(self, key, cache_entry.model, gpu_load)
+        return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
 
     class ModelLocker(object):
-        def __init__(self, cache, key, model, gpu_load):
+        def __init__(self, cache, key, model, gpu_load, size_needed):
+            '''
+            :param cache: The model_cache object
+            :param key: The key of the model to lock in GPU
+            :param model: The model to lock
+            :param gpu_load: True if load into gpu
+            :param size_needed: Size of the model to load
+            '''
             self.gpu_load = gpu_load
             self.cache = cache
             self.key = key
             self.model = model
+            self.size_needed = size_needed
             self.cache_entry = self.cache._cached_models[self.key]
 
         def __enter__(self) -> Any:
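The hunk above only threads the model's size through the locker: the cache already knows `cache_entry.size`, and `ModelLocker` now remembers it as `size_needed` so that `__enter__` can tell the cache exactly how much VRAM it is about to claim. A minimal toy sketch of that pattern (everything here, including `ToyCache` and `make_room`, is illustrative and not InvokeAI API):

# Illustrative sketch only, not InvokeAI code: a toy cache/locker pair showing how
# the locker carries the model's size so the cache can free just enough room.
class ToyCache:
    def __init__(self):
        self.lazy_offloading = True
        self.resident = {}               # key -> bytes currently "on the GPU"

    def lock(self, key, model, size_needed):
        # Mirrors ModelCache returning a locker primed with cache_entry.size.
        return ToyLocker(self, key, model, gpu_load=True, size_needed=size_needed)

    def make_room(self, size_needed):
        # Stand-in for _offload_unlocked_models(size_needed): evict smallest first,
        # and stop as soon as the requested amount fits under a toy 8 GB budget.
        for key in sorted(self.resident, key=self.resident.get):
            if sum(self.resident.values()) + size_needed < 8_000_000_000:
                return
            self.resident.pop(key)

class ToyLocker:
    def __init__(self, cache, key, model, gpu_load, size_needed=0):
        self.cache, self.key, self.model = cache, key, model
        self.gpu_load, self.size_needed = gpu_load, size_needed

    def __enter__(self):
        if self.gpu_load and self.cache.lazy_offloading:
            self.cache.make_room(self.size_needed)   # evict only if space is needed
        self.cache.resident[self.key] = self.size_needed
        return self.model

    def __exit__(self, *exc):
        return False    # nothing is offloaded on exit; eviction is deferred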
@@ -222,7 +230,7 @@ class ModelCache(object):
 
                 try:
                     if self.cache.lazy_offloading:
-                        self.cache._offload_unlocked_models()
+                        self.cache._offload_unlocked_models(self.size_needed)
 
                     if self.model.device != self.cache.execution_device:
                         self.cache.logger.debug(f'Moving {self.key} into {self.cache.execution_device}')
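With `lazy_offloading` enabled, nothing is evicted when a lock is released; eviction happens only here, when a model is about to be moved onto the execution device, and only to the extent its `size_needed` demands. A usage example of the toy sketch above (the sizes are made up):

# Usage of the toy sketch above.
cache = ToyCache()
with cache.lock("unet", model="fake-unet", size_needed=3_400_000_000) as m:
    pass                     # "run" the model while it is locked on the GPU
# Exiting the with-block evicted nothing; "unet" stays resident until another
# lock actually needs its space, which is the point of this commit.
print(cache.resident)        # {'unet': 3400000000}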
@@ -337,9 +345,12 @@ class ModelCache(object):
 
         self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")
 
-    def _offload_unlocked_models(self):
-        for model_key, cache_entry in self._cached_models.items():
+    def _offload_unlocked_models(self, size_needed: int=0):
+        for model_key, cache_entry in sorted(self._cached_models.items(), key=lambda x:x[1].size):
+            free_mem, used_mem = torch.cuda.mem_get_info()
+            self.logger.debug(f'Require {(size_needed/GIG):.2f}GB VRAM. Have {((free_mem-reserve)/GIG):.2f}GB available.')
+            if free_mem-reserve > size_needed:
+                return
             if not cache_entry.locked and cache_entry.loaded:
                 self.logger.debug(f'Offloading {model_key} from {self.execution_device} into {self.storage_device}')
                 with VRAMUsage() as mem:
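This rewritten loop is the heart of the commit: instead of unconditionally pushing every unlocked model back to the storage device, it asks CUDA how much memory is actually free, returns early once `size_needed` plus a reserve would fit, and walks the cached models smallest-first. A standalone sketch of that policy, assuming a CUDA device is present (`RESERVE`, `GIG`, and the `entries` list are local stand-ins, not the module's `reserve`/`GIG`):

# Standalone sketch of the eviction policy in _offload_unlocked_models.
import torch

GIG = 2 ** 30
RESERVE = 256 * 2 ** 20          # headroom to leave free, in bytes (illustrative)

def offload_until_fits(entries, size_needed=0):
    """entries: list of (key, size_bytes, offload_fn); smallest models are tried first."""
    for key, size, offload_fn in sorted(entries, key=lambda e: e[1]):
        # mem_get_info() returns (free_bytes, total_bytes) for the current device
        free_mem, total_mem = torch.cuda.mem_get_info()
        print(f"Require {size_needed / GIG:.2f}GB, have {(free_mem - RESERVE) / GIG:.2f}GB free")
        if free_mem - RESERVE > size_needed:
            return                   # enough room already -- stop evicting
        offload_fn(key)              # move this model back to the storage device

Sorting smallest-first presumably means that, when eviction is unavoidable, small models are displaced before the large, expensive-to-reload ones.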