fix conflicts

This commit is contained in:
Lincoln Stein 2023-07-11 15:55:10 -04:00
commit 25591788c1
3 changed files with 35 additions and 13 deletions

View File

@ -23,7 +23,8 @@ InvokeAI:
xformers_enabled: false
sequential_guidance: false
precision: float16
max_loaded_models: 4
max_cache_size: 6
max_vram_cache_size: 2.7
always_use_cpu: false
free_gpu_mem: false
Features:
@ -269,8 +270,9 @@ class InvokeAISettings(BaseSettings):
parser.add_parser(cls.cmd_name(), help=cls.__doc__)
@classmethod
def _excluded(self)->Set[str]:
return {'type','initconf','version'}
def _excluded(self)->List[str]:
# combination of deprecated parameters and internal ones
return ['type','initconf', 'gpu_mem_reserved', 'max_loaded_models', 'version']
class Config:
env_file_encoding = 'utf-8'
@ -363,8 +365,10 @@ setting environment variables INVOKEAI_<setting>.
always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance')
free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='Memory/Performance')
max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='DEPRECATED')
max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
max_vram_cache_size : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
gpu_mem_reserved : float = Field(default=2.75, ge=0, description="DEPRECATED: use max_vram_cache_size. Amount of VRAM reserved for model storage", category='DEPRECATED')
precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='float16',description='Floating point precision', category='Memory/Performance')
sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')

View File

@ -36,6 +36,9 @@ from .models import BaseModelType, ModelType, SubModelType, ModelBase
# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
DEFAULT_MAX_CACHE_SIZE = 6.0
# amount of GPU memory to hold in reserve for use by generations (GB)
DEFAULT_MAX_VRAM_CACHE_SIZE= 2.75
# actual size of a gig
GIG = 1073741824
@ -82,6 +85,7 @@ class ModelCache(object):
def __init__(
self,
max_cache_size: float=DEFAULT_MAX_CACHE_SIZE,
max_vram_cache_size: float=DEFAULT_MAX_VRAM_CACHE_SIZE,
execution_device: torch.device=torch.device('cuda'),
storage_device: torch.device=torch.device('cpu'),
precision: torch.dtype=torch.float16,
@ -99,12 +103,11 @@ class ModelCache(object):
:param sequential_offload: Conserve VRAM by loading and unloading each stage of the pipeline sequentially
:param sha_chunksize: Chunksize to use when calculating sha256 model hash
'''
#max_cache_size = 9999
self.model_infos: Dict[str, ModelBase] = dict()
self.lazy_offloading = lazy_offloading
#self.sequential_offload: bool=sequential_offload
self.precision: torch.dtype=precision
self.max_cache_size: int=max_cache_size
self.max_cache_size: float=max_cache_size
self.max_vram_cache_size: float=max_vram_cache_size
self.execution_device: torch.device=execution_device
self.storage_device: torch.device=storage_device
self.sha_chunksize=sha_chunksize
@ -201,14 +204,22 @@ class ModelCache(object):
self._cache_stack.remove(key)
self._cache_stack.append(key)
return self.ModelLocker(self, key, cache_entry.model, gpu_load)
return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
class ModelLocker(object):
def __init__(self, cache, key, model, gpu_load):
def __init__(self, cache, key, model, gpu_load, size_needed):
'''
:param cache: The model_cache object
:param key: The key of the model to lock in GPU
:param model: The model to lock
:param gpu_load: True if load into gpu
:param size_needed: Size of the model to load
'''
self.gpu_load = gpu_load
self.cache = cache
self.key = key
self.model = model
self.size_needed = size_needed
self.cache_entry = self.cache._cached_models[self.key]
def __enter__(self) -> Any:
@ -222,7 +233,7 @@ class ModelCache(object):
try:
if self.cache.lazy_offloading:
self.cache._offload_unlocked_models()
self.cache._offload_unlocked_models(self.size_needed)
if self.model.device != self.cache.execution_device:
self.cache.logger.debug(f'Moving {self.key} into {self.cache.execution_device}')
@ -337,14 +348,20 @@ class ModelCache(object):
self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")
def _offload_unlocked_models(self):
for model_key, cache_entry in self._cached_models.items():
def _offload_unlocked_models(self, size_needed: int=0):
reserved = self.max_vram_cache_size * GIG
vram_in_use = torch.cuda.memory_allocated()
self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
for model_key, cache_entry in sorted(self._cached_models.items(), key=lambda x:x[1].size):
if vram_in_use <= reserved:
break
if not cache_entry.locked and cache_entry.loaded:
self.logger.debug(f'Offloading {model_key} from {self.execution_device} into {self.storage_device}')
with VRAMUsage() as mem:
cache_entry.model.to(self.storage_device)
self.logger.debug(f'GPU VRAM freed: {(mem.vram_used/GIG):.2f} GB')
vram_in_use += mem.vram_used # note vram_used is negative
self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
def _local_model_hash(self, model_path: Union[str, Path]) -> str:
sha = hashlib.sha256()

View File

@ -340,6 +340,7 @@ class ModelManager(object):
self.logger = logger
self.cache = ModelCache(
max_cache_size=max_cache_size,
max_vram_cache_size = self.app_config.max_vram_cache_size,
execution_device = device_type,
precision = precision,
sequential_offload = sequential_offload,