rename gpu_mem_reserved to max_vram_cache_size

To be consistent with max_cache_size, the amount of memory to hold in
VRAM for model caching is now controlled by the max_vram_cache_size
configuration parameter.
Lincoln Stein 2023-07-11 15:25:39 -04:00
parent d32f9f7cb0
commit dab03fb646
3 changed files with 10 additions and 9 deletions
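
For context, the renamed setting is a VRAM budget expressed in gigabytes. The short Python sketch below is not part of the commit (the helper name is illustrative); it only shows how such a gigabyte value maps onto the byte budget the cache compares against, using the same GIG constant that the model cache file defines:

GIG = 1073741824  # the "size of a gig" constant from the model cache (2**30 bytes)

def vram_budget_bytes(max_vram_cache_size: float = 2.75) -> int:
    """Convert the configured VRAM cache size (GB) into a byte budget."""
    return int(max_vram_cache_size * GIG)

# With the default of 2.75, roughly 2,952,790,016 bytes of VRAM are kept for
# cached model weights; models beyond that budget are offloaded back to RAM.
print(vram_budget_bytes())  # 2952790016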

File 1 of 3: application configuration (example YAML and InvokeAISettings field definitions)

@@ -24,9 +24,9 @@ InvokeAI:
     sequential_guidance: false
     precision: float16
     max_cache_size: 6
+    max_vram_cache_size: 2.7
     always_use_cpu: false
     free_gpu_mem: false
-    gpu_mem_reserved: 2.7
   Features:
     nsfw_checker: true
     restore: true
@@ -271,7 +271,7 @@ class InvokeAISettings(BaseSettings):
     @classmethod
     def _excluded(self)->List[str]:
-        return ['type','initconf']
+        return ['type','initconf', 'gpu_mem_reserved', 'max_loaded_models']

     class Config:
         env_file_encoding = 'utf-8'
@@ -364,9 +364,10 @@ setting environment variables INVOKEAI_<setting>.
     always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance')
     free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
-    max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='Memory/Performance')
+    max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='DEPRECATED')
     max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
-    gpu_mem_reserved : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
+    max_vram_cache_size : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
+    gpu_mem_reserved : float = Field(default=2.75, ge=0, description="DEPRECATED: use max_vram_cache_size. Amount of VRAM reserved for model storage", category='DEPRECATED')
     precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='float16',description='Floating point precision', category='Memory/Performance')
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
     xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')

File 2 of 3: model cache (ModelCache)

@@ -37,7 +37,7 @@ from .models import BaseModelType, ModelType, SubModelType, ModelBase
 DEFAULT_MAX_CACHE_SIZE = 6.0

 # amount of GPU memory to hold in reserve for use by generations (GB)
-DEFAULT_GPU_MEM_RESERVED= 2.75
+DEFAULT_MAX_VRAM_CACHE_SIZE= 2.75

 # actual size of a gig
 GIG = 1073741824
@@ -85,13 +85,13 @@ class ModelCache(object):
     def __init__(
         self,
         max_cache_size: float=DEFAULT_MAX_CACHE_SIZE,
+        max_vram_cache_size: float=DEFAULT_MAX_VRAM_CACHE_SIZE,
         execution_device: torch.device=torch.device('cuda'),
         storage_device: torch.device=torch.device('cpu'),
         precision: torch.dtype=torch.float16,
         sequential_offload: bool=False,
         lazy_offloading: bool=True,
         sha_chunksize: int = 16777216,
-        gpu_mem_reserved: float=DEFAULT_GPU_MEM_RESERVED,
         logger: types.ModuleType = logger
     ):
         '''
@@ -107,7 +107,7 @@ class ModelCache(object):
         self.lazy_offloading = lazy_offloading
         self.precision: torch.dtype=precision
         self.max_cache_size: float=max_cache_size
-        self.gpu_mem_reserved: float=gpu_mem_reserved
+        self.max_vram_cache_size: float=max_vram_cache_size
         self.execution_device: torch.device=execution_device
         self.storage_device: torch.device=storage_device
         self.sha_chunksize=sha_chunksize
@@ -349,7 +349,7 @@ class ModelCache(object):
         self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")

     def _offload_unlocked_models(self, size_needed: int=0):
-        reserved = self.gpu_mem_reserved * GIG
+        reserved = self.max_vram_cache_size * GIG
         vram_in_use = torch.cuda.memory_allocated()
         self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
         for model_key, cache_entry in sorted(self._cached_models.items(), key=lambda x:x[1].size):
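
The body of the offload loop lies outside this hunk, so the following Python sketch is only illustrative of how a cap like this is typically enforced; the cache-entry attributes (size, locked, loaded, model) and the helper name are assumptions, not the project's exact code:

import torch

GIG = 1073741824  # same "size of a gig" constant as above

def offload_until_under_budget(cached_models: dict,
                               max_vram_cache_size: float,
                               storage_device: torch.device = torch.device('cpu')) -> None:
    """Sketch only: move unlocked, loaded models off the GPU until the VRAM
    used by the cache drops below max_vram_cache_size (given in GB)."""
    reserved = max_vram_cache_size * GIG
    # Smallest entries first, mirroring sorted(..., key=lambda x: x[1].size) above.
    for model_key, cache_entry in sorted(cached_models.items(), key=lambda kv: kv[1].size):
        if torch.cuda.memory_allocated() <= reserved:
            break  # already under budget
        if not cache_entry.locked and cache_entry.loaded:
            cache_entry.model.to(storage_device)  # drop the VRAM copy, keep the RAM copy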

File 3 of 3: model manager (ModelManager)

@@ -336,11 +336,11 @@ class ModelManager(object):
         self.logger = logger
         self.cache = ModelCache(
             max_cache_size=max_cache_size,
+            max_vram_cache_size = self.app_config.max_vram_cache_size,
             execution_device = device_type,
             precision = precision,
             sequential_offload = sequential_offload,
             logger = logger,
-            gpu_mem_reserved = self.app_config.gpu_mem_reserved
         )
         self.cache_keys = dict()
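
As a usage sketch, constructing the cache with the renamed argument looks like the following; the module path in the import is assumed from the repository layout of this period and may differ in other versions:

import torch
from invokeai.backend.model_management.model_cache import ModelCache  # path assumed

cache = ModelCache(
    max_cache_size=6.0,          # RAM-side model cache, in GB
    max_vram_cache_size=2.75,    # VRAM-side model cache, in GB (formerly gpu_mem_reserved)
    execution_device=torch.device('cuda'),
    precision=torch.float16,
)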