From d32f9f7cb0f88390271cb85e212e9c06db9707c2 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Tue, 11 Jul 2023 15:16:35 -0400
Subject: [PATCH] reverse logic of gpu_mem_reserved

- gpu_mem_reserved now indicates the amount of VRAM that will be reserved
  for model caching (similar to max_cache_size).
---
 invokeai/app/services/config.py                    |  4 ++--
 invokeai/backend/model_management/model_cache.py   | 11 ++++++-----
 invokeai/backend/model_management/model_manager.py |  1 +
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/invokeai/app/services/config.py b/invokeai/app/services/config.py
index f37145a4bf..548700e816 100644
--- a/invokeai/app/services/config.py
+++ b/invokeai/app/services/config.py
@@ -26,7 +26,7 @@ InvokeAI:
     max_cache_size: 6
     always_use_cpu: false
     free_gpu_mem: false
-    gpu_mem_reserved: 1
+    gpu_mem_reserved: 2.7
   Features:
     nsfw_checker: true
     restore: true
@@ -366,7 +366,7 @@ setting environment variables INVOKEAI_.
     free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
     max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='Memory/Performance')
     max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
-    gpu_mem_reserved : float = Field(default=1.75, ge=0, description="Amount of VRAM to reserve for use during generation", category='Memory/Performance')
+    gpu_mem_reserved : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
     precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='float16',description='Floating point precision', category='Memory/Performance')
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
     xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')
diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index f6d3c49bc0..b3284226e1 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -37,7 +37,7 @@ from .models import BaseModelType, ModelType, SubModelType, ModelBase
 DEFAULT_MAX_CACHE_SIZE = 6.0
 
 # amount of GPU memory to hold in reserve for use by generations (GB)
-DEFAULT_GPU_MEM_RESERVED= 1.75
+DEFAULT_GPU_MEM_RESERVED= 2.75
 
 # actual size of a gig
 GIG = 1073741824
@@ -350,17 +350,18 @@ class ModelCache(object):
 
     def _offload_unlocked_models(self, size_needed: int=0):
         reserved = self.gpu_mem_reserved * GIG
+        vram_in_use = torch.cuda.memory_allocated()
+        self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
         for model_key, cache_entry in sorted(self._cached_models.items(), key=lambda x:x[1].size):
-            free_mem, used_mem = torch.cuda.mem_get_info()
-            free_mem -= reserved
-            self.logger.debug(f'Require {(size_needed/GIG):.2f}GB VRAM. Have {(free_mem/GIG):.2f}GB available ({(reserved/GIG):.2f} reserved).')
-            if free_mem > size_needed:
+            if vram_in_use <= reserved:
                 break
             if not cache_entry.locked and cache_entry.loaded:
                 self.logger.debug(f'Offloading {model_key} from {self.execution_device} into {self.storage_device}')
                 with VRAMUsage() as mem:
                     cache_entry.model.to(self.storage_device)
                 self.logger.debug(f'GPU VRAM freed: {(mem.vram_used/GIG):.2f} GB')
+                vram_in_use += mem.vram_used # note vram_used is negative
+                self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
 
     def _local_model_hash(self, model_path: Union[str, Path]) -> str:
         sha = hashlib.sha256()
diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py
index d092e05c05..f74be90639 100644
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -340,6 +340,7 @@ class ModelManager(object):
             precision = precision,
             sequential_offload = sequential_offload,
             logger = logger,
+            gpu_mem_reserved = self.app_config.gpu_mem_reserved
         )
         self.cache_keys = dict()
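
Usage note: under the reversed semantics, gpu_mem_reserved caps how much VRAM cached
models may occupy, and _offload_unlocked_models() moves unlocked models back to the
storage device until usage drops below that cap. A minimal invokeai.yaml fragment
illustrating the new setting is sketched below; the "Memory/Performance" section name
follows the category used in the Field definitions above, and the values are
illustrative only, not part of the patch:

    InvokeAI:
      Memory/Performance:
        max_cache_size: 6        # maximum RAM (GB) used by the model cache
        gpu_mem_reserved: 2.7    # VRAM (GB) reserved for cached models; offloading begins above this
        always_use_cpu: false
        free_gpu_mem: false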