From dab03fb6460b5ba233216f7903f99f800b9297c8 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Tue, 11 Jul 2023 15:25:39 -0400
Subject: [PATCH] rename gpu_mem_reserved to max_vram_cache_size

To be consistent with max_cache_size, the amount of memory to hold in
VRAM for model caching is now controlled by the max_vram_cache_size
configuration parameter.
---
 invokeai/app/services/config.py                    | 9 +++++----
 invokeai/backend/model_management/model_cache.py   | 8 ++++----
 invokeai/backend/model_management/model_manager.py | 2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/invokeai/app/services/config.py b/invokeai/app/services/config.py
index 548700e816..e5d1612ed6 100644
--- a/invokeai/app/services/config.py
+++ b/invokeai/app/services/config.py
@@ -24,9 +24,9 @@ InvokeAI:
     sequential_guidance: false
     precision: float16
     max_cache_size: 6
+    max_vram_cache_size: 2.7
     always_use_cpu: false
     free_gpu_mem: false
-    gpu_mem_reserved: 2.7
   Features:
     nsfw_checker: true
     restore: true
@@ -271,7 +271,7 @@ class InvokeAISettings(BaseSettings):

     @classmethod
     def _excluded(self)->List[str]:
-        return ['type','initconf']
+        return ['type','initconf', 'gpu_mem_reserved', 'max_loaded_models']

     class Config:
         env_file_encoding = 'utf-8'
@@ -364,9 +364,10 @@ setting environment variables INVOKEAI_.
     always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance')
     free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
-    max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='Memory/Performance')
+    max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='DEPRECATED')
     max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
-    gpu_mem_reserved : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
+    max_vram_cache_size : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
+    gpu_mem_reserved : float = Field(default=2.75, ge=0, description="DEPRECATED: use max_vram_cache_size. Amount of VRAM reserved for model storage", category='DEPRECATED')
     precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='float16',description='Floating point precision', category='Memory/Performance')
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
     xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')
diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index b3284226e1..e4cba3517e 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -37,7 +37,7 @@ from .models import BaseModelType, ModelType, SubModelType, ModelBase
 DEFAULT_MAX_CACHE_SIZE = 6.0

 # amount of GPU memory to hold in reserve for use by generations (GB)
-DEFAULT_GPU_MEM_RESERVED= 2.75
+DEFAULT_MAX_VRAM_CACHE_SIZE= 2.75

 # actual size of a gig
 GIG = 1073741824
@@ -85,13 +85,13 @@ class ModelCache(object):
     def __init__(
         self,
         max_cache_size: float=DEFAULT_MAX_CACHE_SIZE,
+        max_vram_cache_size: float=DEFAULT_MAX_VRAM_CACHE_SIZE,
         execution_device: torch.device=torch.device('cuda'),
         storage_device: torch.device=torch.device('cpu'),
         precision: torch.dtype=torch.float16,
         sequential_offload: bool=False,
         lazy_offloading: bool=True,
         sha_chunksize: int = 16777216,
-        gpu_mem_reserved: float=DEFAULT_GPU_MEM_RESERVED,
         logger: types.ModuleType = logger
     ):
         '''
@@ -107,7 +107,7 @@ class ModelCache(object):
         self.lazy_offloading = lazy_offloading
         self.precision: torch.dtype=precision
         self.max_cache_size: float=max_cache_size
-        self.gpu_mem_reserved: float=gpu_mem_reserved
+        self.max_vram_cache_size: float=max_vram_cache_size
         self.execution_device: torch.device=execution_device
         self.storage_device: torch.device=storage_device
         self.sha_chunksize=sha_chunksize
@@ -349,7 +349,7 @@ class ModelCache(object):
         self.logger.debug(f"After unloading: cached_models={len(self._cached_models)}")

     def _offload_unlocked_models(self, size_needed: int=0):
-        reserved = self.gpu_mem_reserved * GIG
+        reserved = self.max_vram_cache_size * GIG
         vram_in_use = torch.cuda.memory_allocated()
         self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
         for model_key, cache_entry in sorted(self._cached_models.items(), key=lambda x:x[1].size):
diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py
index f74be90639..6670ca06ae 100644
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -336,11 +336,11 @@ class ModelManager(object):
         self.logger = logger
         self.cache = ModelCache(
             max_cache_size=max_cache_size,
+            max_vram_cache_size = self.app_config.max_vram_cache_size,
             execution_device = device_type,
             precision = precision,
             sequential_offload = sequential_offload,
             logger = logger,
-            gpu_mem_reserved = self.app_config.gpu_mem_reserved
         )
         self.cache_keys = dict()
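For reference, below is a minimal, illustrative sketch (not part of the patch) of how a caller might construct the cache after this rename. It uses only the keyword arguments visible in the ModelCache signature above; the numeric values simply mirror the defaults in the diff.

    # Illustrative sketch only -- assumes the ModelCache constructor shown in this patch.
    import torch
    from invokeai.backend.model_management.model_cache import ModelCache

    cache = ModelCache(
        max_cache_size=6.0,        # GB of RAM used by the model cache (unchanged)
        max_vram_cache_size=2.75,  # GB of VRAM retained for models (formerly gpu_mem_reserved)
        execution_device=torch.device('cuda'),
        precision=torch.float16,
    )

In practice, ModelManager passes app_config.max_vram_cache_size here, as shown in the model_manager.py hunk above, so users control the VRAM cache through the configuration file rather than constructing the cache directly.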