expose max_cache_size to invokeai-configure interface (#3664)

This PR allows the user to set the model manager cache size from within the `invokeai-configure` TUI.
Authored by blessedcoolant on 2023-07-07 01:58:22 +12:00; committed via GitHub
commit 7a0154a7b8
2 changed files with 9 additions and 22 deletions


@@ -430,13 +430,13 @@ to allow InvokeAI to download restricted styles & subjects from the "Concept Lib
             max_height=len(PRECISION_CHOICES) + 1,
             scroll_exit=True,
         )
-        self.max_loaded_models = self.add_widget_intelligent(
+        self.max_cache_size = self.add_widget_intelligent(
             IntTitleSlider,
-            name="Number of models to cache in CPU memory (each will use 2-4 GB!)",
-            value=old_opts.max_loaded_models,
-            out_of=10,
-            lowest=1,
-            begin_entry_at=4,
+            name="Size of the RAM cache used for fast model switching (GB)",
+            value=old_opts.max_cache_size,
+            out_of=20,
+            lowest=3,
+            begin_entry_at=6,
             scroll_exit=True,
         )
         self.nextrely += 1
@@ -539,7 +539,7 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
             "outdir",
             "nsfw_checker",
             "free_gpu_mem",
-            "max_loaded_models",
+            "max_cache_size",
             "xformers_enabled",
             "always_use_cpu",
         ]:
@@ -555,9 +555,6 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
         new_opts.license_acceptance = self.license_acceptance.value
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
-        # widget library workaround to make max_loaded_models an int rather than a float
-        new_opts.max_loaded_models = int(new_opts.max_loaded_models)
         return new_opts
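
For context, the attribute names listed in the hunk above are copied verbatim from the form widgets onto the options object. A minimal sketch of that step, assuming the enclosing method gathers values with the usual getattr/setattr pattern (the loop body is an assumption, not a quote of the file):

for attr in [
    "outdir",
    "nsfw_checker",
    "free_gpu_mem",
    "max_cache_size",
    "xformers_enabled",
    "always_use_cpu",
]:
    # copy each TUI widget's current value onto the new options object
    setattr(new_opts, attr, getattr(self, attr).value)

Because max_cache_size is a size in gigabytes and may legitimately be fractional, the int() coercion that max_loaded_models needed is dropped rather than ported over.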


@@ -8,7 +8,7 @@ The cache returns context manager generators designed to load the
 model into the GPU within the context, and unload outside the
 context. Use like this:
 
-cache = ModelCache(max_models_cached=6)
+cache = ModelCache(max_cache_size=7.5)
 with cache.get_model('runwayml/stable-diffusion-1-5') as SD1,
      cache.get_model('stabilityai/stable-diffusion-2') as SD2:
     do_something_in_GPU(SD1,SD2)
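
The docstring example above spans several lines in a way that is not valid Python on its own; the intended usage pattern, written out as runnable code (model names taken from the docstring, do_something_in_GPU is the docstring's placeholder), is roughly:

cache = ModelCache(max_cache_size=7.5)  # RAM budget in GB, not a model count
with cache.get_model('runwayml/stable-diffusion-1-5') as SD1, \
     cache.get_model('stabilityai/stable-diffusion-2') as SD2:
    do_something_in_GPU(SD1, SD2)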
@@ -91,7 +91,7 @@ class ModelCache(object):
         logger: types.ModuleType = logger
     ):
         '''
-        :param max_models: Maximum number of models to cache in CPU RAM [4]
+        :param max_cache_size: Maximum size of the RAM cache [6.0 GB]
         :param execution_device: Torch device to load active model into [torch.device('cuda')]
         :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
         :param precision: Precision for loaded models [torch.float16]
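
Reading the renamed parameter together with the other :param entries, a hedged sketch of constructing the cache (keyword names and defaults are taken from the docstring above; treat them as illustrative rather than the file's exact signature):

import torch

cache = ModelCache(
    max_cache_size=6.0,                     # RAM cache budget in GB (replaces the old model count)
    execution_device=torch.device('cuda'),  # device the active model is loaded onto
    storage_device=torch.device('cpu'),     # device inactive models are parked on
    precision=torch.float16,                # precision used for loaded models
)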
@@ -126,16 +126,6 @@ class ModelCache(object):
             key += f":{submodel_type}"
         return key
 
-    #def get_model(
-    #    self,
-    #    repo_id_or_path: Union[str, Path],
-    #    model_type: ModelType = ModelType.Diffusers,
-    #    subfolder: Path = None,
-    #    submodel: ModelType = None,
-    #    revision: str = None,
-    #    attach_model_part: Tuple[ModelType, str] = (None, None),
-    #    gpu_load: bool = True,
-    #) -> ModelLocker: # ?? what does it return
     def _get_model_info(
         self,
         model_path: str,