expose max_cache_size to invokeai-configure interface

Lincoln Stein 2023-07-05 20:59:14 -04:00
parent bf25818d76
commit 0a6dccd607
2 changed files with 9 additions and 22 deletions

View File

@@ -430,13 +430,13 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             max_height=len(PRECISION_CHOICES) + 1,
             scroll_exit=True,
         )
-        self.max_loaded_models = self.add_widget_intelligent(
+        self.max_cache_size = self.add_widget_intelligent(
             IntTitleSlider,
-            name="Number of models to cache in CPU memory (each will use 2-4 GB!)",
-            value=old_opts.max_loaded_models,
-            out_of=10,
-            lowest=1,
-            begin_entry_at=4,
+            name="Size of the RAM cache used for fast model switching (GB)",
+            value=old_opts.max_cache_size,
+            out_of=20,
+            lowest=3,
+            begin_entry_at=6,
             scroll_exit=True,
         )
         self.nextrely += 1
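
For orientation: the replacement widget is a titled slider whose selectable range is bounded by lowest/out_of and whose entry column is set by begin_entry_at. A minimal stand-alone sketch, assuming a plain npyscreen TitleSlider accepts the same keywords (this is not InvokeAI's IntTitleSlider or add_widget_intelligent):

# Stand-alone sketch only; the form and run() helper here are hypothetical.
import npyscreen

def run(*args):
    form = npyscreen.Form(name="Model cache")
    cache_size = form.add(
        npyscreen.TitleSlider,
        name="Size of the RAM cache used for fast model switching (GB)",
        value=6,           # stand-in for old_opts.max_cache_size (default 6.0 GB)
        out_of=20,         # upper end of the slider, as in the diff
        lowest=3,          # lower end of the slider, as in the diff
        begin_entry_at=6,  # column at which the slider itself begins
    )
    form.edit()
    return cache_size.value

if __name__ == "__main__":
    print(npyscreen.wrapper_basic(run))  # wrapper_basic sets up and tears down curses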
@@ -539,7 +539,7 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
             "outdir",
             "nsfw_checker",
             "free_gpu_mem",
-            "max_loaded_models",
+            "max_cache_size",
             "xformers_enabled",
             "always_use_cpu",
         ]:
@@ -555,9 +555,6 @@ https://huggingface.co/spaces/CompVis/stable-diffusion-license
         new_opts.license_acceptance = self.license_acceptance.value
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
-        # widget library workaround to make max_loaded_models an int rather than a float
-        new_opts.max_loaded_models = int(new_opts.max_loaded_models)
         return new_opts
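
This hunk shows only the middle of the options-marshalling code. The copy pattern it implies is sketched below; the method name and the Namespace container are assumptions rather than the actual invokeai-configure source. The removed int() workaround is no longer needed, presumably because max_cache_size is a size in gigabytes rather than a model count and need not be a whole number (the ModelCache docstring below uses 7.5).

# Sketch of the implied marshalling pattern; names outside the quoted list are hypothetical.
from argparse import Namespace

def marshall_arguments(self) -> Namespace:  # method of the configure form; class omitted here
    new_opts = Namespace()
    for attr in [
        "outdir",
        "nsfw_checker",
        "free_gpu_mem",
        "max_cache_size",    # replaces max_loaded_models
        "xformers_enabled",
        "always_use_cpu",
    ]:
        # each configure-TUI widget exposes its current setting via .value
        setattr(new_opts, attr, getattr(self, attr).value)
    # no int() coercion of max_cache_size; fractional GB values are acceptable
    return new_opts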

View File

@@ -8,7 +8,7 @@ The cache returns context manager generators designed to load the
 model into the GPU within the context, and unload outside the
 context. Use like this:
 
-   cache = ModelCache(max_models_cached=6)
+   cache = ModelCache(max_cache_size=7.5)
    with cache.get_model('runwayml/stable-diffusion-1-5') as SD1,
         cache.get_model('stabilityai/stable-diffusion-2') as SD2:
        do_something_in_GPU(SD1,SD2)
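
Spelled out, the updated docstring example reads as below. The constructor argument and model names are taken verbatim from the docstring; the import path and do_something_in_GPU are assumptions.

# Usage sketch mirroring the module docstring above.
from invokeai.backend.model_management.model_cache import ModelCache  # module path assumed

cache = ModelCache(max_cache_size=7.5)  # RAM budget in GB, no longer a model count

# get_model() returns a context manager: the model lives on the GPU inside the
# block and is moved back to the storage device when the block exits.
with cache.get_model('runwayml/stable-diffusion-1-5') as SD1, \
     cache.get_model('stabilityai/stable-diffusion-2') as SD2:
    do_something_in_GPU(SD1, SD2)  # placeholder taken from the docstring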
@@ -91,7 +91,7 @@ class ModelCache(object):
         logger: types.ModuleType = logger
     ):
         '''
-        :param max_models: Maximum number of models to cache in CPU RAM [4]
+        :param max_cache_size: Maximum size of the RAM cache [6.0 GB]
         :param execution_device: Torch device to load active model into [torch.device('cuda')]
         :param storage_device: Torch device to save inactive model in [torch.device('cpu')]
         :param precision: Precision for loaded models [torch.float16]
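
Taken together, the documented parameters and their bracketed defaults correspond to a constructor call like the sketch below; the keyword names come from the :param: entries above, and passing every default explicitly is only for illustration.

import torch

# Hypothetical construction spelling out the documented defaults;
# ModelCache imported as in the previous sketch.
cache = ModelCache(
    max_cache_size=6.0,                     # maximum size of the RAM cache, in GB
    execution_device=torch.device('cuda'),  # device the active model is loaded onto
    storage_device=torch.device('cpu'),     # device inactive models are parked on
    precision=torch.float16,                # dtype used for loaded models
)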
@@ -126,16 +126,6 @@ class ModelCache(object):
             key += f":{submodel_type}"
         return key
 
-    #def get_model(
-    #    self,
-    #    repo_id_or_path: Union[str, Path],
-    #    model_type: ModelType = ModelType.Diffusers,
-    #    subfolder: Path = None,
-    #    submodel: ModelType = None,
-    #    revision: str = None,
-    #    attach_model_part: Tuple[ModelType, str] = (None, None),
-    #    gpu_load: bool = True,
-    #) -> ModelLocker: # ?? what does it return
     def _get_model_info(
         self,
         model_path: str,