From 185c2e23547b5416a2d9a33ce6a162fd285c138f Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Wed, 3 Apr 2024 09:36:14 -0400
Subject: [PATCH] add a config variable that disables VRAM OOM checks

---
 invokeai/app/services/config/config_default.py             | 2 ++
 .../app/services/model_manager/model_manager_default.py    | 1 +
 .../model_manager/load/model_cache/model_cache_default.py  | 8 +++++++-
 tests/test_docs.py                                          | 2 +-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/invokeai/app/services/config/config_default.py b/invokeai/app/services/config/config_default.py
index 247835d533..40ed461172 100644
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -103,6 +103,7 @@ class InvokeAIAppConfig(BaseSettings):
         convert_cache: Maximum size of on-disk converted models cache (GB).
         lazy_offload: Keep models in VRAM until their space is needed.
         log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
+        disable_vram_check: If True, disable the check for sufficient VRAM memory prior to loading a model. This may lead to unpredictable behavior, so use for debugging memory problems only.
         device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.
             Valid values: `auto`, `cpu`, `cuda`, `cuda:1`, `mps`
         precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.
             Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast`
         sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
@@ -171,6 +172,7 @@ class InvokeAIAppConfig(BaseSettings):
     convert_cache: float = Field(default=DEFAULT_CONVERT_CACHE, ge=0, description="Maximum size of on-disk converted models cache (GB).")
     lazy_offload: bool = Field(default=True, description="Keep models in VRAM until their space is needed.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
+    disable_vram_check: bool = Field(default=False, description="If True, disable the check for sufficient VRAM memory prior to loading a model. This may lead to unpredictable behavior, so use for debugging memory problems only.")

     # DEVICE
     device: DEVICE = Field(default="auto", description="Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.")
diff --git a/invokeai/app/services/model_manager/model_manager_default.py b/invokeai/app/services/model_manager/model_manager_default.py
index b160ff6fed..dfffd7954a 100644
--- a/invokeai/app/services/model_manager/model_manager_default.py
+++ b/invokeai/app/services/model_manager/model_manager_default.py
@@ -82,6 +82,7 @@ class ModelManagerService(ModelManagerServiceBase):
             max_vram_cache_size=app_config.vram,
             logger=logger,
             execution_device=execution_device,
+            disable_memory_check=app_config.disable_vram_check,
         )
         convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
         loader = ModelLoadService(
diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
index 6173d48abe..668e9e4d33 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -68,6 +68,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
         sha_chunksize: int = 16777216,
         log_memory_usage: bool = False,
         logger: Optional[Logger] = None,
+        disable_memory_check: bool = False,
     ):
         """
         Initialize the model RAM cache.
@@ -82,6 +83,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
             operation, and the result will be logged (at debug level). There is a time cost to capturing the
             memory snapshots, so it is recommended to disable this feature unless you are actively inspecting
             the model cache's behaviour.
+        :param disable_memory_check: If True, disable the check for insufficient VRAM when loading a model.
""" # allow lazy offloading only when vram cache enabled self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0 @@ -93,6 +95,7 @@ class ModelCache(ModelCacheBase[AnyModel]): self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__) self._log_memory_usage = log_memory_usage self._stats: Optional[CacheStats] = None + self._disable_memory_check = disable_memory_check self._cached_models: Dict[str, CacheRecord[AnyModel]] = {} self._cache_stack: List[str] = [] @@ -270,7 +273,10 @@ class ModelCache(ModelCacheBase[AnyModel]): return # may raise an exception here if insufficient GPU VRAM - self._check_free_vram(target_device, cache_entry.size) + if self._disable_memory_check: + self.logger.warning("VRAM memory check disabled. Unpredictable behavior may result.") + else: + self._check_free_vram(target_device, cache_entry.size) start_model_to_time = time.time() snapshot_before = self._capture_memory_snapshot() diff --git a/tests/test_docs.py b/tests/test_docs.py index 33240d526a..e577c6bbfb 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -3,7 +3,7 @@ from scripts.update_config_docstring import generate_config_docstrings def test_app_config_docstrings_are_current(): - # If this test fails, run `python scripts/generate_config_docstring.py`. See the comments in that script for + # If this test fails, run `python scripts/update_config_docstring.py`. See the comments in that script for # an explanation of why this is necessary. # # A make target is provided to run the script: `make update-config-docstring`.