add a config variable that disables the VRAM check used to guard against OOM conditions

Lincoln Stein 2024-04-03 09:36:14 -04:00
parent 132aadca15
commit 185c2e2354
4 changed files with 11 additions and 2 deletions


@@ -103,6 +103,7 @@ class InvokeAIAppConfig(BaseSettings):
     convert_cache: Maximum size of on-disk converted models cache (GB).
     lazy_offload: Keep models in VRAM until their space is needed.
     log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
+    disable_vram_check: If True, disable the check for sufficient VRAM memory prior to loading a model. This may lead to unpredictable behavior, so use for debugging memory problems only.
     device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda`, `cuda:1`, `mps`
     precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast`
     sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
@@ -171,6 +172,7 @@ class InvokeAIAppConfig(BaseSettings):
     convert_cache: float = Field(default=DEFAULT_CONVERT_CACHE, ge=0, description="Maximum size of on-disk converted models cache (GB).")
     lazy_offload: bool = Field(default=True, description="Keep models in VRAM until their space is needed.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
+    disable_vram_check: bool = Field(default=False, description="If True, disable the check for sufficient VRAM memory prior to loading a model. This may lead to unpredictable behavior, so use for debugging memory problems only.")
     # DEVICE
     device: DEVICE = Field(default="auto", description="Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.")
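For context, here is a minimal, self-contained sketch of the settings pattern used above (a pydantic `BaseSettings` field with a default and a description); the class name is hypothetical and only the new flag is shown:

```python
# Sketch of the BaseSettings/Field pattern; AppConfigSketch is illustrative, not InvokeAI's class.
from pydantic import Field
from pydantic_settings import BaseSettings


class AppConfigSketch(BaseSettings):
    disable_vram_check: bool = Field(
        default=False,
        description="If True, disable the check for sufficient VRAM memory prior to loading a model.",
    )


cfg = AppConfigSketch()                         # disable_vram_check == False by default
cfg = AppConfigSketch(disable_vram_check=True)  # explicit override for a debugging session
```

Because the field defaults to `False`, existing configurations keep the VRAM check unless the flag is set explicitly.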


@@ -82,6 +82,7 @@ class ModelManagerService(ModelManagerServiceBase):
            max_vram_cache_size=app_config.vram,
            logger=logger,
            execution_device=execution_device,
+            disable_memory_check=app_config.disable_vram_check,
        )
        convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
        loader = ModelLoadService(
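Note the rename across layers: the app-level `disable_vram_check` setting becomes the cache's `disable_memory_check` constructor argument. A stripped-down sketch of that wiring (the stub classes below are hypothetical stand-ins, not the real services):

```python
# Sketch of forwarding an app-level flag into the cache it configures.
from dataclasses import dataclass


@dataclass
class AppConfigStub:
    vram: float
    disable_vram_check: bool = False


@dataclass
class ModelCacheStub:
    max_vram_cache_size: float
    disable_memory_check: bool = False


def build_ram_cache(app_config: AppConfigStub) -> ModelCacheStub:
    # The service layer reads the app-level setting and passes it through
    # under the cache's own parameter name.
    return ModelCacheStub(
        max_vram_cache_size=app_config.vram,
        disable_memory_check=app_config.disable_vram_check,
    )


cache = build_ram_cache(AppConfigStub(vram=2.0, disable_vram_check=True))
assert cache.disable_memory_check is True
```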


@@ -68,6 +68,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        sha_chunksize: int = 16777216,
        log_memory_usage: bool = False,
        logger: Optional[Logger] = None,
+        disable_memory_check: bool = False,
    ):
        """
        Initialize the model RAM cache.
@@ -82,6 +83,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        operation, and the result will be logged (at debug level). There is a time cost to capturing the memory
        snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
        behaviour.
+        :param disable_memory_check: If True disable the check for insufficient VRAM when loading a model.
        """
        # allow lazy offloading only when vram cache enabled
        self._lazy_offloading = lazy_offloading and max_vram_cache_size > 0
@@ -93,6 +95,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
        self._logger = logger or InvokeAILogger.get_logger(self.__class__.__name__)
        self._log_memory_usage = log_memory_usage
        self._stats: Optional[CacheStats] = None
+        self._disable_memory_check = disable_memory_check
        self._cached_models: Dict[str, CacheRecord[AnyModel]] = {}
        self._cache_stack: List[str] = []
@@ -270,7 +273,10 @@ class ModelCache(ModelCacheBase[AnyModel]):
            return
        # may raise an exception here if insufficient GPU VRAM
-        self._check_free_vram(target_device, cache_entry.size)
+        if self._disable_memory_check:
+            self.logger.warning("VRAM memory check disabled. Unpredictable behavior may result.")
+        else:
+            self._check_free_vram(target_device, cache_entry.size)
        start_model_to_time = time.time()
        snapshot_before = self._capture_memory_snapshot()
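Taken together, these hunks accept the flag in the constructor, store it on the instance, and consult it before the VRAM check, downgrading the check to a warning when it is disabled. A self-contained sketch of that pattern (not the real `ModelCache`; a fixed free-VRAM number stands in for the device query):

```python
# Sketch of the guarded-check pattern added by this commit.
import logging


class CacheSketch:
    def __init__(self, free_vram_bytes: int, disable_memory_check: bool = False):
        self._free_vram_bytes = free_vram_bytes  # stand-in for querying the device
        self._disable_memory_check = disable_memory_check
        self.logger = logging.getLogger(__name__)

    def _check_free_vram(self, needed_bytes: int) -> None:
        # Raise before attempting a load that cannot fit.
        if needed_bytes > self._free_vram_bytes:
            raise RuntimeError(
                f"Insufficient VRAM: need {needed_bytes} bytes, only {self._free_vram_bytes} free"
            )

    def move_model_to_device(self, model_size_bytes: int) -> None:
        if self._disable_memory_check:
            self.logger.warning("VRAM memory check disabled. Unpredictable behavior may result.")
        else:
            self._check_free_vram(model_size_bytes)
        # ... the actual weight transfer would happen here ...
```

With the check disabled the load proceeds regardless of reported free VRAM, so a backend out-of-memory error is still possible; that is why the setting is documented as being for debugging memory problems only.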


@@ -3,7 +3,7 @@ from scripts.update_config_docstring import generate_config_docstrings
 def test_app_config_docstrings_are_current():
-    # If this test fails, run `python scripts/generate_config_docstring.py`. See the comments in that script for
+    # If this test fails, run `python scripts/update_config_docstring.py`. See the comments in that script for
     # an explanation of why this is necessary.
     #
     # A make target is provided to run the script: `make update-config-docstring`.
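The comment fix points at `scripts/update_config_docstring.py`, which regenerates the class docstring from the `Field` descriptions so the two stay in sync. The script's internals are not shown in this diff; the sketch below only illustrates, under that assumption, what such a docstring-sync test can look like:

```python
# Hypothetical sketch of a docstring-sync check; not the real InvokeAI script or test.
from pydantic import Field
from pydantic_settings import BaseSettings


class ConfigSketch(BaseSettings):
    """Config sketch.

    Attributes:
        disable_vram_check: If True, disable the check for sufficient VRAM memory prior to loading a model.
    """

    disable_vram_check: bool = Field(
        default=False,
        description="If True, disable the check for sufficient VRAM memory prior to loading a model.",
    )


def test_docstring_matches_field_descriptions() -> None:
    # Every field's description must appear verbatim in the class docstring.
    doc = ConfigSketch.__doc__ or ""
    for name, field in ConfigSketch.model_fields.items():
        assert f"{name}: {field.description}" in doc
```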