diff --git a/invokeai/app/services/config/config_default.py b/invokeai/app/services/config/config_default.py
index ce2302a268..1ac4444aad 100644
--- a/invokeai/app/services/config/config_default.py
+++ b/invokeai/app/services/config/config_default.py
@@ -105,7 +105,6 @@ class InvokeAIAppConfig(BaseSettings):
         vram: Amount of VRAM reserved for model storage (GB).
         convert_cache: Maximum size of on-disk converted models cache (GB).
         lazy_offload: Keep models in VRAM until their space is needed.
-        load_sd3_encoder_3: Load the memory-intensive SD3 text_encoder_3.
         log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
         device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda`, `cuda:1`, `mps`
         precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`
@@ -176,7 +175,6 @@ class InvokeAIAppConfig(BaseSettings):
     vram: float = Field(default=DEFAULT_VRAM_CACHE, ge=0, description="Amount of VRAM reserved for model storage (GB).")
     convert_cache: float = Field(default=DEFAULT_CONVERT_CACHE, ge=0, description="Maximum size of on-disk converted models cache (GB).")
     lazy_offload: bool = Field(default=True, description="Keep models in VRAM until their space is needed.")
-    load_sd3_encoder_3: bool = Field(default=False, description="Load the memory-intensive SD3 text_encoder_3.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
 
     # DEVICE
diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
index 8194f0befa..11ae507146 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -241,9 +241,15 @@ class ModelCache(ModelCacheBase[AnyModel]):
             if vram_in_use <= reserved:
                 break
 
-            # only way to remove a quantized model from VRAM is to
+            # Special handling of the stable-diffusion-3:text_encoder_3
+            # submodel, when the user has loaded a quantized model.
+            # The only way to remove the quantized version of this model from VRAM is to
             # delete it completely - it can't be moved from device to device
+            # This also contains a workaround for quantized models that
+            # persist indefinitely in VRAM
             if cache_entry.is_quantized:
+                self._empty_quantized_state_dict(cache_entry.model)
+                cache_entry.model = None
                 self._delete_cache_entry(cache_entry)
                 vram_in_use = torch.cuda.memory_allocated() + size_required
                 continue
@@ -426,3 +432,17 @@ class ModelCache(ModelCacheBase[AnyModel]):
         del cache_entry
         gc.collect()
         TorchDevice.empty_cache()
+
+    def _empty_quantized_state_dict(self, model: AnyModel) -> None:
+        """Set all keys of a model's state dict to None.
+
+        This is a partial workaround for a poorly-understood bug in
+        transformers' support for quantized T5EncoderModels (text_encoder_3
+        of SD3). This allows most of the model to be unloaded from VRAM, but
+        still leaks 8K of VRAM each time the model is unloaded. Using the quantized
+        version of stable-diffusion-3-medium is NOT recommended.
+ """ + assert isinstance(model, torch.nn.Module) + sd = model.state_dict() + for k in sd.keys(): + sd[k] = None diff --git a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py index 5e0cb508cf..d26e055925 100644 --- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py @@ -40,7 +40,7 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader): model_base_to_model_type = { BaseModelType.StableDiffusion1: "FrozenCLIPEmbedder", BaseModelType.StableDiffusion2: "FrozenOpenCLIPEmbedder", - BaseModelType.StableDiffusion3: "SD3", # non-functional, for completeness only + BaseModelType.StableDiffusion3: "SD3", BaseModelType.StableDiffusionXL: "SDXL", BaseModelType.StableDiffusionXLRefiner: "SDXL-Refiner", } diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 31b16d9c8a..98c6653e6a 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -122,6 +122,13 @@ STARTER_MODELS: list[StarterModel] = [ type=ModelType.Main, dependencies=[sdxl_fp16_vae_fix], ), + StarterModel( + name="Stable Diffusion 3", + base=BaseModelType.StableDiffusion3, + source="stabilityai/stable-diffusion-3-medium-diffusers", + description="The OG Stable Diffusion 3 base model (beta).", + type=ModelType.Main, + ), # endregion # region VAE sdxl_fp16_vae_fix, diff --git a/pyproject.toml b/pyproject.toml index e816343efb..11962c7e79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,26 +34,26 @@ classifiers = [ dependencies = [ # Core generation dependencies, pinned for reproducible builds. "accelerate==0.30.1", - "bitsandbytes", + "bitsandbytes==0.43.1", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "compel==2.0.2", "controlnet-aux==0.0.7", - "diffusers[torch]", + "diffusers[torch]==0.29.0", "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model - "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal() + "numpy==1.23.5", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal() "onnx==1.15.0", "onnxruntime==1.16.3", "opencv-python==4.9.0.80", - "pytorch-lightning==2.1.3", + "pytorch-lightning", "safetensors==0.4.3", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 - "torch", + "torch==2.2.2", "torchmetrics==0.11.4", "torchsde==0.2.6", - "torchvision", - "transformers", - "sentencepiece", + "torchvision==0.17.2", + "transformers==4.41.1", + "sentencepiece==0.1.99", # Core application dependencies, pinned for reproducible builds. "fastapi-events==0.11.0",