From b69f26c85caf74b01116cb8b2cd11baaef16915f Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Thu, 17 Aug 2023 16:11:09 -0400
Subject: [PATCH] add support for "balanced" attention slice size

---
 docs/features/CONFIGURATION.md                  |  2 +-
 .../app/services/config/invokeai_config.py      |  2 +-
 .../backend/install/invokeai_configure.py       | 11 ++++-----
 .../stable_diffusion/diffusers_pipeline.py      |  8 ++++---
 invokeai/backend/util/__init__.py               |  1 +
 invokeai/backend/util/attention.py              | 33 ++++++++++++++++++++++++
 invokeai/frontend/install/widgets.py            |  2 +-
 7 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 invokeai/backend/util/attention.py

diff --git a/docs/features/CONFIGURATION.md b/docs/features/CONFIGURATION.md
index e93faa3ecc..6920d3d97f 100644
--- a/docs/features/CONFIGURATION.md
+++ b/docs/features/CONFIGURATION.md
@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
 |-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
 | `attention_type` | `auto` | Select the type of attention to use. One of `auto`,`normal`,`xformers`,`sliced`, or `torch-sdp` |
-| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `max` or the integers 1-8|
+| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max`, or the integers 1-8 |
 | `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance |
 
 ### Device
diff --git a/invokeai/app/services/config/invokeai_config.py b/invokeai/app/services/config/invokeai_config.py
index 11f3742075..ebf063c827 100644
--- a/invokeai/app/services/config/invokeai_config.py
+++ b/invokeai/app/services/config/invokeai_config.py
@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
     # GENERATION
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
     attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-    attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
     force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)
 
     # DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAN VALUES FROM PRE-3.1 CONFIG FILES
diff --git a/invokeai/backend/install/invokeai_configure.py b/invokeai/backend/install/invokeai_configure.py
index 5f2ff3534c..fcce4ceab8 100755
--- a/invokeai/backend/install/invokeai_configure.py
+++ b/invokeai/backend/install/invokeai_configure.py
@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
         # old settings for defaults
         precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
         device = old_opts.device
-        attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
+        attention_type = old_opts.attention_type
         attention_slice_size = old_opts.attention_slice_size
 
         self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             name="Attention Slice Size:",
             relx=5,
             editable=False,
-            hidden=True,
+            hidden=attention_type != "sliced",
             color="CONTROL",
             scroll_exit=True,
         )
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             columns=len(ATTENTION_SLICE_CHOICES),
             values=ATTENTION_SLICE_CHOICES,
             value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-            begin_entry_at=2,
             relx=30,
-            hidden=True,
+            hidden=attention_type != "sliced",
             max_height=2,
-            max_width=100,
+            max_width=110,
             scroll_exit=True,
         )
 
@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
         new_opts.device = DEVICE_CHOICES[self.device.value[0]]
         new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
         generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
         for v in GENERATION_OPT_CHOICES:
             setattr(new_opts, v, v in generation_options)
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index c6785f82f4..63b0c78b51 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -33,7 +33,7 @@ from .diffusion import (
     PostprocessingSettings,
     BasicConditioningInfo,
 )
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size
 
 
 @dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             return
         elif config.attention_type == "sliced":
             slice_size = config.attention_slice_size
-            if torch.backends.mps.is_available(): # doesn't auto already do this?
-                slice_size = "max"
+            if slice_size == "auto":
+                slice_size = auto_detect_slice_size(latents)
+            if slice_size == "balanced":
+                slice_size = "auto"
             self.enable_attention_slicing(slice_size=slice_size)
             return
         elif config.attention_type == "normal":
diff --git a/invokeai/backend/util/__init__.py b/invokeai/backend/util/__init__.py
index 2e69af5382..f607a33c4c 100644
--- a/invokeai/backend/util/__init__.py
+++ b/invokeai/backend/util/__init__.py
@@ -12,3 +12,4 @@ from .devices import (
 )
 from .log import write_log
 from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size
diff --git a/invokeai/backend/util/attention.py b/invokeai/backend/util/attention.py
new file mode 100644
index 0000000000..ef80898c2e
--- /dev/null
+++ b/invokeai/backend/util/attention.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
+"""
+Utility routine for automatically detecting the optimal slice size
+for the attention mechanism.
+""" +import torch + + +def auto_detect_slice_size(latents: torch.Tensor) -> str: + bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4 + max_size_required_for_baddbmm = ( + 16 + * latents.size(dim=2) + * latents.size(dim=3) + * latents.size(dim=2) + * latents.size(dim=3) + * bytes_per_element_needed_for_baddbmm_duplication + ) + if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): + return "max" + elif torch.backends.mps.is_available(): + return "max" + else: + return "balanced" diff --git a/invokeai/frontend/install/widgets.py b/invokeai/frontend/install/widgets.py index 01580e5846..a75dd2c6fd 100644 --- a/invokeai/frontend/install/widgets.py +++ b/invokeai/frontend/install/widgets.py @@ -17,7 +17,7 @@ from shutil import get_terminal_size from curses import BUTTON2_CLICKED, BUTTON3_CLICKED # minimum size for UIs -MIN_COLS = 130 +MIN_COLS = 150 MIN_LINES = 40