add support for "balanced" attention slice size

Lincoln Stein 2023-08-17 16:11:09 -04:00
parent 23b4e1cea0
commit b69f26c85c
7 changed files with 38 additions and 12 deletions

View File

@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
|-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
| `attention_type` | `auto` | Select the type of attention to use. One of `auto`, `normal`, `xformers`, `sliced`, or `torch-sdp` |
-| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `max` or the integers 1-8|
+| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max` or the integers 1-8|
| `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance |
### Device
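The docs change above adds `balanced` to the accepted values. For illustration, a minimal sketch of selecting it programmatically; the field names come from the config hunk below, but the import path and keyword-argument usage are assumptions:

# Sketch only: assumes InvokeAIAppConfig lives at this path and accepts
# its settings as keyword arguments (it is a pydantic settings class).
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig(
    attention_type="sliced",          # slicing must be active for the size to matter
    attention_slice_size="balanced",  # the value this commit introduces
)
print(config.attention_slice_size)  # -> "balanced"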

View File

@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
# GENERATION
sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)
# DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAIN VALUES FROM PRE-3.1 CONFIG FILES
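A standalone sketch of what the `Literal[tuple([...])]` pattern above buys: subscripting `Literal` with a tuple expands to the enumerated values, so pydantic rejects anything outside the set. The model and field names here are illustrative only:

from typing import Literal

from pydantic import BaseModel, ValidationError

class Settings(BaseModel):
    # Same pattern as the field above: the tuple expands to
    # Literal["auto", "balanced", "max", 1, ..., 8].
    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = "auto"

Settings(attention_slice_size="balanced")  # accepted
Settings(attention_slice_size=4)           # accepted
try:
    Settings(attention_slice_size="huge")  # not in the set -> ValidationError
except ValidationError as err:
    print(err)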

View File

@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
# old settings for defaults
precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
device = old_opts.device
attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
attention_type = old_opts.attention_type
attention_slice_size = old_opts.attention_slice_size
self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
name="Attention Slice Size:",
relx=5,
editable=False,
-hidden=True,
+hidden=attention_type != "sliced",
color="CONTROL",
scroll_exit=True,
)
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
columns=len(ATTENTION_SLICE_CHOICES),
values=ATTENTION_SLICE_CHOICES,
value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-begin_entry_at=2,
relx=30,
-hidden=True,
+hidden=attention_type != "sliced",
max_height=2,
-max_width=100,
+max_width=110,
scroll_exit=True,
)
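The two `hidden=attention_type != "sliced"` changes above make the slice-size label and selector start out hidden unless slicing is already selected. A hypothetical sketch (not part of this commit) of how the same flag can be flipped at runtime when the user changes the attention type:

# Hypothetical wiring, not from this diff: npyscreen widgets expose a
# `hidden` flag, and when_value_edited fires on selection changes.
import npyscreen

ATTENTION_CHOICES = ["auto", "normal", "xformers", "sliced", "torch-sdp"]

class AttentionTypeSelect(npyscreen.SelectOne):
    def when_value_edited(self):
        is_sliced = ATTENTION_CHOICES[self.value[0]] == "sliced"
        # Show the slice-size widgets only while "sliced" is selected.
        self.parent.attention_slice_label.hidden = not is_sliced
        self.parent.attention_slice_size.hidden = not is_sliced
        self.parent.display()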
@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
new_opts.device = DEVICE_CHOICES[self.device.value[0]]
new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
for v in GENERATION_OPT_CHOICES:
setattr(new_opts, v, v in generation_options)
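The `.value[0]` change in the hunk above fixes an indexing bug rather than anything specific to `balanced`: npyscreen selection widgets store their state as a list of selected indices, so using `value` directly as a list index raises `TypeError`. A self-contained illustration (the choices list is assumed to mirror the config `Literal`):

ATTENTION_SLICE_CHOICES = ["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]

widget_value = [1]  # how npyscreen stores a single selection ("balanced" here)
# ATTENTION_SLICE_CHOICES[widget_value]   # TypeError: list indices must be integers
print(ATTENTION_SLICE_CHOICES[widget_value[0]])  # -> "balanced"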

View File

@@ -33,7 +33,7 @@ from .diffusion import (
PostprocessingSettings,
BasicConditioningInfo,
)
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size
@dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
return
elif config.attention_type == "sliced":
slice_size = config.attention_slice_size
-if torch.backends.mps.is_available(): # doesn't auto already do this?
-slice_size = "max"
+if slice_size == "auto":
+slice_size = auto_detect_slice_size(latents)
+elif slice_size == "balanced":
+slice_size = "auto"
self.enable_attention_slicing(slice_size=slice_size)
return
elif config.attention_type == "normal":
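The naming indirection above is easy to misread: InvokeAI's `auto` now means "measure free memory and decide" (via `auto_detect_slice_size`), while the string ultimately passed to diffusers' `enable_attention_slicing` uses `"auto"` for its own halve-the-heads heuristic, and the new `balanced` value maps onto that. A sketch of the mapping in isolation, assuming diffusers' documented semantics ("auto" halves the attention heads, "max" runs one slice at a time, an int N yields attention_head_dim // N slices):

from typing import Union

import torch

# Assumed absolute path for the relative import shown above; the re-export
# is added in the util package hunk below.
from invokeai.backend.util import auto_detect_slice_size

def resolve_slice_size(configured: Union[str, int], latents: torch.Tensor) -> Union[str, int]:
    if configured == "auto":
        return auto_detect_slice_size(latents)  # InvokeAI's memory-based pick
    if configured == "balanced":
        return "auto"  # InvokeAI "balanced" == diffusers "auto"
    return configured  # "max" or an explicit integer passes through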

View File

@@ -12,3 +12,4 @@ from .devices import (
)
from .log import write_log
from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size

View File

@@ -0,0 +1,24 @@
# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
"""
Utility routine used for autodetection of the optimal slice size
for the attention mechanism.
"""
import psutil
import torch


def auto_detect_slice_size(latents: torch.Tensor) -> str:
    # Estimate the peak size of the attention-score tensor that baddbmm
    # materializes for latents with these spatial dimensions.
    bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
    max_size_required_for_baddbmm = (
        16
        * latents.size(dim=2)
        * latents.size(dim=3)
        * latents.size(dim=2)
        * latents.size(dim=3)
        * bytes_per_element_needed_for_baddbmm_duplication
    )
    # NOTE: mem_free is referenced but never assigned in the hunk as committed;
    # the block below is an assumed reconstruction that queries free memory on
    # the device holding the latents (psutil is imported for the CPU/MPS case).
    if latents.device.type in ("cpu", "mps"):
        mem_free = psutil.virtual_memory().free
    elif latents.device.type == "cuda":
        mem_free, _ = torch.cuda.mem_get_info(latents.device)
    else:
        raise ValueError(f"unrecognized device {latents.device}")

    if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0):
        return "max"
    elif torch.backends.mps.is_available():
        return "max"
    else:
        return "balanced"

View File

@@ -17,7 +17,7 @@ from shutil import get_terminal_size
from curses import BUTTON2_CLICKED, BUTTON3_CLICKED
# minimum size for UIs
-MIN_COLS = 130
+MIN_COLS = 150
MIN_LINES = 40