From b69f26c85caf74b01116cb8b2cd11baaef16915f Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Thu, 17 Aug 2023 16:11:09 -0400
Subject: [PATCH] add support for "balanced" attention slice size

---
 docs/features/CONFIGURATION.md                  |  2 +-
 .../app/services/config/invokeai_config.py      |  2 +-
 .../backend/install/invokeai_configure.py       | 11 ++++-----
 .../stable_diffusion/diffusers_pipeline.py      |  8 ++++---
 invokeai/backend/util/__init__.py               |  1 +
 invokeai/backend/util/attention.py              | 33 ++++++++++++++++++++++++
 invokeai/frontend/install/widgets.py            |  2 +-
 7 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 invokeai/backend/util/attention.py

diff --git a/docs/features/CONFIGURATION.md b/docs/features/CONFIGURATION.md
index e93faa3ecc..6920d3d97f 100644
--- a/docs/features/CONFIGURATION.md
+++ b/docs/features/CONFIGURATION.md
@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
 |-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
 | `attention_type` | `auto` | Select the type of attention to use. One of `auto`,`normal`,`xformers`,`sliced`, or `torch-sdp` |
-| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `max` or the integers 1-8|
+| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max`, or the integers 1-8 |
 | `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance |
 
 ### Device
diff --git a/invokeai/app/services/config/invokeai_config.py b/invokeai/app/services/config/invokeai_config.py
index 11f3742075..ebf063c827 100644
--- a/invokeai/app/services/config/invokeai_config.py
+++ b/invokeai/app/services/config/invokeai_config.py
@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
     # GENERATION
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
     attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-    attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
     force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)
 
     # DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAN VALUES FROM PRE-3.1 CONFIG FILES
diff --git a/invokeai/backend/install/invokeai_configure.py b/invokeai/backend/install/invokeai_configure.py
index 5f2ff3534c..fcce4ceab8 100755
--- a/invokeai/backend/install/invokeai_configure.py
+++ b/invokeai/backend/install/invokeai_configure.py
@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
         # old settings for defaults
         precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
         device = old_opts.device
-        attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
+        attention_type = old_opts.attention_type
         attention_slice_size = old_opts.attention_slice_size
 
         self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             name="Attention Slice Size:",
             relx=5,
             editable=False,
-            hidden=True,
+            hidden=attention_type != "sliced",
             color="CONTROL",
             scroll_exit=True,
         )
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             columns=len(ATTENTION_SLICE_CHOICES),
             values=ATTENTION_SLICE_CHOICES,
             value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-            begin_entry_at=2,
             relx=30,
-            hidden=True,
+            hidden=attention_type != "sliced",
             max_height=2,
-            max_width=100,
+            max_width=110,
             scroll_exit=True,
         )
 
@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
         new_opts.device = DEVICE_CHOICES[self.device.value[0]]
         new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
         generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
         for v in GENERATION_OPT_CHOICES:
             setattr(new_opts, v, v in generation_options)
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index c6785f82f4..63b0c78b51 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -33,7 +33,7 @@ from .diffusion import (
     PostprocessingSettings,
     BasicConditioningInfo,
 )
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size
 
 
 @dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             return
         elif config.attention_type == "sliced":
             slice_size = config.attention_slice_size
-            if torch.backends.mps.is_available(): # doesn't auto already do this?
-                slice_size = "max"
+            if slice_size == "auto":
+                slice_size = auto_detect_slice_size(latents)
+            if slice_size == "balanced":
+                slice_size = "auto"
             self.enable_attention_slicing(slice_size=slice_size)
             return
         elif config.attention_type == "normal":
diff --git a/invokeai/backend/util/__init__.py b/invokeai/backend/util/__init__.py
index 2e69af5382..f607a33c4c 100644
--- a/invokeai/backend/util/__init__.py
+++ b/invokeai/backend/util/__init__.py
@@ -12,3 +12,4 @@ from .devices import (
 )
 from .log import write_log
 from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size
diff --git a/invokeai/backend/util/attention.py b/invokeai/backend/util/attention.py
new file mode 100644
index 0000000000..ef80898c2e
--- /dev/null
+++ b/invokeai/backend/util/attention.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
+"""
+Utility routine for automatically detecting the optimal slice size
+for the attention mechanism.
+""" +import torch + + +def auto_detect_slice_size(latents: torch.Tensor) -> str: + bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4 + max_size_required_for_baddbmm = ( + 16 + * latents.size(dim=2) + * latents.size(dim=3) + * latents.size(dim=2) + * latents.size(dim=3) + * bytes_per_element_needed_for_baddbmm_duplication + ) + if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): + return "max" + elif torch.backends.mps.is_available(): + return "max" + else: + return "balanced" diff --git a/invokeai/frontend/install/widgets.py b/invokeai/frontend/install/widgets.py index 01580e5846..a75dd2c6fd 100644 --- a/invokeai/frontend/install/widgets.py +++ b/invokeai/frontend/install/widgets.py @@ -17,7 +17,7 @@ from shutil import get_terminal_size from curses import BUTTON2_CLICKED, BUTTON3_CLICKED # minimum size for UIs -MIN_COLS = 130 +MIN_COLS = 150 MIN_LINES = 40