add support for "balanced" attention slice size

Lincoln Stein 2023-08-17 16:11:09 -04:00
parent 23b4e1cea0
commit b69f26c85c
7 changed files with 38 additions and 12 deletions

View File

@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
|-----------------------|---------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
| `attention_type` | `auto` | Select the type of attention to use. One of `auto`, `normal`, `xformers`, `sliced`, or `torch-sdp` |
-| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `max` or the integers 1-8|
+| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max` or the integers 1-8|
| `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance |
### Device
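The docs change above adds `balanced` to the accepted values. For illustration, a minimal sketch of selecting it programmatically; the field names come from the config hunk below, but the import path and keyword-argument usage are assumptions:

# Sketch only: assumes InvokeAIAppConfig lives at this path and accepts
# its settings as keyword arguments (it is a pydantic settings class).
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig(
    attention_type="sliced",          # slicing must be active for the size to matter
    attention_slice_size="balanced",  # the value this commit introduces
)
print(config.attention_slice_size)  # -> "balanced"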

View File

@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
# GENERATION
sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)
# DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAIN VALUES FROM PRE-3.1 CONFIG FILES
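A standalone sketch of what the `Literal[tuple([...])]` pattern above buys: subscripting `Literal` with a tuple expands to the enumerated values, so pydantic rejects anything outside the set. The model and field names here are illustrative only:

from typing import Literal

from pydantic import BaseModel, ValidationError

class Settings(BaseModel):
    # Same pattern as the field above: the tuple expands to
    # Literal["auto", "balanced", "max", 1, ..., 8].
    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = "auto"

Settings(attention_slice_size="balanced")  # accepted
Settings(attention_slice_size=4)           # accepted
try:
    Settings(attention_slice_size="huge")  # not in the set -> ValidationError
except ValidationError as err:
    print(err)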

View File

@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
# old settings for defaults
precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
device = old_opts.device
attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
attention_type = old_opts.attention_type
attention_slice_size = old_opts.attention_slice_size
self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
name="Attention Slice Size:",
relx=5,
editable=False,
-hidden=True,
+hidden=attention_type != "sliced",
color="CONTROL",
scroll_exit=True,
)
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
columns=len(ATTENTION_SLICE_CHOICES),
values=ATTENTION_SLICE_CHOICES,
value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-begin_entry_at=2,
relx=30,
-hidden=True,
+hidden=attention_type != "sliced",
max_height=2,
-max_width=100,
+max_width=110,
scroll_exit=True,
)
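The two `hidden=attention_type != "sliced"` changes above make the slice-size label and selector start out hidden unless slicing is already selected. A hypothetical sketch (not part of this commit) of how the same flag can be flipped at runtime when the user changes the attention type:

# Hypothetical wiring, not from this diff: npyscreen widgets expose a
# `hidden` flag, and when_value_edited fires on selection changes.
import npyscreen

ATTENTION_CHOICES = ["auto", "normal", "xformers", "sliced", "torch-sdp"]

class AttentionTypeSelect(npyscreen.SelectOne):
    def when_value_edited(self):
        is_sliced = ATTENTION_CHOICES[self.value[0]] == "sliced"
        # Show the slice-size widgets only while "sliced" is selected.
        self.parent.attention_slice_label.hidden = not is_sliced
        self.parent.attention_slice_size.hidden = not is_sliced
        self.parent.display()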
@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
new_opts.device = DEVICE_CHOICES[self.device.value[0]]
new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
for v in GENERATION_OPT_CHOICES:
setattr(new_opts, v, v in generation_options)
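The `.value[0]` change in the hunk above fixes an indexing bug rather than anything specific to `balanced`: npyscreen selection widgets store their state as a list of selected indices, so using `value` directly as a list index raises `TypeError`. A self-contained illustration (the choices list is assumed to mirror the config `Literal`):

ATTENTION_SLICE_CHOICES = ["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]

widget_value = [1]  # how npyscreen stores a single selection ("balanced" here)
# ATTENTION_SLICE_CHOICES[widget_value]   # TypeError: list indices must be integers
print(ATTENTION_SLICE_CHOICES[widget_value[0]])  # -> "balanced"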

View File

@@ -33,7 +33,7 @@ from .diffusion import (
PostprocessingSettings,
BasicConditioningInfo,
)
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size
@dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
return
elif config.attention_type == "sliced":
slice_size = config.attention_slice_size
-if torch.backends.mps.is_available(): # doesn't auto already do this?
-slice_size = "max"
+if slice_size == "auto":
+slice_size = auto_detect_slice_size(latents)
+elif slice_size == "balanced":
+slice_size = "auto"
self.enable_attention_slicing(slice_size=slice_size)
return
elif config.attention_type == "normal":
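The naming indirection above is easy to misread: InvokeAI's `auto` now means "measure free memory and decide" (via `auto_detect_slice_size`), while the string ultimately passed to diffusers' `enable_attention_slicing` uses `"auto"` for its own halve-the-heads heuristic, and the new `balanced` value maps onto that. A sketch of the mapping in isolation, assuming diffusers' documented semantics ("auto" halves the attention heads, "max" runs one slice at a time, an int N yields attention_head_dim // N slices):

from typing import Union

import torch

# Assumed absolute path for the relative import shown above; the re-export
# is added in the util package hunk below.
from invokeai.backend.util import auto_detect_slice_size

def resolve_slice_size(configured: Union[str, int], latents: torch.Tensor) -> Union[str, int]:
    if configured == "auto":
        return auto_detect_slice_size(latents)  # InvokeAI's memory-based pick
    if configured == "balanced":
        return "auto"  # InvokeAI "balanced" == diffusers "auto"
    return configured  # "max" or an explicit integer passes through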

View File

@@ -12,3 +12,4 @@ from .devices import (
)
from .log import write_log
from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size

View File

@@ -0,0 +1,24 @@
# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
"""
Utility routine used for autodetection of the optimal slice size
for the attention mechanism.
"""
import psutil
import torch


def auto_detect_slice_size(latents: torch.Tensor) -> str:
    # Estimate the peak size of the attention-score tensor that baddbmm
    # materializes for latents with these spatial dimensions.
    bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
    max_size_required_for_baddbmm = (
        16
        * latents.size(dim=2)
        * latents.size(dim=3)
        * latents.size(dim=2)
        * latents.size(dim=3)
        * bytes_per_element_needed_for_baddbmm_duplication
    )
    # NOTE: mem_free is referenced but never assigned in the hunk as committed;
    # the block below is an assumed reconstruction that queries free memory on
    # the device holding the latents (psutil is imported for the CPU/MPS case).
    if latents.device.type in ("cpu", "mps"):
        mem_free = psutil.virtual_memory().free
    elif latents.device.type == "cuda":
        mem_free, _ = torch.cuda.mem_get_info(latents.device)
    else:
        raise ValueError(f"unrecognized device {latents.device}")

    if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0):
        return "max"
    elif torch.backends.mps.is_available():
        return "max"
    else:
        return "balanced"

View File

@@ -17,7 +17,7 @@ from shutil import get_terminal_size
from curses import BUTTON2_CLICKED, BUTTON3_CLICKED
# minimum size for UIs
-MIN_COLS = 130
+MIN_COLS = 150
MIN_LINES = 40