mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

commit b69f26c85c (parent 23b4e1cea0)

add support for "balanced" attention slice size
@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
 |-----------------------|---------|-----------------------------------------------------------------------------------------------------------------------|
 | `sequential_guidance` | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss |
 | `attention_type` | `auto` | Select the type of attention to use. One of `auto`, `normal`, `xformers`, `sliced`, or `torch-sdp` |
-| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `max`, or the integers 1-8 |
+| `attention_slice_size` | `auto` | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max`, or the integers 1-8 |
 | `force_tiled_decode` | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance |

 ### Device
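Review note: for anyone trying the new value, the documented setting would be selected in `invokeai.yaml` roughly as below. This fragment is a sketch inferred from the `Generation` category shown above, not copied from the commit, so verify the section names against your installed version's config file.

```yaml
# Hypothetical invokeai.yaml fragment (section layout assumed from the
# "Generation" category above; check your own config file).
InvokeAI:
  Generation:
    attention_type: sliced          # required for attention_slice_size to apply
    attention_slice_size: balanced  # the value introduced by this commit
```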
@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
     # GENERATION
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
     attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-    attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
     force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)

     # DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAIN VALUES FROM PRE-3.1 CONFIG FILES
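Review note: for context on how these Pydantic fields are consumed downstream, a minimal sketch (assuming the `InvokeAIAppConfig.get_config()` accessor used elsewhere in this commit; the import path matches InvokeAI 3.x):

```python
# Sketch: reading the new field off the app config singleton.
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig.get_config()
if config.attention_type == "sliced":
    # After this commit the value may be "auto", "balanced", "max", or 1-8.
    print(f"attention_slice_size = {config.attention_slice_size}")
```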
@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
         # old settings for defaults
         precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
         device = old_opts.device
-        attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
+        attention_type = old_opts.attention_type
         attention_slice_size = old_opts.attention_slice_size

         self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             name="Attention Slice Size:",
             relx=5,
             editable=False,
-            hidden=True,
+            hidden=attention_type != "sliced",
             color="CONTROL",
             scroll_exit=True,
         )
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             columns=len(ATTENTION_SLICE_CHOICES),
             values=ATTENTION_SLICE_CHOICES,
             value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-            begin_entry_at=2,
             relx=30,
-            hidden=True,
+            hidden=attention_type != "sliced",
             max_height=2,
-            max_width=100,
+            max_width=110,
             scroll_exit=True,
         )
 
@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
         new_opts.device = DEVICE_CHOICES[self.device.value[0]]
         new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
         generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
         for v in GENERATION_OPT_CHOICES:
            setattr(new_opts, v, v in generation_options)
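Review note: the `.value[0]` fix reflects npyscreen's convention that selection widgets store their choice as a list of selected indices, not a scalar; indexing `ATTENTION_SLICE_CHOICES` with a list would raise a TypeError. A self-contained sketch of that convention (plain npyscreen, not InvokeAI's wrapper widgets):

```python
# Sketch: npyscreen selection widgets expose .value as a list of indices,
# so the chosen string is CHOICES[widget.value[0]].
import npyscreen

CHOICES = ["auto", "balanced", "max"] + [str(i) for i in range(1, 9)]

class ConfigForm(npyscreen.Form):
    def create(self):
        # Selection widgets take and return a *list* of selected indices.
        self.slice_size = self.add(
            npyscreen.TitleSelectOne,
            name="Attention Slice Size:",
            values=CHOICES,
            value=[0],  # preselect "auto"
            scroll_exit=True,
        )

def run(*args):
    form = ConfigForm(name="Example")
    form.edit()
    # .value is e.g. [1] after the user picks "balanced":
    return CHOICES[form.slice_size.value[0]]

print(npyscreen.wrapper_basic(run))
```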
@@ -33,7 +33,7 @@ from .diffusion import (
     PostprocessingSettings,
     BasicConditioningInfo,
 )
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size


 @dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             return
         elif config.attention_type == "sliced":
             slice_size = config.attention_slice_size
-            if torch.backends.mps.is_available():  # doesn't auto already do this?
-                slice_size = "max"
+            if slice_size == "auto":
+                slice_size = auto_detect_slice_size(latents)
+            elif slice_size == "balanced":
+                slice_size = "auto"
             self.enable_attention_slicing(slice_size=slice_size)
             return
         elif config.attention_type == "normal":
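Review note: there is a naming indirection here. InvokeAI's `balanced` is forwarded to diffusers as `enable_attention_slicing("auto")` (attention computed in two steps, roughly half the heads per slice), while InvokeAI's `auto` now means "pick based on free memory". A hedged sketch of the user-facing mapping against a vanilla diffusers pipeline (the "balanced" -> "auto" mapping comes from the hunk above; the model id is only an example):

```python
# Sketch of the value mapping onto diffusers' public API.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

configured = "balanced"  # what the user set in invokeai.yaml
if configured == "balanced":
    pipe.enable_attention_slicing("auto")  # diffusers: attention in two slices
elif configured == "max":
    pipe.enable_attention_slicing("max")   # one slice at a time: least memory, slowest
elif isinstance(configured, int):
    pipe.enable_attention_slicing(configured)  # explicit slice size 1-8
```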
@@ -12,3 +12,4 @@ from .devices import (
 )
 from .log import write_log
 from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size
invokeai/backend/util/attention.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
+"""
+Utility routine used for autodetection of optimal slice size
+for attention mechanism.
+"""
+import torch
+
+
+def auto_detect_slice_size(latents: torch.Tensor) -> str:
+    bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
+    max_size_required_for_baddbmm = (
+        16
+        * latents.size(dim=2)
+        * latents.size(dim=3)
+        * latents.size(dim=2)
+        * latents.size(dim=3)
+        * bytes_per_element_needed_for_baddbmm_duplication
+    )
+    if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0):
+        return "max"
+    elif torch.backends.mps.is_available():
+        return "max"
+    else:
+        return "balanced"
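Review note: as extracted, `mem_free` is referenced but never defined in this file, so `auto_detect_slice_size()` would raise a NameError; presumably it is meant to be the free memory on the device holding the latents. Below is a hedged sketch of that lookup (an assumption, not the commit's verbatim code), followed by a worked example of the size estimate with assumed shapes (a 512x512 image gives 4x64x64 latents; in fp16, `element_size()` is 2 bytes):

```python
import psutil
import torch

def free_memory_bytes(latents: torch.Tensor) -> int:
    # Assumed reconstruction of mem_free: query the latents' device if CUDA,
    # otherwise fall back to system RAM (covers cpu and mps).
    if latents.device.type == "cuda":
        free, _total = torch.cuda.mem_get_info(latents.device)
        return free
    return psutil.virtual_memory().free

# Worked example of the estimate above: per-element cost is 2 + 4 = 6 bytes.
latents = torch.zeros(1, 4, 64, 64, dtype=torch.float16)
estimate = 16 * (64 * 64) ** 2 * (latents.element_size() + 4)
print(f"{estimate / 2**30:.1f} GiB")  # 1.5 GiB
```

So for a 512x512 fp16 run the routine wants roughly 2 GiB free (the estimate must stay under three quarters of free memory) before it returns "balanced" rather than falling back to "max".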
@@ -17,7 +17,7 @@ from shutil import get_terminal_size
 from curses import BUTTON2_CLICKED, BUTTON3_CLICKED

 # minimum size for UIs
-MIN_COLS = 130
+MIN_COLS = 150
 MIN_LINES = 40
