Mirror of https://github.com/invoke-ai/InvokeAI
add support for "balanced" attention slice size
This commit is contained in:
parent 23b4e1cea0 · commit b69f26c85c
@@ -184,7 +184,7 @@ These options tune InvokeAI's memory and performance characteristics.
 |-----------------------|---------|--------------------------------------------------------------------------------------------------------------------------|
 | `sequential_guidance`  | `false` | Calculate guidance in serial rather than in parallel, lowering memory requirements at the cost of some performance loss   |
 | `attention_type`       | `auto`  | Select the type of attention to use. One of `auto`, `normal`, `xformers`, `sliced`, or `torch-sdp`                        |
-| `attention_slice_size` | `auto`  | When "sliced" attention is selected, set the slice size. One of `auto`, `max` or the integers 1-8                         |
+| `attention_slice_size` | `auto`  | When "sliced" attention is selected, set the slice size. One of `auto`, `balanced`, `max` or the integers 1-8             |
 | `force_tiled_decode`   | `false` | Force the VAE step to decode in tiles, reducing memory consumption at the cost of performance                             |

 ### Device
@@ -248,7 +248,7 @@ class InvokeAIAppConfig(InvokeAISettings):
     # GENERATION
     sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category="Generation", )
     attention_type : Literal[tuple(["auto", "normal", "xformers", "sliced", "torch-sdp"])] = Field(default="auto", description="Attention type", category="Generation", )
-    attention_slice_size: Literal[tuple(["auto", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
+    attention_slice_size: Literal[tuple(["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8])] = Field(default="auto", description='Slice size, valid when attention_type=="sliced"', category="Generation", )
     force_tiled_decode: bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category="Generation",)

     # DEPRECATED FIELDS - STILL HERE IN ORDER TO OBTAN VALUES FROM PRE-3.1 CONFIG FILES
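For illustration only (not part of the commit): a minimal sketch of how the new `balanced` value is selected through the settings class above. The field names and accepted values come from the `Literal` definitions in this hunk; the import path and direct keyword instantiation are assumptions about the surrounding pydantic-based config.

```python
# Hypothetical usage sketch -- exercises the Literal-typed fields shown above.
# The import path is an assumption; it is not named anywhere in this diff.
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig(
    attention_type="sliced",          # auto | normal | xformers | sliced | torch-sdp
    attention_slice_size="balanced",  # auto | balanced | max | an integer 1-8
)
assert config.attention_slice_size == "balanced"
```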
@@ -353,7 +353,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
         # old settings for defaults
         precision = old_opts.precision or ("float32" if program_opts.full_precision else "auto")
         device = old_opts.device
-        attention_type = "xformers" if old_opts.xformers_enabled else old_opts.attention_type
+        attention_type = old_opts.attention_type
         attention_slice_size = old_opts.attention_slice_size

         self.nextrely += 1
@@ -443,7 +443,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             name="Attention Slice Size:",
             relx=5,
             editable=False,
-            hidden=True,
+            hidden=attention_type != "sliced",
             color="CONTROL",
             scroll_exit=True,
         )
@@ -453,11 +453,10 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
             columns=len(ATTENTION_SLICE_CHOICES),
             values=ATTENTION_SLICE_CHOICES,
             value=ATTENTION_SLICE_CHOICES.index(attention_slice_size),
-            begin_entry_at=2,
             relx=30,
-            hidden=True,
+            hidden=attention_type != "sliced",
             max_height=2,
-            max_width=100,
+            max_width=110,
             scroll_exit=True,
         )

@@ -611,7 +610,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
         new_opts.precision = PRECISION_CHOICES[self.precision.value[0]]
         new_opts.device = DEVICE_CHOICES[self.device.value[0]]
         new_opts.attention_type = ATTENTION_CHOICES[self.attention_type.value[0]]
-        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value]
+        new_opts.attention_slice_size = ATTENTION_SLICE_CHOICES[self.attention_slice_size.value[0]]
         generation_options = [GENERATION_OPT_CHOICES[x] for x in self.generation_options.value]
         for v in GENERATION_OPT_CHOICES:
             setattr(new_opts, v, v in generation_options)
@@ -33,7 +33,7 @@ from .diffusion import (
     PostprocessingSettings,
     BasicConditioningInfo,
 )
-from ..util import normalize_device
+from ..util import normalize_device, auto_detect_slice_size


 @dataclass
@@ -296,8 +296,10 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             return
         elif config.attention_type == "sliced":
             slice_size = config.attention_slice_size
-            if torch.backends.mps.is_available():  # doesn't auto already do this?
-                slice_size = "max"
+            if slice_size == "auto":
+                slice_size = auto_detect_slice_size(latents)
+            elif slice_size == "balanced":
+                slice_size = "auto"
             self.enable_attention_slicing(slice_size=slice_size)
             return
         elif config.attention_type == "normal":
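A short sketch (not code from this commit) to make the naming explicit: the user-facing slice-size values resolve to the argument handed to diffusers' `enable_attention_slicing()` roughly as below. `auto` is computed at runtime from the latents tensor via the new helper, InvokeAI's `balanced` is passed through as diffusers' own `auto` slicing mode, and `max` or an explicit integer goes through unchanged.

```python
# Illustrative only; this mirrors the branch above rather than replacing it.
from typing import Union

import torch

from invokeai.backend.util import auto_detect_slice_size  # re-exported by this commit


def resolve_slice_size(configured: Union[str, int], latents: torch.Tensor) -> Union[str, int]:
    if configured == "auto":      # pick a slice size from the latents shape and free memory
        return auto_detect_slice_size(latents)
    if configured == "balanced":  # InvokeAI's "balanced" maps to diffusers' built-in "auto" slicing
        return "auto"
    return configured             # "max" or an explicit integer slice size (1-8)
```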
@@ -12,3 +12,4 @@ from .devices import (
 )
 from .log import write_log
 from .util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name, Chdir
+from .attention import auto_detect_slice_size
invokeai/backend/util/attention.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+# Copyright (c) 2023 Lincoln Stein and the InvokeAI Team
+"""
+Utility routine used for autodetection of optimal slice size
+for attention mechanism.
+"""
+import torch
+
+
+def auto_detect_slice_size(latents: torch.Tensor) -> str:
+    bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
+    max_size_required_for_baddbmm = (
+        16
+        * latents.size(dim=2)
+        * latents.size(dim=3)
+        * latents.size(dim=2)
+        * latents.size(dim=3)
+        * bytes_per_element_needed_for_baddbmm_duplication
+    )
+    if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0):
+        return "max"
+    elif torch.backends.mps.is_available():
+        return "max"
+    else:
+        return "balanced"
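Note that `mem_free` is used in the comparison above but is not defined anywhere in the new module as shown. Below is a hedged sketch of where such a figure might come from; neither `torch.cuda.mem_get_info` nor `psutil` appears in this diff, so treat it purely as an assumption rather than the commit's own fix.

```python
# Hypothetical helper -- one possible source for `mem_free`, not code from this commit.
import psutil
import torch


def estimate_free_memory(latents: torch.Tensor) -> int:
    """Rough estimate of free memory (bytes) on the device holding `latents`."""
    if latents.device.type == "cuda":
        mem_free, _mem_total = torch.cuda.mem_get_info(latents.device)  # free / total VRAM in bytes
        return mem_free
    return psutil.virtual_memory().available  # RAM available to the process (cpu / mps)
```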
@@ -17,7 +17,7 @@ from shutil import get_terminal_size
 from curses import BUTTON2_CLICKED, BUTTON3_CLICKED

 # minimum size for UIs
-MIN_COLS = 130
+MIN_COLS = 150
 MIN_LINES = 40
