Mirror of https://github.com/invoke-ai/InvokeAI
wired attention configuration into backend
@@ -227,9 +227,9 @@ class InvokeAIAppConfig(InvokeAISettings):
     version : bool = Field(default=False, description="Show InvokeAI version and exit", category="Other")

     # CACHE
-    ram : Union[float, Literal["auto"]] = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching (floating point number or 'auto')", category="Cache", )
-    vram : Union[float, Literal["auto"]] = Field(default=0.25, ge=0, description="Amount of VRAM reserved for model storage (floating point number or 'auto')", category="Cache", )
-    lazy_offload : bool = Field(default=True, description="Keep models in VRAM until their space is needed", category="Cache", )
+    ram : Union[float, Literal["auto"]] = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching (floating point number or 'auto')", category="Model Cache", )
+    vram : Union[float, Literal["auto"]] = Field(default=0.25, ge=0, description="Amount of VRAM reserved for model storage (floating point number or 'auto')", category="Model Cache", )
+    lazy_offload : bool = Field(default=True, description="Keep models in VRAM until their space is needed", category="Model Cache", )

     # DEVICE
     device : Literal[tuple(["auto", "cpu", "cuda", "cuda:1", "mps"])] = Field(default="auto", description="Generation device", category="Device", )
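The cache fields keep their names and defaults; only the category string changes from "Cache" to "Model Cache". As a minimal sketch of how these settings are read, assuming the import path below (get_config() and the field names come from the diff above):

```python
# Minimal sketch, assuming this import path; get_config() and the field names
# (ram, vram, lazy_offload) are taken from the diff above.
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig.get_config()
print(config.ram)           # max host RAM (GB) for the model cache, or "auto"
print(config.vram)          # VRAM (GB) reserved for model storage, or "auto"
print(config.lazy_offload)  # keep models in VRAM until their space is needed
```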
@@ -246,8 +246,6 @@ class InvokeAIAppConfig(InvokeAISettings):
     free_gpu_mem : Optional[bool] = Field(default=None, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
     max_cache_size : Optional[float] = Field(default=None, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
     max_vram_cache_size : Optional[float] = Field(default=None, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
-    precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='auto',description='Floating point precision', category='Memory/Performance')
-    sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
     xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')
     tiled_decode : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category='Memory/Performance')

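The removed precision field uses the Literal[tuple([...])] pattern that appears throughout this settings class: pydantic restricts the field to the listed strings at validation time. A self-contained illustration with a hypothetical standalone model (not InvokeAI's class):

```python
# Standalone illustration of the Literal[tuple([...])] pattern; the Example
# model is hypothetical and not part of InvokeAI.
from typing import Literal
from pydantic import BaseModel, ValidationError

class Example(BaseModel):
    precision: Literal[tuple(['auto', 'float16', 'float32', 'autocast'])] = 'auto'

print(Example(precision='float16').precision)  # accepted
try:
    Example(precision='bfloat16')              # not in the allowed set
except ValidationError:
    print("rejected by validation")
```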
@@ -2,10 +2,8 @@ from __future__ import annotations

 import dataclasses
 import inspect
-import math
-import secrets
 from dataclasses import dataclass, field
-from typing import Any, Callable, Generic, List, Optional, Type, Union
+from typing import Any, Callable, List, Optional, Union

 import PIL.Image
 import einops
@@ -293,6 +291,22 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
         if xformers is available, use it, otherwise use sliced attention.
         """
         config = InvokeAIAppConfig.get_config()
+        if config.attention_type == "xformers":
+            self.enable_xformers_memory_efficient_attention()
+            return
+        elif config.attention_type == "sliced":
+            slice_size = config.attention_slice_size
+            if torch.backends.mps.is_available():  # doesn't auto already do this?
+                slice_size = "max"
+            self.enable_attention_slicing(slice_size=slice_size)
+            return
+        elif config.attention_type == "normal":
+            self.disable_attention_slicing()
+            return
+        elif config.attention_type == "torch-sdp":
+            raise Exception("torch-sdp attention slicing not yet implemented")
+
+        # the remainder of this code is called when attention_type=='auto'
         if self.unet.device.type == "cuda":
             if is_xformers_available() and not config.disable_xformers:
                 self.enable_xformers_memory_efficient_attention()
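The patched method branches on two new settings, attention_type and attention_slice_size, whose field declarations are not part of this diff. A hedged sketch of how they are consumed, with the allowed values assumed from the branches above:

```python
# Hedged sketch of the two settings this method reads; their declarations are
# outside this diff, so the value sets below are assumptions taken from the
# branches handled above.
from invokeai.app.services.config import InvokeAIAppConfig

config = InvokeAIAppConfig.get_config()

# attention_type selects the branch: "auto", "normal", "xformers", "sliced",
# or "torch-sdp"; values normally come from invokeai.yaml or CLI flags.
if config.attention_type == "sliced":
    # attention_slice_size is forwarded as slice_size ("max" is forced on MPS).
    print(f"sliced attention, slice_size={config.attention_slice_size!r}")
else:
    print(f"attention_type={config.attention_type!r}")
```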
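The torch-sdp branch is only a placeholder in this commit and raises immediately. One possible way to fill it in later (an assumption, not the project's actual implementation) is diffusers' AttnProcessor2_0, which delegates to torch.nn.functional.scaled_dot_product_attention:

```python
# Hypothetical follow-up, not part of this commit: route "torch-sdp" to
# diffusers' PyTorch-2.0 scaled-dot-product attention processor.
from diffusers.models.attention_processor import AttnProcessor2_0

def enable_torch_sdp_attention(pipe) -> None:
    # Requires PyTorch >= 2.0; AttnProcessor2_0 wraps
    # torch.nn.functional.scaled_dot_product_attention internally.
    pipe.unet.set_attn_processor(AttnProcessor2_0())
```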