Mirror of https://github.com/invoke-ai/InvokeAI, synced 2024-08-30 20:32:17 +00:00
hacked in stable-fast; can generate one image before crashing
@@ -39,6 +39,14 @@ from .models import BaseModelType, ModelBase, ModelType, SubModelType
 if choose_torch_device() == torch.device("mps"):
     from torch import mps
 
+sfast_available = True
+if sfast_available:
+    from sfast.compilers.diffusion_pipeline_compiler import (compile,
+                                                             compile_unet,
+                                                             compile_vae,
+                                                             CompilationConfig
+                                                             )
+
 # Maximum size of the cache, in gigs
 # Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
 DEFAULT_MAX_CACHE_SIZE = 6.0
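Note: sfast_available is hard-coded to True above, so the import block raises ImportError whenever stable-fast is absent. A more defensive probe (a sketch, not part of this commit) would derive the flag from the import itself:

    # Sketch: set the flag from an import probe instead of hard-coding it.
    try:
        from sfast.compilers.diffusion_pipeline_compiler import (
            compile,
            compile_unet,
            compile_vae,
            CompilationConfig,
        )

        sfast_available = True
    except ImportError:
        sfast_available = False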
@@ -276,6 +284,8 @@ class ModelCache(object):
             self._cache_stack.remove(key)
         self._cache_stack.append(key)
 
+        if sfast_available and submodel:
+            cache_entry.model = self._compile_model(cache_entry.model, submodel)
         return self.ModelLocker(self, key, cache_entry.model, gpu_load, cache_entry.size)
 
     def _move_model_to_device(self, key: str, target_device: torch.device):
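This hook recompiles the fetched submodel on every get_model() hit. If repeated recompilation contributes to the crash noted in the commit message, one hardening option is to compile each cache entry at most once; in the sketch below, _compiled_keys is a hypothetical set initialized in ModelCache.__init__, not something this commit adds:

    # Sketch: compile each cache entry once and remember that we did.
    if sfast_available and submodel and key not in self._compiled_keys:
        cache_entry.model = self._compile_model(cache_entry.model, submodel)
        self._compiled_keys.add(key)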
@@ -322,6 +332,20 @@ class ModelCache(object):
                 f"{get_pretty_snapshot_diff(snapshot_before, snapshot_after)}"
             )
 
+    def _compile_model(self, model, model_type):
+        config = CompilationConfig.Default()
+        config.enable_xformers = True
+        config.enable_triton = True
+        config.enable_jit_freeze = True
+        config.enable_cuda_graph = True
+        if model_type == SubModelType("unet"):
+            return compile_unet(model, config)
+        elif model_type == SubModelType("vae"):
+            return compile_vae(model, config)
+        else:
+            return model
+
+
     class ModelLocker(object):
         def __init__(self, cache, key, model, gpu_load, size_needed):
             """
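_compile_model() targets individual submodels via compile_unet/compile_vae, which fits this cache since it hands out components rather than whole pipelines. For comparison, stable-fast's whole-pipeline entry point, compile(), works roughly as in the sketch below; the model id and prompt are illustrative, and it assumes a CUDA build of torch with xformers and triton installed (matching the flags enabled above):

    import torch
    from diffusers import StableDiffusionPipeline
    from sfast.compilers.diffusion_pipeline_compiler import (compile,
                                                             CompilationConfig)

    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
    ).to("cuda")

    config = CompilationConfig.Default()
    config.enable_xformers = True    # same flags the hunk above enables
    config.enable_triton = True
    config.enable_cuda_graph = True
    pipe = compile(pipe, config)     # compiles the whole pipeline at once

    image = pipe("an astronaut riding a horse").images[0]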