Tested on Python 3.10 and 3.11

Lincoln Stein 2023-07-24 17:13:32 -04:00
parent 4f9c728db0
commit fc4e104c61
4 changed files with 142 additions and 141 deletions

invokeai/app/invocations/latent.py

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, validator
 from invokeai.app.invocations.metadata import CoreMetadata
 from invokeai.app.util.step_callback import stable_diffusion_step_callback
-from invokeai.backend.model_management.models.base import ModelType
+from invokeai.backend.model_management.models import ModelType, SilenceWarnings
 from ...backend.model_management.lora import ModelPatcher
 from ...backend.stable_diffusion import PipelineIntermediateState
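
Note: the SilenceWarnings implementation itself is not part of this diff. As a rough sketch of what such a context manager typically does (an assumption about its behavior, not the actual InvokeAI code), it raises the diffusers log level and mutes Python warnings for the duration of the block:

import warnings
from diffusers import logging as diffusers_logging

class SilenceWarnings:
    # Hypothetical sketch: mute diffusers log output and Python warnings.
    def __enter__(self):
        self._verbosity = diffusers_logging.get_verbosity()  # remember prior level
        diffusers_logging.set_verbosity_error()
        warnings.simplefilter("ignore")

    def __exit__(self, exc_type, exc_value, traceback):
        diffusers_logging.set_verbosity(self._verbosity)  # restore on exit
        warnings.simplefilter("default")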
@@ -318,68 +318,69 @@ class TextToLatentsInvocation(BaseInvocation):
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
+        with SilenceWarnings():
+            noise = context.services.latents.get(self.noise.latents_name)

-        # Get the source node id (we are invoking the prepared node)
-        graph_execution_state = context.services.graph_execution_manager.get(
-            context.graph_execution_state_id
-        )
-        source_node_id = graph_execution_state.prepared_source_mapping[self.id]
+            # Get the source node id (we are invoking the prepared node)
+            graph_execution_state = context.services.graph_execution_manager.get(
+                context.graph_execution_state_id
+            )
+            source_node_id = graph_execution_state.prepared_source_mapping[self.id]

-        def step_callback(state: PipelineIntermediateState):
-            self.dispatch_progress(context, source_node_id, state)
+            def step_callback(state: PipelineIntermediateState):
+                self.dispatch_progress(context, source_node_id, state)

-        def _lora_loader():
-            for lora in self.unet.loras:
-                lora_info = context.services.model_manager.get_model(
-                    **lora.dict(exclude={"weight"}), context=context,
-                )
-                yield (lora_info.context.model, lora.weight)
-                del lora_info
-            return
+            def _lora_loader():
+                for lora in self.unet.loras:
+                    lora_info = context.services.model_manager.get_model(
+                        **lora.dict(exclude={"weight"}), context=context,
+                    )
+                    yield (lora_info.context.model, lora.weight)
+                    del lora_info
+                return

-        unet_info = context.services.model_manager.get_model(
-            **self.unet.unet.dict(), context=context,
-        )
-        with ExitStack() as exit_stack,\
-             ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
-             unet_info as unet:
+            unet_info = context.services.model_manager.get_model(
+                **self.unet.unet.dict(), context=context,
+            )
+            with ExitStack() as exit_stack,\
+                 ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
+                 unet_info as unet:

-            noise = noise.to(device=unet.device, dtype=unet.dtype)
+                noise = noise.to(device=unet.device, dtype=unet.dtype)

-            scheduler = get_scheduler(
-                context=context,
-                scheduler_info=self.unet.scheduler,
-                scheduler_name=self.scheduler,
-            )
+                scheduler = get_scheduler(
+                    context=context,
+                    scheduler_info=self.unet.scheduler,
+                    scheduler_name=self.scheduler,
+                )

-            pipeline = self.create_pipeline(unet, scheduler)
-            conditioning_data = self.get_conditioning_data(context, scheduler, unet)
+                pipeline = self.create_pipeline(unet, scheduler)
+                conditioning_data = self.get_conditioning_data(context, scheduler, unet)

-            control_data = self.prep_control_data(
-                model=pipeline, context=context, control_input=self.control,
-                latents_shape=noise.shape,
-                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                do_classifier_free_guidance=True,
-                exit_stack=exit_stack,
-            )
+                control_data = self.prep_control_data(
+                    model=pipeline, context=context, control_input=self.control,
+                    latents_shape=noise.shape,
+                    # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                    do_classifier_free_guidance=True,
+                    exit_stack=exit_stack,
+                )

-            # TODO: Verify the noise is the right size
-            result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
-                latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
-                noise=noise,
-                num_inference_steps=self.steps,
-                conditioning_data=conditioning_data,
-                control_data=control_data, # list[ControlNetData]
-                callback=step_callback,
-            )
+                # TODO: Verify the noise is the right size
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data, # list[ControlNetData]
+                    callback=step_callback,
+                )

-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        result_latents = result_latents.to("cpu")
-        torch.cuda.empty_cache()
+            # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+            result_latents = result_latents.to("cpu")
+            torch.cuda.empty_cache()

-        name = f'{context.graph_execution_state_id}__{self.id}'
-        context.services.latents.save(name, result_latents)
+            name = f'{context.graph_execution_state_id}__{self.id}'
+            context.services.latents.save(name, result_latents)
         return build_latents_output(latents_name=name, latents=result_latents)
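
One detail worth calling out in the hunk above: prep_control_data receives the ExitStack so that any ControlNet models it loads stay resident until the whole with-block unwinds. A minimal, generic illustration of that pattern (not InvokeAI code):

from contextlib import ExitStack, contextmanager

@contextmanager
def loaded_model(name):
    print(f"load {name}")
    yield name
    print(f"unload {name}")

with ExitStack() as stack:
    # Helpers called inside the block can register additional context
    # managers on the stack; everything unwinds together at block exit.
    unet = stack.enter_context(loaded_model("unet"))
    control = stack.enter_context(loaded_model("controlnet"))
    print(f"running inference with {unet} and {control}")
# prints "unload controlnet", then "unload unet" (LIFO order)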
@@ -411,81 +412,82 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
-        latent = context.services.latents.get(self.latents.latents_name)
+        with SilenceWarnings():  # this quenches NSFW nag from diffusers
+            noise = context.services.latents.get(self.noise.latents_name)
+            latent = context.services.latents.get(self.latents.latents_name)

-        # Get the source node id (we are invoking the prepared node)
-        graph_execution_state = context.services.graph_execution_manager.get(
-            context.graph_execution_state_id
-        )
-        source_node_id = graph_execution_state.prepared_source_mapping[self.id]
+            # Get the source node id (we are invoking the prepared node)
+            graph_execution_state = context.services.graph_execution_manager.get(
+                context.graph_execution_state_id
+            )
+            source_node_id = graph_execution_state.prepared_source_mapping[self.id]

-        def step_callback(state: PipelineIntermediateState):
-            self.dispatch_progress(context, source_node_id, state)
+            def step_callback(state: PipelineIntermediateState):
+                self.dispatch_progress(context, source_node_id, state)

-        def _lora_loader():
-            for lora in self.unet.loras:
-                lora_info = context.services.model_manager.get_model(
-                    **lora.dict(exclude={"weight"}), context=context,
-                )
-                yield (lora_info.context.model, lora.weight)
-                del lora_info
-            return
+            def _lora_loader():
+                for lora in self.unet.loras:
+                    lora_info = context.services.model_manager.get_model(
+                        **lora.dict(exclude={"weight"}), context=context,
+                    )
+                    yield (lora_info.context.model, lora.weight)
+                    del lora_info
+                return

-        unet_info = context.services.model_manager.get_model(
-            **self.unet.unet.dict(), context=context,
-        )
-        with ExitStack() as exit_stack,\
-             ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
-             unet_info as unet:
+            unet_info = context.services.model_manager.get_model(
+                **self.unet.unet.dict(), context=context,
+            )
+            with ExitStack() as exit_stack,\
+                 ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
+                 unet_info as unet:

-            noise = noise.to(device=unet.device, dtype=unet.dtype)
-            latent = latent.to(device=unet.device, dtype=unet.dtype)
+                noise = noise.to(device=unet.device, dtype=unet.dtype)
+                latent = latent.to(device=unet.device, dtype=unet.dtype)

-            scheduler = get_scheduler(
-                context=context,
-                scheduler_info=self.unet.scheduler,
-                scheduler_name=self.scheduler,
-            )
+                scheduler = get_scheduler(
+                    context=context,
+                    scheduler_info=self.unet.scheduler,
+                    scheduler_name=self.scheduler,
+                )

-            pipeline = self.create_pipeline(unet, scheduler)
-            conditioning_data = self.get_conditioning_data(context, scheduler, unet)
+                pipeline = self.create_pipeline(unet, scheduler)
+                conditioning_data = self.get_conditioning_data(context, scheduler, unet)

-            control_data = self.prep_control_data(
-                model=pipeline, context=context, control_input=self.control,
-                latents_shape=noise.shape,
-                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                do_classifier_free_guidance=True,
-                exit_stack=exit_stack,
-            )
+                control_data = self.prep_control_data(
+                    model=pipeline, context=context, control_input=self.control,
+                    latents_shape=noise.shape,
+                    # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                    do_classifier_free_guidance=True,
+                    exit_stack=exit_stack,
+                )

-            # TODO: Verify the noise is the right size
-            initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
-                latent, device=unet.device, dtype=latent.dtype
-            )
+                # TODO: Verify the noise is the right size
+                initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
+                    latent, device=unet.device, dtype=latent.dtype
+                )

-            timesteps, _ = pipeline.get_img2img_timesteps(
-                self.steps,
-                self.strength,
-                device=unet.device,
-            )
+                timesteps, _ = pipeline.get_img2img_timesteps(
+                    self.steps,
+                    self.strength,
+                    device=unet.device,
+                )

-            result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
-                latents=initial_latents,
-                timesteps=timesteps,
-                noise=noise,
-                num_inference_steps=self.steps,
-                conditioning_data=conditioning_data,
-                control_data=control_data, # list[ControlNetData]
-                callback=step_callback
-            )
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=initial_latents,
+                    timesteps=timesteps,
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data, # list[ControlNetData]
+                    callback=step_callback
+                )

-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        result_latents = result_latents.to("cpu")
-        torch.cuda.empty_cache()
+            # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+            result_latents = result_latents.to("cpu")
+            torch.cuda.empty_cache()

-        name = f'{context.graph_execution_state_id}__{self.id}'
-        context.services.latents.save(name, result_latents)
+            name = f'{context.graph_execution_state_id}__{self.id}'
+            context.services.latents.save(name, result_latents)
         return build_latents_output(latents_name=name, latents=result_latents)
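
For readers unfamiliar with the img2img path above: strength controls how much of the scheduler's timestep schedule actually runs, and strength == 1.0 starts from pure noise (hence the torch.zeros_like initial latents). A sketch of the usual truncation, modeled on the standard diffusers img2img recipe rather than on InvokeAI's get_img2img_timesteps itself:

import torch

def truncated_timesteps(timesteps: torch.Tensor, steps: int, strength: float) -> torch.Tensor:
    # Keep only the tail of the schedule; strength=1.0 keeps all of it.
    init_timestep = min(int(steps * strength), steps)
    t_start = max(steps - init_timestep, 0)
    return timesteps[t_start:]

schedule = torch.arange(29, -1, -1)  # dummy descending schedule: 29..0
print(len(truncated_timesteps(schedule, steps=30, strength=0.6)))  # -> 18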

invokeai/backend/model_management/model_cache.py

@@ -163,7 +163,6 @@ class ModelCache(object):
         submodel: Optional[SubModelType] = None,
         gpu_load: bool = True,
     ) -> Any:
-
         if not isinstance(model_path, Path):
             model_path = Path(model_path)
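
Minor aside on the surviving lines: the isinstance guard is defensive but not strictly required, since pathlib.Path is idempotent on Path inputs:

from pathlib import Path

p = Path("/models/sd-1/main")  # hypothetical path for illustration
assert Path(p) == p            # wrapping an existing Path is a no-op
assert Path(str(p)) == p       # strings coerce to the same path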

invokeai/backend/model_management/model_manager.py

@@ -391,7 +391,7 @@ class ModelManager(object):
         base_model: BaseModelType,
         model_type: ModelType,
     ) -> str:
-        return f"{base_model}/{model_type}/{model_name}"
+        return f"{base_model.value}/{model_type.value}/{model_name}"

     @classmethod
     def parse_key(cls, model_key: str) -> Tuple[str, BaseModelType, ModelType]:
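
The switch to .value matters for the Python 3.11 support this commit adds: starting with 3.11, Enum.__format__ on str-mixin enums returns the qualified member name rather than the value, which would silently change every model key. A demonstration with a hypothetical stand-in for InvokeAI's enum:

from enum import Enum

class BaseModelType(str, Enum):  # hypothetical stand-in
    StableDiffusion1 = "sd-1"

print(f"{BaseModelType.StableDiffusion1}")
# Python 3.10: "sd-1" (str.__format__ is used)
# Python 3.11+: "BaseModelType.StableDiffusion1"

print(f"{BaseModelType.StableDiffusion1.value}")
# "sd-1" on every version, keeping model keys stable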

pyproject.toml

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "InvokeAI"
 description = "An implementation of Stable Diffusion which provides various new features and options to aid the image generation process"
-requires-python = ">=3.9, <3.11"
+requires-python = ">=3.9, <3.12"
 readme = { content-type = "text/markdown", file = "README.md" }
 keywords = ["stable-diffusion", "AI"]
 dynamic = ["version"]
@@ -32,16 +32,16 @@ classifiers = [
   'Topic :: Scientific/Engineering :: Image Processing',
 ]
 dependencies = [
-  "accelerate~=0.16",
+  "accelerate~=0.21.0",
   "albumentations",
   "click",
   "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
-  "compel==2.0.0",
+  "compel~=2.0.0",
   "controlnet-aux>=0.0.6",
   "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26
   "datasets",
-  "diffusers[torch]~=0.18.1",
-  "dnspython==2.2.1",
+  "diffusers[torch]~=0.18.2",
+  "dnspython~=2.4.0",
   "dynamicprompts",
   "easing-functions",
   "einops",
@@ -54,37 +54,37 @@ dependencies = [
   "flask_cors==3.0.10",
   "flask_socketio==5.3.0",
   "flaskwebgui==1.0.3",
-  "gfpgan==1.3.8",
   "huggingface-hub>=0.11.1",
-  "invisible-watermark>=0.2.0", # needed to install SDXL base and refiner using their repo_ids
+  "invisible-watermark~=0.2.0", # needed to install SDXL base and refiner using their repo_ids
   "matplotlib", # needed for plotting of Penner easing functions
   "mediapipe", # needed for "mediapipeface" controlnet model
   "npyscreen",
-  "numpy<1.24",
+  "numpy==1.24.4",
   "omegaconf",
   "opencv-python",
   "picklescan",
   "pillow",
   "prompt-toolkit",
-  "pympler==1.0.1",
+  "pydantic==1.10.10",
+  "pympler~=1.0.1",
   "pypatchmatch",
   'pyperclip',
   "pyreadline3",
-  "python-multipart==0.0.6",
-  "pytorch-lightning==1.7.7",
+  "python-multipart",
+  "pytorch-lightning",
   "realesrgan",
-  "requests==2.28.2",
+  "requests~=2.28.2",
   "rich~=13.3",
   "safetensors~=0.3.0",
-  "scikit-image>=0.19",
+  "scikit-image~=0.21.0",
   "send2trash",
-  "test-tube>=0.7.5",
-  "torch~=2.0.0",
-  "torchvision>=0.14.1",
-  "torchmetrics==0.11.4",
-  "torchsde==0.2.5",
+  "test-tube~=0.7.5",
+  "torch~=2.0.1",
+  "torchvision~=0.15.2",
+  "torchmetrics~=1.0.1",
+  "torchsde~=0.2.5",
   "transformers~=4.31.0",
-  "uvicorn[standard]==0.21.1",
+  "uvicorn[standard]~=0.21.1",
   "windows-curses; sys_platform=='win32'",
 ]
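
Most of these pins move from == or >= to ~= (compatible release: patch updates allowed, minor bumps excluded). For anyone unsure what a given specifier admits, the packaging library that pip itself relies on can answer directly; a small illustration:

from packaging.specifiers import SpecifierSet

print(SpecifierSet("~=0.21.0").contains("0.21.9"))  # True: patch release allowed
print(SpecifierSet("~=0.21.0").contains("0.22.0"))  # False: minor bump excluded

# The widened requires-python range now admits 3.11, matching the commit message:
print(SpecifierSet(">=3.9, <3.12").contains("3.11.4"))  # True
print(SpecifierSet(">=3.9, <3.12").contains("3.12.0"))  # False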