diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 6082057bd3..96c536dc94 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, validator from invokeai.app.invocations.metadata import CoreMetadata from invokeai.app.util.step_callback import stable_diffusion_step_callback -from invokeai.backend.model_management.models.base import ModelType +from invokeai.backend.model_management.models import ModelType, SilenceWarnings from ...backend.model_management.lora import ModelPatcher from ...backend.stable_diffusion import PipelineIntermediateState @@ -318,68 +318,69 @@ class TextToLatentsInvocation(BaseInvocation): @torch.no_grad() def invoke(self, context: InvocationContext) -> LatentsOutput: - noise = context.services.latents.get(self.noise.latents_name) + with SilenceWarnings(): + noise = context.services.latents.get(self.noise.latents_name) - # Get the source node id (we are invoking the prepared node) - graph_execution_state = context.services.graph_execution_manager.get( - context.graph_execution_state_id - ) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] + # Get the source node id (we are invoking the prepared node) + graph_execution_state = context.services.graph_execution_manager.get( + context.graph_execution_state_id + ) + source_node_id = graph_execution_state.prepared_source_mapping[self.id] - def step_callback(state: PipelineIntermediateState): - self.dispatch_progress(context, source_node_id, state) + def step_callback(state: PipelineIntermediateState): + self.dispatch_progress(context, source_node_id, state) - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), context=context, + def _lora_loader(): + for lora in self.unet.loras: + lora_info = context.services.model_manager.get_model( + **lora.dict(exclude={"weight"}), context=context, + ) + yield (lora_info.context.model, lora.weight) + del lora_info + return + + unet_info = context.services.model_manager.get_model( + **self.unet.unet.dict(), context=context, + ) + with ExitStack() as exit_stack,\ + ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\ + unet_info as unet: + + noise = noise.to(device=unet.device, dtype=unet.dtype) + + scheduler = get_scheduler( + context=context, + scheduler_info=self.unet.scheduler, + scheduler_name=self.scheduler, ) - yield (lora_info.context.model, lora.weight) - del lora_info - return - unet_info = context.services.model_manager.get_model( - **self.unet.unet.dict(), context=context, - ) - with ExitStack() as exit_stack,\ - ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\ - unet_info as unet: + pipeline = self.create_pipeline(unet, scheduler) + conditioning_data = self.get_conditioning_data(context, scheduler, unet) - noise = noise.to(device=unet.device, dtype=unet.dtype) + control_data = self.prep_control_data( + model=pipeline, context=context, control_input=self.control, + latents_shape=noise.shape, + # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) + do_classifier_free_guidance=True, + exit_stack=exit_stack, + ) - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) + # TODO: Verify the noise is the right size + result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( + latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)), + noise=noise, + num_inference_steps=self.steps, + conditioning_data=conditioning_data, + control_data=control_data, # list[ControlNetData] + callback=step_callback, + ) - pipeline = self.create_pipeline(unet, scheduler) - conditioning_data = self.get_conditioning_data(context, scheduler, unet) + # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + result_latents = result_latents.to("cpu") + torch.cuda.empty_cache() - control_data = self.prep_control_data( - model=pipeline, context=context, control_input=self.control, - latents_shape=noise.shape, - # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) - do_classifier_free_guidance=True, - exit_stack=exit_stack, - ) - - # TODO: Verify the noise is the right size - result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( - latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)), - noise=noise, - num_inference_steps=self.steps, - conditioning_data=conditioning_data, - control_data=control_data, # list[ControlNetData] - callback=step_callback, - ) - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - result_latents = result_latents.to("cpu") - torch.cuda.empty_cache() - - name = f'{context.graph_execution_state_id}__{self.id}' - context.services.latents.save(name, result_latents) + name = f'{context.graph_execution_state_id}__{self.id}' + context.services.latents.save(name, result_latents) return build_latents_output(latents_name=name, latents=result_latents) @@ -411,81 +412,82 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation): @torch.no_grad() def invoke(self, context: InvocationContext) -> LatentsOutput: - noise = context.services.latents.get(self.noise.latents_name) - latent = context.services.latents.get(self.latents.latents_name) + with SilenceWarnings(): # this quenches NSFW nag from diffusers + noise = context.services.latents.get(self.noise.latents_name) + latent = context.services.latents.get(self.latents.latents_name) - # Get the source node id (we are invoking the prepared node) - graph_execution_state = context.services.graph_execution_manager.get( - context.graph_execution_state_id - ) - source_node_id = graph_execution_state.prepared_source_mapping[self.id] + # Get the source node id (we are invoking the prepared node) + graph_execution_state = context.services.graph_execution_manager.get( + context.graph_execution_state_id + ) + source_node_id = graph_execution_state.prepared_source_mapping[self.id] - def step_callback(state: PipelineIntermediateState): - self.dispatch_progress(context, source_node_id, state) + def step_callback(state: PipelineIntermediateState): + self.dispatch_progress(context, source_node_id, state) - def _lora_loader(): - for lora in self.unet.loras: - lora_info = context.services.model_manager.get_model( - **lora.dict(exclude={"weight"}), context=context, + def _lora_loader(): + for lora in self.unet.loras: + lora_info = context.services.model_manager.get_model( + **lora.dict(exclude={"weight"}), context=context, + ) + yield (lora_info.context.model, lora.weight) + del lora_info + return + + unet_info = context.services.model_manager.get_model( + **self.unet.unet.dict(), context=context, + ) + with ExitStack() as exit_stack,\ + ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\ + unet_info as unet: + + noise = noise.to(device=unet.device, dtype=unet.dtype) + latent = latent.to(device=unet.device, dtype=unet.dtype) + + scheduler = get_scheduler( + context=context, + scheduler_info=self.unet.scheduler, + scheduler_name=self.scheduler, ) - yield (lora_info.context.model, lora.weight) - del lora_info - return - unet_info = context.services.model_manager.get_model( - **self.unet.unet.dict(), context=context, - ) - with ExitStack() as exit_stack,\ - ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\ - unet_info as unet: + pipeline = self.create_pipeline(unet, scheduler) + conditioning_data = self.get_conditioning_data(context, scheduler, unet) - noise = noise.to(device=unet.device, dtype=unet.dtype) - latent = latent.to(device=unet.device, dtype=unet.dtype) + control_data = self.prep_control_data( + model=pipeline, context=context, control_input=self.control, + latents_shape=noise.shape, + # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) + do_classifier_free_guidance=True, + exit_stack=exit_stack, + ) - scheduler = get_scheduler( - context=context, - scheduler_info=self.unet.scheduler, - scheduler_name=self.scheduler, - ) + # TODO: Verify the noise is the right size + initial_latents = latent if self.strength < 1.0 else torch.zeros_like( + latent, device=unet.device, dtype=latent.dtype + ) - pipeline = self.create_pipeline(unet, scheduler) - conditioning_data = self.get_conditioning_data(context, scheduler, unet) + timesteps, _ = pipeline.get_img2img_timesteps( + self.steps, + self.strength, + device=unet.device, + ) - control_data = self.prep_control_data( - model=pipeline, context=context, control_input=self.control, - latents_shape=noise.shape, - # do_classifier_free_guidance=(self.cfg_scale >= 1.0)) - do_classifier_free_guidance=True, - exit_stack=exit_stack, - ) + result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( + latents=initial_latents, + timesteps=timesteps, + noise=noise, + num_inference_steps=self.steps, + conditioning_data=conditioning_data, + control_data=control_data, # list[ControlNetData] + callback=step_callback + ) - # TODO: Verify the noise is the right size - initial_latents = latent if self.strength < 1.0 else torch.zeros_like( - latent, device=unet.device, dtype=latent.dtype - ) + # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + result_latents = result_latents.to("cpu") + torch.cuda.empty_cache() - timesteps, _ = pipeline.get_img2img_timesteps( - self.steps, - self.strength, - device=unet.device, - ) - - result_latents, result_attention_map_saver = pipeline.latents_from_embeddings( - latents=initial_latents, - timesteps=timesteps, - noise=noise, - num_inference_steps=self.steps, - conditioning_data=conditioning_data, - control_data=control_data, # list[ControlNetData] - callback=step_callback - ) - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - result_latents = result_latents.to("cpu") - torch.cuda.empty_cache() - - name = f'{context.graph_execution_state_id}__{self.id}' - context.services.latents.save(name, result_latents) + name = f'{context.graph_execution_state_id}__{self.id}' + context.services.latents.save(name, result_latents) return build_latents_output(latents_name=name, latents=result_latents) diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py index 5ca17f00fc..59644966be 100644 --- a/invokeai/backend/model_management/model_cache.py +++ b/invokeai/backend/model_management/model_cache.py @@ -163,7 +163,6 @@ class ModelCache(object): submodel: Optional[SubModelType] = None, gpu_load: bool = True, ) -> Any: - if not isinstance(model_path, Path): model_path = Path(model_path) diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py index a0b3e6d625..098258153e 100644 --- a/invokeai/backend/model_management/model_manager.py +++ b/invokeai/backend/model_management/model_manager.py @@ -391,7 +391,7 @@ class ModelManager(object): base_model: BaseModelType, model_type: ModelType, ) -> str: - return f"{base_model}/{model_type}/{model_name}" + return f"{base_model.value}/{model_type.value}/{model_name}" @classmethod def parse_key(cls, model_key: str) -> Tuple[str, BaseModelType, ModelType]: diff --git a/pyproject.toml b/pyproject.toml index 5cc6971df6..816a050393 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "InvokeAI" description = "An implementation of Stable Diffusion which provides various new features and options to aid the image generation process" -requires-python = ">=3.9, <3.11" +requires-python = ">=3.9, <3.12" readme = { content-type = "text/markdown", file = "README.md" } keywords = ["stable-diffusion", "AI"] dynamic = ["version"] @@ -32,16 +32,16 @@ classifiers = [ 'Topic :: Scientific/Engineering :: Image Processing', ] dependencies = [ - "accelerate~=0.16", + "accelerate~=0.21.0", "albumentations", "click", - "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", - "compel==2.0.0", + "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", + "compel~=2.0.0", "controlnet-aux>=0.0.6", - "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 + "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "datasets", - "diffusers[torch]~=0.18.1", - "dnspython==2.2.1", + "diffusers[torch]~=0.18.2", + "dnspython~=2.4.0", "dynamicprompts", "easing-functions", "einops", @@ -54,37 +54,37 @@ dependencies = [ "flask_cors==3.0.10", "flask_socketio==5.3.0", "flaskwebgui==1.0.3", - "gfpgan==1.3.8", "huggingface-hub>=0.11.1", - "invisible-watermark>=0.2.0", # needed to install SDXL base and refiner using their repo_ids + "invisible-watermark~=0.2.0", # needed to install SDXL base and refiner using their repo_ids "matplotlib", # needed for plotting of Penner easing functions "mediapipe", # needed for "mediapipeface" controlnet model "npyscreen", - "numpy<1.24", + "numpy==1.24.4", "omegaconf", "opencv-python", "picklescan", "pillow", "prompt-toolkit", - "pympler==1.0.1", + "pydantic==1.10.10", + "pympler~=1.0.1", "pypatchmatch", 'pyperclip', "pyreadline3", - "python-multipart==0.0.6", - "pytorch-lightning==1.7.7", + "python-multipart", + "pytorch-lightning", "realesrgan", - "requests==2.28.2", + "requests~=2.28.2", "rich~=13.3", "safetensors~=0.3.0", - "scikit-image>=0.19", + "scikit-image~=0.21.0", "send2trash", - "test-tube>=0.7.5", - "torch~=2.0.0", - "torchvision>=0.14.1", - "torchmetrics==0.11.4", - "torchsde==0.2.5", + "test-tube~=0.7.5", + "torch~=2.0.1", + "torchvision~=0.15.2", + "torchmetrics~=1.0.1", + "torchsde~=0.2.5", "transformers~=4.31.0", - "uvicorn[standard]==0.21.1", + "uvicorn[standard]~=0.21.1", "windows-curses; sys_platform=='win32'", ]