Tested on Python 3.11 and 3.10.

2025-07-26 05:17:55 +00:00 · 2023-07-24 17:13:32 -04:00
parent 4f9c728db0
commit fc4e104c61
4 changed files with 142 additions and 141 deletions
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, validator

 from invokeai.app.invocations.metadata import CoreMetadata
 from invokeai.app.util.step_callback import stable_diffusion_step_callback
-from invokeai.backend.model_management.models.base import ModelType
+from invokeai.backend.model_management.models import ModelType, SilenceWarnings

 from ...backend.model_management.lora import ModelPatcher
 from ...backend.stable_diffusion import PipelineIntermediateState
@@ -318,68 +318,69 @@ class TextToLatentsInvocation(BaseInvocation):

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
+        with SilenceWarnings():
+            noise = context.services.latents.get(self.noise.latents_name)

-        # Get the source node id (we are invoking the prepared node)
-        graph_execution_state = context.services.graph_execution_manager.get(
-            context.graph_execution_state_id
-        )
-        source_node_id = graph_execution_state.prepared_source_mapping[self.id]
+            # Get the source node id (we are invoking the prepared node)
+            graph_execution_state = context.services.graph_execution_manager.get(
+                context.graph_execution_state_id
+            )
+            source_node_id = graph_execution_state.prepared_source_mapping[self.id]

-        def step_callback(state: PipelineIntermediateState):
-            self.dispatch_progress(context, source_node_id, state)
+            def step_callback(state: PipelineIntermediateState):
+                self.dispatch_progress(context, source_node_id, state)

-        def _lora_loader():
-            for lora in self.unet.loras:
-                lora_info = context.services.model_manager.get_model(
-                    **lora.dict(exclude={"weight"}), context=context,
+            def _lora_loader():
+                for lora in self.unet.loras:
+                    lora_info = context.services.model_manager.get_model(
+                        **lora.dict(exclude={"weight"}), context=context,
+                    )
+                    yield (lora_info.context.model, lora.weight)
+                    del lora_info
+                return
+
+            unet_info = context.services.model_manager.get_model(
+                **self.unet.unet.dict(), context=context,
+            )
+            with ExitStack() as exit_stack,\
+                    ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
+                    unet_info as unet:
+
+                noise = noise.to(device=unet.device, dtype=unet.dtype)
+
+                scheduler = get_scheduler(
+                    context=context,
+                    scheduler_info=self.unet.scheduler,
+                    scheduler_name=self.scheduler,
                )
-                yield (lora_info.context.model, lora.weight)
-                del lora_info
-            return

-        unet_info = context.services.model_manager.get_model(
-            **self.unet.unet.dict(), context=context,
-        )
-        with ExitStack() as exit_stack,\
-                ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
-                unet_info as unet:
+                pipeline = self.create_pipeline(unet, scheduler)
+                conditioning_data = self.get_conditioning_data(context, scheduler, unet)

-            noise = noise.to(device=unet.device, dtype=unet.dtype)
+                control_data = self.prep_control_data(
+                    model=pipeline, context=context, control_input=self.control,
+                    latents_shape=noise.shape,
+                    # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                    do_classifier_free_guidance=True,
+                    exit_stack=exit_stack,
+                )

-            scheduler = get_scheduler(
-                context=context,
-                scheduler_info=self.unet.scheduler,
-                scheduler_name=self.scheduler,
-            )
+                # TODO: Verify the noise is the right size
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data,  # list[ControlNetData]
+                    callback=step_callback,
+                )

-            pipeline = self.create_pipeline(unet, scheduler)
-            conditioning_data = self.get_conditioning_data(context, scheduler, unet)
+            # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+            result_latents = result_latents.to("cpu")
+            torch.cuda.empty_cache()

-            control_data = self.prep_control_data(
-                model=pipeline, context=context, control_input=self.control,
-                latents_shape=noise.shape,
-                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                do_classifier_free_guidance=True,
-                exit_stack=exit_stack,
-            )
-
-            # TODO: Verify the noise is the right size
-            result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
-                latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
-                noise=noise,
-                num_inference_steps=self.steps,
-                conditioning_data=conditioning_data,
-                control_data=control_data,  # list[ControlNetData]
-                callback=step_callback,
-            )
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        result_latents = result_latents.to("cpu")
-        torch.cuda.empty_cache()
-
-        name = f'{context.graph_execution_state_id}__{self.id}'
-        context.services.latents.save(name, result_latents)
+            name = f'{context.graph_execution_state_id}__{self.id}'
+            context.services.latents.save(name, result_latents)
        return build_latents_output(latents_name=name, latents=result_latents)


@@ -411,81 +412,82 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
-        latent = context.services.latents.get(self.latents.latents_name)
+        with SilenceWarnings():  # this quenches NSFW nag from diffusers
+            noise = context.services.latents.get(self.noise.latents_name)
+            latent = context.services.latents.get(self.latents.latents_name)

-        # Get the source node id (we are invoking the prepared node)
-        graph_execution_state = context.services.graph_execution_manager.get(
-            context.graph_execution_state_id
-        )
-        source_node_id = graph_execution_state.prepared_source_mapping[self.id]
+            # Get the source node id (we are invoking the prepared node)
+            graph_execution_state = context.services.graph_execution_manager.get(
+                context.graph_execution_state_id
+            )
+            source_node_id = graph_execution_state.prepared_source_mapping[self.id]

-        def step_callback(state: PipelineIntermediateState):
-            self.dispatch_progress(context, source_node_id, state)
+            def step_callback(state: PipelineIntermediateState):
+                self.dispatch_progress(context, source_node_id, state)

-        def _lora_loader():
-            for lora in self.unet.loras:
-                lora_info = context.services.model_manager.get_model(
-                    **lora.dict(exclude={"weight"}), context=context,
+            def _lora_loader():
+                for lora in self.unet.loras:
+                    lora_info = context.services.model_manager.get_model(
+                        **lora.dict(exclude={"weight"}), context=context,
+                    )
+                    yield (lora_info.context.model, lora.weight)
+                    del lora_info
+                return
+
+            unet_info = context.services.model_manager.get_model(
+                **self.unet.unet.dict(), context=context,
+            )
+            with ExitStack() as exit_stack,\
+                    ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
+                    unet_info as unet:
+
+                noise = noise.to(device=unet.device, dtype=unet.dtype)
+                latent = latent.to(device=unet.device, dtype=unet.dtype)
+
+                scheduler = get_scheduler(
+                    context=context,
+                    scheduler_info=self.unet.scheduler,
+                    scheduler_name=self.scheduler,
                )
-                yield (lora_info.context.model, lora.weight)
-                del lora_info
-            return

-        unet_info = context.services.model_manager.get_model(
-            **self.unet.unet.dict(), context=context,
-        )
-        with ExitStack() as exit_stack,\
-                ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
-                unet_info as unet:
+                pipeline = self.create_pipeline(unet, scheduler)
+                conditioning_data = self.get_conditioning_data(context, scheduler, unet)

-            noise = noise.to(device=unet.device, dtype=unet.dtype)
-            latent = latent.to(device=unet.device, dtype=unet.dtype)
+                control_data = self.prep_control_data(
+                    model=pipeline, context=context, control_input=self.control,
+                    latents_shape=noise.shape,
+                    # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
+                    do_classifier_free_guidance=True,
+                    exit_stack=exit_stack,
+                )

-            scheduler = get_scheduler(
-                context=context,
-                scheduler_info=self.unet.scheduler,
-                scheduler_name=self.scheduler,
-            )
+                # TODO: Verify the noise is the right size
+                initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
+                    latent, device=unet.device, dtype=latent.dtype
+                )

-            pipeline = self.create_pipeline(unet, scheduler)
-            conditioning_data = self.get_conditioning_data(context, scheduler, unet)
+                timesteps, _ = pipeline.get_img2img_timesteps(
+                    self.steps,
+                    self.strength,
+                    device=unet.device,
+                )

-            control_data = self.prep_control_data(
-                model=pipeline, context=context, control_input=self.control,
-                latents_shape=noise.shape,
-                # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
-                do_classifier_free_guidance=True,
-                exit_stack=exit_stack,
-            )
+                result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
+                    latents=initial_latents,
+                    timesteps=timesteps,
+                    noise=noise,
+                    num_inference_steps=self.steps,
+                    conditioning_data=conditioning_data,
+                    control_data=control_data,  # list[ControlNetData]
+                    callback=step_callback
+                )

-            # TODO: Verify the noise is the right size
-            initial_latents = latent if self.strength < 1.0 else torch.zeros_like(
-                latent, device=unet.device, dtype=latent.dtype
-            )
+            # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
+            result_latents = result_latents.to("cpu")
+            torch.cuda.empty_cache()

-            timesteps, _ = pipeline.get_img2img_timesteps(
-                self.steps,
-                self.strength,
-                device=unet.device,
-            )
-
-            result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
-                latents=initial_latents,
-                timesteps=timesteps,
-                noise=noise,
-                num_inference_steps=self.steps,
-                conditioning_data=conditioning_data,
-                control_data=control_data,  # list[ControlNetData]
-                callback=step_callback
-            )
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        result_latents = result_latents.to("cpu")
-        torch.cuda.empty_cache()
-
-        name = f'{context.graph_execution_state_id}__{self.id}'
-        context.services.latents.save(name, result_latents)
+            name = f'{context.graph_execution_state_id}__{self.id}'
+            context.services.latents.save(name, result_latents)
        return build_latents_output(latents_name=name, latents=result_latents)


--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -163,7 +163,6 @@ class ModelCache(object):
        submodel: Optional[SubModelType] = None,
        gpu_load: bool = True,
    ) -> Any:
-
        if not isinstance(model_path, Path):
            model_path = Path(model_path)

--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -391,7 +391,7 @@ class ModelManager(object):
        base_model: BaseModelType,
        model_type: ModelType,
    ) -> str:
-        return f"{base_model}/{model_type}/{model_name}"
+        return f"{base_model.value}/{model_type.value}/{model_name}"

    @classmethod
    def parse_key(cls, model_key: str) -> Tuple[str, BaseModelType, ModelType]:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "InvokeAI"
 description = "An implementation of Stable Diffusion which provides various new features and options to aid the image generation process"
-requires-python = ">=3.9, <3.11"
+requires-python = ">=3.9, <3.12"
 readme = { content-type = "text/markdown", file = "README.md" }
 keywords = ["stable-diffusion", "AI"]
 dynamic = ["version"]
@@ -32,16 +32,16 @@ classifiers = [
  'Topic :: Scientific/Engineering :: Image Processing',
 ]
 dependencies = [
-  "accelerate~=0.16",
+  "accelerate~=0.21.0",
  "albumentations",
  "click",
-  "clip_anytorch",          # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
-  "compel==2.0.0",
+  "clip_anytorch",  # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
+  "compel~=2.0.0",
  "controlnet-aux>=0.0.6",
-  "timm==0.6.13",           # needed to override timm latest in controlnet_aux, see  https://github.com/isl-org/ZoeDepth/issues/26
+  "timm==0.6.13",   # needed to override timm latest in controlnet_aux, see  https://github.com/isl-org/ZoeDepth/issues/26
  "datasets",
-  "diffusers[torch]~=0.18.1",
-  "dnspython==2.2.1",
+  "diffusers[torch]~=0.18.2",
+  "dnspython~=2.4.0",
  "dynamicprompts",
  "easing-functions",
  "einops",
@@ -54,37 +54,37 @@ dependencies = [
  "flask_cors==3.0.10",
  "flask_socketio==5.3.0",
  "flaskwebgui==1.0.3",
-  "gfpgan==1.3.8",
  "huggingface-hub>=0.11.1",
-  "invisible-watermark>=0.2.0", # needed to install SDXL base and refiner using their repo_ids
+  "invisible-watermark~=0.2.0", # needed to install SDXL base and refiner using their repo_ids
  "matplotlib",                 # needed for plotting of Penner easing functions
  "mediapipe",                  # needed for "mediapipeface" controlnet model
  "npyscreen",
-  "numpy<1.24",
+  "numpy==1.24.4",
  "omegaconf",
  "opencv-python",
  "picklescan",
  "pillow",
  "prompt-toolkit",
-  "pympler==1.0.1",
+  "pydantic==1.10.10",
+  "pympler~=1.0.1",
  "pypatchmatch",
  'pyperclip',
  "pyreadline3",
-  "python-multipart==0.0.6",
-  "pytorch-lightning==1.7.7",
+  "python-multipart",
+  "pytorch-lightning",
  "realesrgan",
-  "requests==2.28.2",
+  "requests~=2.28.2",
  "rich~=13.3",
  "safetensors~=0.3.0",
-  "scikit-image>=0.19",
+  "scikit-image~=0.21.0",
  "send2trash",
-  "test-tube>=0.7.5",
-  "torch~=2.0.0",
-  "torchvision>=0.14.1",
-  "torchmetrics==0.11.4",
-  "torchsde==0.2.5",
+  "test-tube~=0.7.5",
+  "torch~=2.0.1",
+  "torchvision~=0.15.2",
+  "torchmetrics~=1.0.1",
+  "torchsde~=0.2.5",
  "transformers~=4.31.0",
-  "uvicorn[standard]==0.21.1",
+  "uvicorn[standard]~=0.21.1",
  "windows-curses; sys_platform=='win32'",
 ]