Merge branch 'main' into release/invokeai-3-0-1

Lincoln Stein 2023-07-27 15:21:08 -04:00
commit 006075483d
9 changed files with 173 additions and 157 deletions

View File

@@ -123,7 +123,7 @@ and go to http://localhost:9090.
 ### Command-Line Installation (for developers and users familiar with Terminals)
 
-You must have Python 3.9 or 3.10 installed on your machine. Earlier or
+You must have Python 3.9 through 3.11 installed on your machine. Earlier or
 later versions are not supported.
 Node.js also needs to be installed along with yarn (can be installed with
 the command `npm install -g yarn` if needed)

View File

@@ -40,10 +40,8 @@ experimental versions later.
 this, open up a command-line window ("Terminal" on Linux and
 Macintosh, "Command" or "Powershell" on Windows) and type `python
 --version`. If Python is installed, it will print out the version
-number. If it is version `3.9.*` or `3.10.*`, you meet
-requirements. We do not recommend using Python 3.11 or higher,
-as not all the libraries that InvokeAI depends on work properly
-with this version.
+number. If it is version `3.9.*`, `3.10.*` or `3.11.*` you meet
+requirements.
 
 !!! warning "What to do if you have an unsupported version"
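
For readers who prefer to script the check described above, a minimal illustrative snippet (not part of the InvokeAI docs) that enforces the same 3.9 to 3.11 range is:

```python
import sys

# Accept CPython 3.9.x through 3.11.x, matching the documented requirement.
if not ((3, 9) <= sys.version_info[:2] <= (3, 11)):
    raise SystemExit(f"Python {sys.version.split()[0]} is unsupported; use 3.9, 3.10, or 3.11")
```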

View File

@@ -32,7 +32,7 @@ gaming):
 * **Python**
-  version 3.9 or 3.10 (3.11 is not recommended).
+  version 3.9 through 3.11
 * **CUDA Tools**
@@ -65,7 +65,7 @@ gaming):
 To install InvokeAI with virtual environments and the PIP package
 manager, please follow these steps:
 
-1. Please make sure you are using Python 3.9 or 3.10. The rest of the install
+1. Please make sure you are using Python 3.9 through 3.11. The rest of the install
    procedure depends on this and will not work with other versions:
 
    ```bash

View File

@@ -9,16 +9,20 @@ cd $scriptdir
 function version { echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'; }
 
 MINIMUM_PYTHON_VERSION=3.9.0
-MAXIMUM_PYTHON_VERSION=3.11.0
+MAXIMUM_PYTHON_VERSION=3.11.100
 PYTHON=""
 
-for candidate in python3.10 python3.9 python3 python ; do
+for candidate in python3.11 python3.10 python3.9 python3 python ; do
     if ppath=`which $candidate`; then
+        # when using `pyenv`, the executable for an inactive Python version will exist but will not be operational
+        # we check that this found executable can actually run
+        if [ $($candidate --version &>/dev/null; echo ${PIPESTATUS}) -gt 0 ]; then continue; fi
         python_version=$($ppath -V | awk '{ print $2 }')
         if [ $(version $python_version) -ge $(version "$MINIMUM_PYTHON_VERSION") ]; then
-            if [ $(version $python_version) -lt $(version "$MAXIMUM_PYTHON_VERSION") ]; then
+            if [ $(version $python_version) -le $(version "$MAXIMUM_PYTHON_VERSION") ]; then
                 PYTHON=$ppath
                 break
             fi
         fi
     fi
 done
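
The `version` helper above zero-pads each component so plain numeric comparison orders versions correctly, and the new 3.11.100 ceiling together with `-le` lets every 3.11.x patch release pass. A small Python sketch of the same idea (names are illustrative, not taken from the installer):

```python
def version_key(v: str) -> int:
    """Mimic the awk %d%03d%03d%03d normalization: '3.10.6' -> 3010006000."""
    major, *rest = (v.split(".") + ["0", "0", "0"])[:4]
    return int(major + "".join(f"{int(p):03d}" for p in rest))

MIN, MAX = version_key("3.9.0"), version_key("3.11.100")
assert MIN <= version_key("3.11.4") <= MAX          # 3.11 patch releases now pass
assert not (MIN <= version_key("3.12.0") <= MAX)    # later minors are still rejected
```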

View File

@@ -90,7 +90,7 @@ async def update_model(
             new_name=info.model_name,
             new_base=info.base_model,
         )
-        logger.info(f"Successfully renamed {base_model}/{model_name}=>{info.base_model}/{info.model_name}")
+        logger.info(f"Successfully renamed {base_model.value}/{model_name}=>{info.base_model}/{info.model_name}")
         # update information to support an update of attributes
         model_name = info.model_name
         base_model = info.base_model

View File

@@ -12,7 +12,7 @@ from pydantic import BaseModel, Field, validator
 
 from invokeai.app.invocations.metadata import CoreMetadata
 from invokeai.app.util.step_callback import stable_diffusion_step_callback
-from invokeai.backend.model_management.models.base import ModelType
+from invokeai.backend.model_management.models import ModelType, SilenceWarnings
 
 from ...backend.model_management.lora import ModelPatcher
 from ...backend.stable_diffusion import PipelineIntermediateState
@@ -311,70 +311,71 @@ class TextToLatentsInvocation(BaseInvocation):
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
+        with SilenceWarnings():
+            noise = context.services.latents.get(self.noise.latents_name)
 
             # Get the source node id (we are invoking the prepared node)
             graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
             source_node_id = graph_execution_state.prepared_source_mapping[self.id]
 
             def step_callback(state: PipelineIntermediateState):
                 self.dispatch_progress(context, source_node_id, state)
 
             def _lora_loader():
                 for lora in self.unet.loras:
                     lora_info = context.services.model_manager.get_model(
                         **lora.dict(exclude={"weight"}),
                         context=context,
                     )
                     yield (lora_info.context.model, lora.weight)
                     del lora_info
                 return
 
             unet_info = context.services.model_manager.get_model(
                 **self.unet.unet.dict(),
                 context=context,
             )
             with ExitStack() as exit_stack, ModelPatcher.apply_lora_unet(
                 unet_info.context.model, _lora_loader()
             ), unet_info as unet:
                 noise = noise.to(device=unet.device, dtype=unet.dtype)
 
                 scheduler = get_scheduler(
                     context=context,
                     scheduler_info=self.unet.scheduler,
                     scheduler_name=self.scheduler,
                 )
 
                 pipeline = self.create_pipeline(unet, scheduler)
                 conditioning_data = self.get_conditioning_data(context, scheduler, unet)
 
                 control_data = self.prep_control_data(
                     model=pipeline,
                     context=context,
                     control_input=self.control,
                     latents_shape=noise.shape,
                     # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
                     do_classifier_free_guidance=True,
                     exit_stack=exit_stack,
                 )
 
                 # TODO: Verify the noise is the right size
                 result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
                     latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
                     noise=noise,
                     num_inference_steps=self.steps,
                     conditioning_data=conditioning_data,
                     control_data=control_data,  # list[ControlNetData]
                     callback=step_callback,
                 )
 
                 # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
                 result_latents = result_latents.to("cpu")
                 torch.cuda.empty_cache()
 
             name = f"{context.graph_execution_state_id}__{self.id}"
             context.services.latents.save(name, result_latents)
             return build_latents_output(latents_name=name, latents=result_latents)
 
 
 class LatentsToLatentsInvocation(TextToLatentsInvocation):
@@ -402,82 +403,83 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
-        noise = context.services.latents.get(self.noise.latents_name)
-        latent = context.services.latents.get(self.latents.latents_name)
+        with SilenceWarnings():  # this quenches NSFW nag from diffusers
+            noise = context.services.latents.get(self.noise.latents_name)
+            latent = context.services.latents.get(self.latents.latents_name)
 
             # Get the source node id (we are invoking the prepared node)
             graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
             source_node_id = graph_execution_state.prepared_source_mapping[self.id]
 
             def step_callback(state: PipelineIntermediateState):
                 self.dispatch_progress(context, source_node_id, state)
 
             def _lora_loader():
                 for lora in self.unet.loras:
                     lora_info = context.services.model_manager.get_model(
                         **lora.dict(exclude={"weight"}),
                         context=context,
                     )
                     yield (lora_info.context.model, lora.weight)
                     del lora_info
                 return
 
             unet_info = context.services.model_manager.get_model(
                 **self.unet.unet.dict(),
                 context=context,
             )
             with ExitStack() as exit_stack, ModelPatcher.apply_lora_unet(
                 unet_info.context.model, _lora_loader()
             ), unet_info as unet:
                 noise = noise.to(device=unet.device, dtype=unet.dtype)
                 latent = latent.to(device=unet.device, dtype=unet.dtype)
 
                 scheduler = get_scheduler(
                     context=context,
                     scheduler_info=self.unet.scheduler,
                     scheduler_name=self.scheduler,
                 )
 
                 pipeline = self.create_pipeline(unet, scheduler)
                 conditioning_data = self.get_conditioning_data(context, scheduler, unet)
 
                 control_data = self.prep_control_data(
                     model=pipeline,
                     context=context,
                     control_input=self.control,
                     latents_shape=noise.shape,
                     # do_classifier_free_guidance=(self.cfg_scale >= 1.0))
                     do_classifier_free_guidance=True,
                     exit_stack=exit_stack,
                 )
 
                 # TODO: Verify the noise is the right size
                 initial_latents = (
                     latent if self.strength < 1.0 else torch.zeros_like(latent, device=unet.device, dtype=latent.dtype)
                 )
 
                 timesteps, _ = pipeline.get_img2img_timesteps(
                     self.steps,
                     self.strength,
                     device=unet.device,
                 )
 
                 result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
                     latents=initial_latents,
                     timesteps=timesteps,
                     noise=noise,
                     num_inference_steps=self.steps,
                     conditioning_data=conditioning_data,
                     control_data=control_data,  # list[ControlNetData]
                     callback=step_callback,
                 )
 
                 # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
                 result_latents = result_latents.to("cpu")
                 torch.cuda.empty_cache()
 
             name = f"{context.graph_execution_state_id}__{self.id}"
             context.services.latents.save(name, result_latents)
             return build_latents_output(latents_name=name, latents=result_latents)
@@ -490,7 +492,7 @@ class LatentsToImageInvocation(BaseInvocation):
     # Inputs
     latents: Optional[LatentsField] = Field(description="The latents to generate an image from")
     vae: VaeField = Field(default=None, description="Vae submodel")
-    tiled: bool = Field(default=False, description="Decode latents by overlapping tiles(less memory consumption)")
+    tiled: bool = Field(default=False, description="Decode latents by overlaping tiles (less memory consumption)")
     fp32: bool = Field(DEFAULT_PRECISION == "float32", description="Decode in full precision")
     metadata: Optional[CoreMetadata] = Field(
         default=None, description="Optional core metadata to be written to the image"
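
Both invocations above now run inside `SilenceWarnings`, which the inline comment says is there to quench the NSFW nag emitted by diffusers; the class is imported from `invokeai.backend.model_management.models`. As a rough illustration of the general pattern (an assumption, not the actual InvokeAI implementation), a warning-silencing context manager can be as small as:

```python
import warnings
from contextlib import contextmanager

@contextmanager
def mute_warnings():
    """Illustrative stand-in for SilenceWarnings: ignore Python warnings inside the block."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        yield

with mute_warnings():
    warnings.warn("this would normally be printed")  # silently dropped
```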

View File

@@ -401,7 +401,11 @@ class ModelManager(object):
         base_model: BaseModelType,
         model_type: ModelType,
     ) -> str:
-        return f"{base_model}/{model_type}/{model_name}"
+        # In 3.11, the behavior of (str,enum) when interpolated into a
+        # string has changed. The next two lines are defensive.
+        base_model = BaseModelType(base_model)
+        model_type = ModelType(model_type)
+        return f"{base_model.value}/{model_type.value}/{model_name}"
 
     @classmethod
     def parse_key(cls, model_key: str) -> Tuple[str, BaseModelType, ModelType]:
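
The defensive `.value` lookups relate to a Python 3.11 change: `format()` (and therefore f-strings) of an enum with a mixed-in type such as `str` now yields the member name form instead of the bare value. A small demonstration with a hypothetical stand-in enum (not the real `BaseModelType` definition):

```python
from enum import Enum

class BaseKind(str, Enum):      # hypothetical stand-in for BaseModelType
    SD1 = "sd-1"

# Python <= 3.10: f"{BaseKind.SD1}"  -> "sd-1"
# Python >= 3.11: f"{BaseKind.SD1}"  -> "BaseKind.SD1"
# Interpolating .value is stable across versions:
print(f"{BaseKind.SD1.value}/main/my-model")   # always "sd-1/main/my-model"
```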

View File

@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "InvokeAI"
 description = "An implementation of Stable Diffusion which provides various new features and options to aid the image generation process"
-requires-python = ">=3.9, <3.11"
+requires-python = ">=3.9, <3.12"
 readme = { content-type = "text/markdown", file = "README.md" }
 keywords = ["stable-diffusion", "AI"]
 dynamic = ["version"]
@@ -32,16 +32,16 @@ classifiers = [
   'Topic :: Scientific/Engineering :: Image Processing',
 ]
 dependencies = [
-  "accelerate~=0.16",
+  "accelerate~=0.21.0",
   "albumentations",
   "click",
   "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
-  "compel==2.0.0",
+  "compel~=2.0.0",
   "controlnet-aux>=0.0.6",
   "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26
   "datasets",
-  "diffusers[torch]~=0.18.1",
-  "dnspython==2.2.1",
+  "diffusers[torch]~=0.18.2",
+  "dnspython~=2.4.0",
   "dynamicprompts",
   "easing-functions",
   "einops",
@@ -54,37 +54,37 @@ dependencies = [
   "flask_cors==3.0.10",
   "flask_socketio==5.3.0",
   "flaskwebgui==1.0.3",
-  "gfpgan==1.3.8",
   "huggingface-hub>=0.11.1",
-  "invisible-watermark>=0.2.0", # needed to install SDXL base and refiner using their repo_ids
+  "invisible-watermark~=0.2.0", # needed to install SDXL base and refiner using their repo_ids
   "matplotlib", # needed for plotting of Penner easing functions
   "mediapipe", # needed for "mediapipeface" controlnet model
   "npyscreen",
-  "numpy<1.24",
+  "numpy==1.24.4",
   "omegaconf",
   "opencv-python",
   "picklescan",
   "pillow",
   "prompt-toolkit",
-  "pympler==1.0.1",
+  "pydantic==1.10.10",
+  "pympler~=1.0.1",
   "pypatchmatch",
   'pyperclip',
   "pyreadline3",
-  "python-multipart==0.0.6",
-  "pytorch-lightning==1.7.7",
+  "python-multipart",
+  "pytorch-lightning",
   "realesrgan",
-  "requests==2.28.2",
+  "requests~=2.28.2",
   "rich~=13.3",
   "safetensors~=0.3.0",
-  "scikit-image>=0.19",
+  "scikit-image~=0.21.0",
   "send2trash",
-  "test-tube>=0.7.5",
-  "torch~=2.0.0",
-  "torchvision>=0.14.1",
-  "torchmetrics==0.11.4",
-  "torchsde==0.2.5",
+  "test-tube~=0.7.5",
+  "torch~=2.0.1",
+  "torchvision~=0.15.2",
+  "torchmetrics~=1.0.1",
+  "torchsde~=0.2.5",
   "transformers~=4.31.0",
-  "uvicorn[standard]==0.21.1",
+  "uvicorn[standard]~=0.21.1",
   "windows-curses; sys_platform=='win32'",
 ]
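
Most of the pins above move from exact `==` versions to PEP 440 compatible-release (`~=`) specifiers, which accept newer patch releases while holding the last stated component fixed. A quick way to see the effect, using the third-party `packaging` library (an assumption for illustration; it is not declared in this file):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.21.1")   # equivalent to ">=0.21.1, ==0.21.*"
print("0.21.9" in spec)           # True  - patch upgrades are allowed
print("0.22.0" in spec)           # False - the next minor release is excluded
```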

View File

@@ -1,8 +1,16 @@
 #!/bin/env python
+import argparse
 import sys
 from pathlib import Path
 
 from invokeai.backend.model_management.model_probe import ModelProbe
 
-info = ModelProbe().probe(Path(sys.argv[1]))
+parser = argparse.ArgumentParser(description="Probe model type")
+parser.add_argument(
+    "model_path",
+    type=Path,
+)
+args = parser.parse_args()
+
+info = ModelProbe().probe(args.model_path)
 print(info)
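
Switching from `sys.argv[1]` to `argparse` gives the script a usage message, `--help` output, and a clear error when the path argument is missing. A small sketch of exercising the same parser definition (the sample path is purely illustrative):

```python
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description="Probe model type")
parser.add_argument("model_path", type=Path)

args = parser.parse_args(["/tmp/example-model.safetensors"])  # stand-in path, not a real file
print(args.model_path)  # PosixPath('/tmp/example-model.safetensors') on POSIX systems
```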