This is release candidate 2.3.3-rc1 (#3033)

This includes a number of bug fixes described in the draft release
notes.

It also incorporates a modified version of the dialog-based invoke.sh
script suggested by JoshuaKimsey:
https://discord.com/channels/1020123559063990373/1089119602425995304
Commit 77a63e5310 by Lincoln Stein, 2023-03-27 12:09:56 -04:00, committed by GitHub
4 changed files with 711 additions and 312 deletions


@ -211,6 +211,26 @@ description for the model, whether to make this the default model that
is loaded at InvokeAI startup time, and whether to replace its
VAE. Generally the answer to the latter question is "no".
### Specifying a configuration file for legacy checkpoints
Some checkpoint files come with instructions to use a specific .yaml
configuration file. For InvokeAI to load this file correctly, put
the config file in the same directory as the corresponding `.ckpt` or
`.safetensors` file and make sure the file has the same basename as
the weights file. Here is an example:
```bash
wonderful-model-v2.ckpt
wonderful-model-v2.yaml
```
Similarly, to use a custom VAE, name the VAE file like this:
```bash
wonderful-model-v2.vae.pt
```
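
For reference, the lookup that implements this convention appears later in this commit's `model_manager.py` changes. Here is a minimal sketch of the resolution logic using plain `pathlib` (illustrative only, not the exact InvokeAI code):

```python
from pathlib import Path

weights = Path("wonderful-model-v2.ckpt")

# A like-named .yaml beside the weights file is taken as the model config.
config = weights.with_suffix(".yaml")            # wonderful-model-v2.yaml
if config.exists():
    print(f"Using config file {config.name}")

# A custom VAE may sit alongside the weights under any of these suffixes.
for suffix in ("pt", "ckpt", "safetensors"):
    vae = weights.with_suffix(f".vae.{suffix}")  # wonderful-model-v2.vae.pt
    if vae.exists():
        print(f"Using VAE file {vae.name}")
        break
```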
### Converting legacy models into `diffusers`
The CLI `!convert_model` will convert a `.safetensors` or `.ckpt`


@ -1,5 +1,8 @@
#!/bin/bash
# coauthored by Lincoln Stein, Eugene Brodsky and JoshuaKimsey
# Copyright 2023, The InvokeAI Development Team
####
# This launch script assumes that:
# 1. it is located in the runtime directory,
@ -18,18 +21,109 @@ cd "$scriptdir"
. .venv/bin/activate
export INVOKEAI_ROOT="$scriptdir"
PARAMS=$@
# set required env var for torch on mac MPS
if [ "$(uname -s)" == "Darwin" ]; then
export PYTORCH_ENABLE_MPS_FALLBACK=1
fi
if [ "$0" != "bash" ]; then
do_choice() {
case $1 in
1)
echo "Generate images with a browser-based interface"
clear
invokeai --web $PARAMS
;;
2)
echo "Generate images using a command-line interface"
clear
invokeai $PARAMS
;;
3)
echo "Textual inversion training"
clear
invokeai-ti --gui $PARAMS
;;
4)
echo "Merge models (diffusers type only)"
clear
invokeai-merge --gui $PARAMS
;;
5)
echo "Download and install models"
clear
invokeai-model-install --root ${INVOKEAI_ROOT}
;;
6)
echo "Change InvokeAI startup options"
clear
invokeai-configure --root ${INVOKEAI_ROOT} --skip-sd-weights --skip-support-models
;;
7)
echo "Re-run the configure script to fix a broken install"
clear
invokeai-configure --root ${INVOKEAI_ROOT} --yes --default_only
;;
8)
echo "Open the developer console"
clear
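# Relaunching bash with this script as its --init-file re-activates the
# virtual environment in the new shell; since $0 is then "bash", the
# guard at the bottom of this script drops that shell into the
# developer-console branch instead of redrawing the menu.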
file_name=$(basename "${BASH_SOURCE[0]}")
bash --init-file "$file_name"
;;
9)
echo "Update InvokeAI"
clear
invokeai-update
;;
10)
echo "Command-line help"
clear
invokeai --help
;;
*)
echo "Exiting..."
exit
;;
esac
clear
}
do_dialog() {
while true
do
options=(
1 "Generate images with a browser-based interface"
2 "Generate images using a command-line interface"
3 "Textual inversion training"
4 "Merge models (diffusers type only)"
5 "Download and install models"
6 "Change InvokeAI startup options"
7 "Re-run the configure script to fix a broken install"
8 "Open the developer console"
9 "Update InvokeAI"
10 "Command-line help")
choice=$(dialog --clear \
--backtitle "InvokeAI" \
--title "What would you like to run?" \
--menu "Select an option:" \
0 0 0 \
"${options[@]}" \
2>&1 >/dev/tty) || clear
do_choice "$choice"
done
clear
}
do_line_input() {
echo " ** For a more attractive experience, please install the 'dialog' utility. **"
echo ""
while true
do
echo "Do you want to generate images using the"
echo "1. command-line interface"
echo "2. browser-based UI"
echo "1. browser-based UI"
echo "2. command-line interface"
echo "3. run textual inversion training"
echo "4. merge models (diffusers type only)"
echo "5. download and install models"
@ -40,56 +134,22 @@ if [ "$0" != "bash" ]; then
echo "10. command-line help"
echo "Q - Quit"
echo ""
read -p "Please enter 1-10, Q: [2] " yn
choice=${yn:='2'}
case $choice in
1)
echo "Starting the InvokeAI command-line..."
invokeai $@
;;
2)
echo "Starting the InvokeAI browser-based UI..."
invokeai --web $@
;;
3)
echo "Starting Textual Inversion:"
invokeai-ti --gui $@
;;
4)
echo "Merging Models:"
invokeai-merge --gui $@
;;
5)
invokeai-model-install --root ${INVOKEAI_ROOT}
;;
6)
invokeai-configure --root ${INVOKEAI_ROOT} --skip-sd-weights --skip-support-models
;;
7)
invokeai-configure --root ${INVOKEAI_ROOT} --yes --default_only
;;
8)
echo "Developer Console:"
file_name=$(basename "${BASH_SOURCE[0]}")
bash --init-file "$file_name"
;;
9)
echo "Update:"
invokeai-update
;;
10)
invokeai --help
;;
[qQ])
exit 0
;;
*)
echo "Invalid selection"
exit;;
esac
read -p "Please enter 1-10, Q: [1] " yn
choice=${yn:='1'}
do_choice $choice
done
}
if [ "$0" != "bash" ]; then
# dialog ships by default on most Linux distros, but this checks that it is present regardless
if command -v dialog &> /dev/null ; then
do_dialog
else
do_line_input
fi
else # in developer console
python --version
echo "Press ^D to exit"
export PS1="(InvokeAI) \u@\h \w> "
fi

File diff suppressed because it is too large


@ -19,7 +19,7 @@ import warnings
from enum import Enum
from pathlib import Path
from shutil import move, rmtree
from typing import Any, Optional, Union, Callable
from typing import Any, Callable, Optional, Union
import safetensors
import safetensors.torch
@ -35,12 +35,7 @@ from picklescan.scanner import scan_file_path
from ldm.invoke.devices import CPU_DEVICE
from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline
from ldm.invoke.globals import Globals, global_cache_dir
from ldm.util import (
ask_user,
download_with_resume,
instantiate_from_config,
url_attachment_name,
)
from ldm.util import ask_user, download_with_resume, instantiate_from_config, url_attachment_name
class SDLegacyType(Enum):
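
The enum body falls outside this hunk; inferred from the `SDLegacyType.*` members referenced later in the diff, it is presumably along these lines (member values and ordering are a guess):

```python
from enum import Enum, auto

# Hypothetical reconstruction: only the member names are attested in
# this diff; the values and their ordering are assumptions.
class SDLegacyType(Enum):
    V1 = auto()
    V1_INPAINT = auto()
    V2_e = auto()   # SD-v2 with epsilon prediction
    V2_v = auto()   # SD-v2 with 'v_prediction'
    UNKNOWN = auto()
```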
@ -384,15 +379,16 @@ class ModelManager(object):
if not os.path.isabs(weights):
weights = os.path.normpath(os.path.join(Globals.root, weights))
# check whether this is a v2 file and force conversion
convert = Globals.ckpt_convert or self.is_v2_config(config)
# if converting automatically to diffusers, then we do the conversion and return
# a diffusers pipeline
if Globals.ckpt_convert:
if convert:
print(
f">> Converting legacy checkpoint {model_name} into a diffusers model..."
)
from ldm.invoke.ckpt_to_diffuser import (
load_pipeline_from_original_stable_diffusion_ckpt,
)
from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
self.offload_model(self.current_model)
if vae_config := self._choose_diffusers_vae(model_name):
@ -547,6 +543,15 @@ class ModelManager(object):
return pipeline, width, height, model_hash
def is_v2_config(self, config: Path) -> bool:
try:
mconfig = OmegaConf.load(config)
return (
mconfig["model"]["params"]["unet_config"]["params"]["context_dim"] > 768
)
except Exception:  # missing keys or an unreadable config: not a v2 model
return False
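
The v2 test boils down to the cross-attention width in the legacy .yaml: SD-v1 configs use a context_dim of 768 (CLIP ViT-L/14), while SD-v2 configs use 1024 (OpenCLIP ViT-H). A minimal sketch of the check, with an inline OmegaConf config standing in for the file:

```python
from omegaconf import OmegaConf

# Inline stand-in for a legacy SD-v2 .yaml config file.
cfg = OmegaConf.create(
    {"model": {"params": {"unet_config": {"params": {"context_dim": 1024}}}}}
)
is_v2 = cfg["model"]["params"]["unet_config"]["params"]["context_dim"] > 768
print(is_v2)  # True, so the checkpoint is force-converted to diffusers
```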
def model_name_or_path(self, model_name: Union[str, DictConfig]) -> str | Path:
if isinstance(model_name, DictConfig) or isinstance(model_name, dict):
mconfig = model_name
@ -724,7 +729,7 @@ class ModelManager(object):
SDLegacyType.V2_v (V2 using 'v_prediction' prediction type)
SDLegacyType.UNKNOWN
"""
global_step = checkpoint.get('global_step')
global_step = checkpoint.get("global_step")
state_dict = checkpoint.get("state_dict") or checkpoint
try:
@ -871,6 +876,12 @@ class ModelManager(object):
checkpoint = safetensors.torch.load_file(model_path)
# additional probing needed if no config file provided
if model_config_file is None:
# Is there a like-named .yaml file in the same directory as the
# weights file? If so, we treat it as the model's config file
if model_path.with_suffix(".yaml").exists():
model_config_file = model_path.with_suffix(".yaml")
print(f" | Using config file {model_config_file.name}")
else:
model_type = self.probe_model_type(checkpoint)
if model_type == SDLegacyType.V1:
print(" | SD-v1 model detected")
@ -880,19 +891,16 @@ class ModelManager(object):
elif model_type == SDLegacyType.V1_INPAINT:
print(" | SD-v1 inpainting model detected")
model_config_file = Path(
Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
Globals.root,
"configs/stable-diffusion/v1-inpainting-inference.yaml",
)
elif model_type == SDLegacyType.V2_v:
print(
" | SD-v2-v model detected"
)
print(" | SD-v2-v model detected")
model_config_file = Path(
Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
)
elif model_type == SDLegacyType.V2_e:
print(
" | SD-v2-e model detected"
)
print(" | SD-v2-e model detected")
model_config_file = Path(
Globals.root, "configs/stable-diffusion/v2-inference.yaml"
)
@ -910,11 +918,17 @@ class ModelManager(object):
if not model_config_file:
return
if model_config_file.name.startswith('v2'):
if self.is_v2_config(model_config_file):
convert = True
print(
" | This SD-v2 model will be converted to diffusers format for use"
)
print(" | This SD-v2 model will be converted to diffusers format for use")
# look for a custom vae
vae_path = None
for suffix in ["pt", "ckpt", "safetensors"]:
if (model_path.with_suffix(f".vae.{suffix}")).exists():
vae_path = model_path.with_suffix(f".vae.{suffix}")
print(f" | Using VAE file {vae_path.name}")
vae = None if vae_path else dict(repo_id="stabilityai/sd-vae-ft-mse")
if convert:
diffuser_path = Path(
@ -923,7 +937,8 @@ class ModelManager(object):
model_name = self.convert_and_import(
model_path,
diffusers_path=diffuser_path,
vae=dict(repo_id="stabilityai/sd-vae-ft-mse"),
vae=vae,
vae_path=vae_path,
model_name=model_name,
model_description=description,
original_config_file=model_config_file,
@ -941,7 +956,8 @@ class ModelManager(object):
model_name=model_name,
model_description=description,
vae=str(
Path(
vae_path
or Path(
Globals.root,
"models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt",
)
@ -958,7 +974,8 @@ class ModelManager(object):
diffusers_path: Path,
model_name=None,
model_description=None,
vae=None,
vae: dict = None,
vae_path: Path = None,
original_config_file: Path = None,
commit_to_conf: Path = None,
scan_needed: bool = True,
@ -975,7 +992,7 @@ class ModelManager(object):
new_config = None
from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser
from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffusers
if diffusers_path.exists():
print(
@ -990,12 +1007,13 @@ class ModelManager(object):
# By passing the specified VAE to the conversion function, the autoencoder
# will be built into the model rather than tacked on afterward via the config file
vae_model = self._load_vae(vae) if vae else None
convert_ckpt_to_diffuser(
convert_ckpt_to_diffusers(
ckpt_path,
diffusers_path,
extract_ema=True,
original_config_file=original_config_file,
vae=vae_model,
vae_path=str(vae_path) if vae_path else None,
scan_needed=scan_needed,
)
print(
@ -1048,7 +1066,7 @@ class ModelManager(object):
# In the event that the original entry is using a custom ckpt VAE, we try to
# map that VAE onto a diffuser VAE using a hard-coded dictionary.
# I would prefer to do this differently: We load the ckpt model into memory, swap the
# VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
# VAE in memory, and then pass that to convert_ckpt_to_diffusers() so that the swapped
# VAE is built into the model. However, when I tried this I got obscure key errors.
if vae:
return vae
@ -1134,14 +1152,14 @@ class ModelManager(object):
legacy_locations = [
Path(
models_dir,
"CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
"CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker",
),
Path("bert-base-uncased/models--bert-base-uncased"),
Path(
"openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
),
]
legacy_locations.extend(list(global_cache_dir("diffusers").glob('*')))
legacy_locations.extend(list(global_cache_dir("diffusers").glob("*")))
legacy_layout = False
for model in legacy_locations:
legacy_layout = legacy_layout or model.exists()
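
The accumulator loop above simply ORs together existence checks over the legacy cache locations; a compact equivalent (illustrative only):

```python
from pathlib import Path

# Equivalent of the accumulator loop: the install uses the legacy
# layout if any old-style cache location still exists on disk.
def has_legacy_layout(legacy_locations: list[Path]) -> bool:
    return any(model.exists() for model in legacy_locations)
```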