Support both v2-v and v2-e legacy ckpt models

Lincoln Stein 2023-03-09 15:35:17 -05:00
parent 5a633ba811
commit 2ae396640b
10 changed files with 237 additions and 65 deletions

View File

@@ -0,0 +1,67 @@
+model:
+  base_learning_rate: 1.0e-4
+  target: ldm.models.diffusion.ddpm.LatentDiffusion
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: "jpg"
+    cond_stage_key: "txt"
+    image_size: 64
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_factor: 0.18215
+    use_ema: False # we set this to false because this is an inference only config
+
+    unet_config:
+      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
+      params:
+        use_checkpoint: True
+        use_fp16: True
+        image_size: 32 # unused
+        in_channels: 4
+        out_channels: 4
+        model_channels: 320
+        attention_resolutions: [ 4, 2, 1 ]
+        num_res_blocks: 2
+        channel_mult: [ 1, 2, 4, 4 ]
+        num_head_channels: 64 # need to fix for flash-attn
+        use_spatial_transformer: True
+        use_linear_in_transformer: True
+        transformer_depth: 1
+        context_dim: 1024
+        legacy: False
+
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          #attn_type: "vanilla-xformers"
+          double_z: true
+          z_channels: 4
+          resolution: 256
+          in_channels: 3
+          out_ch: 3
+          ch: 128
+          ch_mult:
+          - 1
+          - 2
+          - 4
+          - 4
+          num_res_blocks: 2
+          attn_resolutions: []
+          dropout: 0.0
+          lossconfig:
+            target: torch.nn.Identity
+
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+      params:
+        freeze: True
+        layer: "penultimate"
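
For readers unfamiliar with how a legacy config such as the one above is consumed: the ldm codebase builds the model object described by the YAML, then loads the checkpoint weights into it. Below is a minimal sketch of that pattern, assuming the OmegaConf/ldm layout this repo uses; the checkpoint filename is illustrative.

import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config

# Build the LatentDiffusion model described by the config above.
config = OmegaConf.load("configs/stable-diffusion/v2-inference.yaml")
model = instantiate_from_config(config.model)

# Legacy checkpoints usually nest the weights under a "state_dict" key.
ckpt = torch.load("v2-base.ckpt", map_location="cpu")  # illustrative path
model.load_state_dict(ckpt.get("state_dict", ckpt), strict=False)
model.eval()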

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,7 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>InvokeAI - A Stable Diffusion Toolkit</title>
     <link rel="shortcut icon" type="icon" href="./assets/favicon-0d253ced.ico" />
-    <script type="module" crossorigin src="./assets/index-720872d1.js"></script>
+    <script type="module" crossorigin src="./assets/index-c09cf9ca.js"></script>
     <link rel="stylesheet" href="./assets/index-14cb2922.css">
   </head>

View File

@@ -365,7 +365,8 @@
     "convertToDiffusersHelpText6": "Do you wish to convert this model?",
     "convertToDiffusersSaveLocation": "Save Location",
     "v1": "v1",
-    "v2": "v2",
+    "v2_base": "v2 (512px)",
+    "v2_768": "v2 (768px)",
     "inpainting": "v1 Inpainting",
     "customConfig": "Custom Config",
     "pathToCustomConfig": "Path To Custom Config",

View File

@@ -365,7 +365,8 @@
     "convertToDiffusersHelpText6": "Do you wish to convert this model?",
     "convertToDiffusersSaveLocation": "Save Location",
     "v1": "v1",
-    "v2": "v2",
+    "v2_base": "v2 (512px)",
+    "v2_768": "v2 (768px)",
     "inpainting": "v1 Inpainting",
     "customConfig": "Custom Config",
     "pathToCustomConfig": "Path To Custom Config",

View File

@@ -181,7 +181,8 @@ export default function SearchModels() {
   const configFiles = {
     v1: 'configs/stable-diffusion/v1-inference.yaml',
-    v2: 'configs/stable-diffusion/v2-inference-v.yaml',
+    v2_base: 'configs/stable-diffusion/v2-inference-v.yaml',
+    v2_768: 'configs/stable-diffusion/v2-inference-v.yaml',
     inpainting: 'configs/stable-diffusion/v1-inpainting-inference.yaml',
     custom: pathToConfig,
   };
@@ -385,7 +386,8 @@ export default function SearchModels() {
         >
           <Flex gap={4}>
             <Radio value="v1">{t('modelManager.v1')}</Radio>
-            <Radio value="v2">{t('modelManager.v2')}</Radio>
+            <Radio value="v2_base">{t('modelManager.v2_base')}</Radio>
+            <Radio value="v2_768">{t('modelManager.v2_768')}</Radio>
             <Radio value="inpainting">
               {t('modelManager.inpainting')}
             </Radio>

File diff suppressed because one or more lines are too long

View File

@@ -22,7 +22,7 @@ from ..generate import Generate
 from .args import (Args, dream_cmd_from_png, metadata_dumps,
                    metadata_from_png)
 from .generator.diffusers_pipeline import PipelineIntermediateState
-from .globals import Globals
+from .globals import Globals, global_config_dir
 from .image_util import make_grid
 from .log import write_log
 from .model_manager import ModelManager
@@ -33,7 +33,6 @@ from ..util import url_attachment_name
 # global used in multiple functions (fix)
 infile = None
 
 def main():
     """Initialize command-line parsers and the diffusion model"""
     global infile
@@ -66,6 +65,9 @@ def main():
     Globals.sequential_guidance = args.sequential_guidance
     Globals.ckpt_convert = args.ckpt_convert
 
+    # run any post-install patches needed
+    run_patches()
+
     print(f">> Internet connectivity is {Globals.internet_available}")
 
     if not args.conf:
@@ -660,7 +662,16 @@ def import_model(model_path: str, gen, opt, completer, convert=False):
         )
     if not imported_name:
         print("** Import failed or was skipped")
+        if config_file := _pick_configuration_file(completer):
+            imported_name = gen.model_manager.heuristic_import(
+                model_path,
+                model_name=model_name,
+                description=model_desc,
+                convert=convert,
+                model_config_file=config_file,
+            )
+    if not imported_name:
+        print("** Aborting import.")
         return
 
     if not _verify_load(imported_name, gen):
@@ -674,6 +685,48 @@ def import_model(model_path: str, gen, opt, completer, convert=False):
     completer.update_models(gen.model_manager.list_models())
     print(f">> {imported_name} successfully installed")
 
+def _pick_configuration_file(completer) -> Path:
+    print(
+"""
+Please select the type of this model:
+[1] A Stable Diffusion v1.x ckpt/safetensors model
+[2] A Stable Diffusion v1.x inpainting ckpt/safetensors model
+[3] A Stable Diffusion v2.x base model (512 pixels)
+[4] A Stable Diffusion v2.x v-predictive model (768 pixels)
+[5] Other (you will be prompted to enter the config file path)
+[Q] I have no idea! Skip the import.
+""")
+    choices = [
+        global_config_dir() / 'stable-diffusion' / x
+        for x in [
+            'v1-inference.yaml',
+            'v1-inpainting-inference.yaml',
+            'v2-inference.yaml',
+            'v2-inference-v.yaml',
+        ]
+    ]
+
+    ok = False
+    while not ok:
+        try:
+            choice = input('select 1-5, Q > ').strip()
+            if choice.startswith(('q', 'Q')):
+                return
+            if choice == '5':
+                completer.complete_extensions(('.yaml',))  # must be a tuple
+                choice = Path(input('Select config file for this model> ').strip()).absolute()
+                completer.complete_extensions(None)
+                ok = choice.exists()
+            else:
+                choice = choices[int(choice) - 1]
+                ok = True
+        except (ValueError, IndexError):
+            print(f'{choice} is not a valid choice')
+        except EOFError:
+            return
+    return choice
+
 def _verify_load(model_name: str, gen) -> bool:
     print(">> Verifying that new model loads...")
     current_model = gen.model_name
@@ -1234,6 +1287,20 @@ def check_internet() -> bool:
     except:
         return False
 
+# This routine performs any patch-ups needed after installation
+def run_patches():
+    # install ckpt configuration files that may have been added to the
+    # distro after original root directory configuration
+    import invokeai.configs as conf
+    from shutil import copyfile
+
+    root_configs = Path(global_config_dir(), 'stable-diffusion')
+    repo_configs = Path(conf.__path__[0], 'stable-diffusion')
+    for src in repo_configs.iterdir():
+        dest = root_configs / src.name
+        if not dest.exists():
+            copyfile(src, dest)
+
 if __name__ == '__main__':
     main()

View File

@@ -862,12 +862,16 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     if original_config_file is None:
         model_type = ModelManager.probe_model_type(checkpoint)
 
-        if model_type == SDLegacyType.V2:
+        if model_type == SDLegacyType.V2_v:
             original_config_file = global_config_dir() / 'stable-diffusion' / 'v2-inference-v.yaml'
             if global_step == 110000:
                 # v2.1 needs to upcast attention
                 upcast_attention = True
+        elif model_type == SDLegacyType.V2_e:
+            original_config_file = (
+                global_config_dir() / "stable-diffusion" / "v2-inference.yaml"
+            )
         elif model_type == SDLegacyType.V1_INPAINT:
             original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inpainting-inference.yaml'
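
The practical difference between the two configs selected above is the objective the UNet was trained on: v2 base models predict the added noise ("epsilon"), while the 768-pixel v2 models predict velocity ("v"). Once converted to diffusers, that distinction lives in the scheduler's prediction_type. A minimal sketch, assuming a diffusers release recent enough to support v-prediction:

from diffusers import DDIMScheduler

# v2-e (512px base): the UNet was trained to predict the added noise.
eps_scheduler = DDIMScheduler(prediction_type="epsilon")

# v2-v (768px): the UNet was trained to predict the velocity term.
v_scheduler = DDIMScheduler(prediction_type="v_prediction")

Sampling a v-prediction checkpoint with an epsilon scheduler (or the reverse) yields badly distorted output, which is why the probe below goes to the trouble of telling the two apart.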

View File

@@ -47,6 +47,8 @@ class SDLegacyType(Enum):
     V1 = 1
     V1_INPAINT = 2
     V2 = 3
+    V2_e = 4
+    V2_v = 5
     UNKNOWN = 99
@@ -724,15 +726,25 @@ class ModelManager(object):
         format. Valid return values include:
         SDLegacyType.V1
         SDLegacyType.V1_INPAINT
-        SDLegacyType.V2
+        SDLegacyType.V2 (V2 prediction type unknown)
+        SDLegacyType.V2_e (V2 using 'epsilon' prediction type)
+        SDLegacyType.V2_v (V2 using 'v_prediction' prediction type)
         SDLegacyType.UNKNOWN
         """
-        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
-        if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
-            return SDLegacyType.V2
+        global_step = checkpoint.get('global_step')
+        state_dict = checkpoint.get("state_dict") or checkpoint
+
+        try:
+            state_dict = checkpoint.get("state_dict") or checkpoint
+            key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
+            if key_name in state_dict and state_dict[key_name].shape[-1] == 1024:
+                if global_step == 220000:
+                    return SDLegacyType.V2_e
+                elif global_step == 110000:
+                    return SDLegacyType.V2_v
+                else:
+                    return SDLegacyType.V2
+
+            # otherwise we assume a V1 file
+            in_channels = state_dict[
+                "model.diffusion_model.input_blocks.0.0.weight"
+            ].shape[1]
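
The probe keys off the global_step value recorded in the official Stability checkpoints (220000 for the 512px epsilon-prediction release, 110000 for the 768px v-prediction release) and falls back to plain SDLegacyType.V2 when the step count is missing or unrecognized. A sketch of exercising the probe directly, assuming the module path used elsewhere in this commit; the checkpoint path is illustrative:

import torch
from ldm.invoke.model_manager import ModelManager, SDLegacyType

checkpoint = torch.load("/path/to/legacy-model.ckpt", map_location="cpu")
model_type = ModelManager.probe_model_type(checkpoint)
if model_type == SDLegacyType.V2_v:
    print("768px v-prediction model: use v2-inference-v.yaml")
elif model_type == SDLegacyType.V2_e:
    print("512px epsilon model: use v2-inference.yaml")
elif model_type == SDLegacyType.V2:
    print("V2 model of unknown parameterization: ask the user")
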
@@ -746,12 +758,13 @@ class ModelManager(object):
         return SDLegacyType.UNKNOWN
 
     def heuristic_import(
-        self,
-        path_url_or_repo: str,
-        convert: bool = False,
-        model_name: str = None,
-        description: str = None,
-        commit_to_conf: Path = None,
+        self,
+        path_url_or_repo: str,
+        convert: bool = False,
+        model_name: str = None,
+        description: str = None,
+        model_config_file: Path = None,
+        commit_to_conf: Path = None,
     ) -> str:
         """
         Accept a string which could be:
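
With the new model_config_file parameter, a caller that already knows the model's parameterization can skip probing entirely. A hypothetical call, with the manager instance and paths illustrative:

from pathlib import Path

# 'manager' is assumed to be an initialized ModelManager instance.
imported_name = manager.heuristic_import(
    "/downloads/v2-768-model.safetensors",
    convert=True,
    model_config_file=Path("configs/stable-diffusion/v2-inference-v.yaml"),
)
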
@@ -849,7 +862,7 @@ class ModelManager(object):
         if model_path.stem in self.config: # already imported
             print(" | Already imported. Skipping")
-            return
+            return model_path.stem
 
         # another round of heuristics to guess the correct config file.
         checkpoint = (
@@ -857,32 +870,49 @@ class ModelManager(object):
             if model_path.suffix == ".safetensors"
             else torch.load(model_path)
         )
-        model_type = self.probe_model_type(checkpoint)
-        model_config_file = None
-        if model_type == SDLegacyType.V1:
-            print(" | SD-v1 model detected")
-            model_config_file = Path(
-                Globals.root, "configs/stable-diffusion/v1-inference.yaml"
-            )
-        elif model_type == SDLegacyType.V1_INPAINT:
-            print(" | SD-v1 inpainting model detected")
-            model_config_file = Path(
-                Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
-            )
-        elif model_type == SDLegacyType.V2:
-            print(
-                " | SD-v2 model detected; model will be converted to diffusers format"
-            )
-            model_config_file = Path(
-                Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
-            )
-            convert = True
-        else:
-            print(
-                f"** {thing} is a legacy checkpoint file but not in a known Stable Diffusion model. Skipping import"
-            )
-            return
+        # additional probing needed if no config file provided
+        if model_config_file is None:
+            model_type = self.probe_model_type(checkpoint)
+            if model_type == SDLegacyType.V1:
+                print(" | SD-v1 model detected")
+                model_config_file = Path(
+                    Globals.root, "configs/stable-diffusion/v1-inference.yaml"
+                )
+            elif model_type == SDLegacyType.V1_INPAINT:
+                print(" | SD-v1 inpainting model detected")
+                model_config_file = Path(
+                    Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
+                )
+            elif model_type == SDLegacyType.V2_v:
+                print(
+                    " | SD-v2-v model detected"
+                )
+                model_config_file = Path(
+                    Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
+                )
+            elif model_type == SDLegacyType.V2_e:
+                print(
+                    " | SD-v2-e model detected"
+                )
+                model_config_file = Path(
+                    Globals.root, "configs/stable-diffusion/v2-inference.yaml"
+                )
+            elif model_type == SDLegacyType.V2:
+                print(
+                    f"** {thing} is a V2 checkpoint file, but its parameterization cannot be determined. Please provide configuration file path."
+                )
+                return
+            else:
+                print(
+                    f"** {thing} is a legacy checkpoint file but not a known Stable Diffusion model. Please provide configuration file path."
+                )
+                return
+
+        if model_config_file.name.startswith('v2'):
+            convert = True
+            print(
+                " | This SD-v2 model will be converted to diffusers format for use"
+            )
 
         if convert:
             diffuser_path = Path(