Merge branch 'main' into bugfix/use-cu117-wheel

commit 61c3886843
Lincoln Stein, 2023-02-04 09:43:52 -05:00, committed by GitHub
4 changed files with 183 additions and 181 deletions

View File

@@ -47,11 +47,11 @@ if [ "$0" != "bash" ]; then
         ;;
     3)
         echo "Starting Textual Inversion:"
-        exec textual_inversion --gui $@
+        exec invokeai-ti --gui $@
         ;;
     4)
         echo "Merging Models:"
-        exec merge_models --gui $@
+        exec invokeai-merge --gui $@
         ;;
     5)
         echo "Developer Console:"

View File

@@ -20,6 +20,7 @@
 import os
 import re
 import torch
+import warnings
 from pathlib import Path
 from ldm.invoke.globals import Globals, global_cache_dir
 from safetensors.torch import load_file
@@ -44,6 +45,7 @@ from diffusers import (
     PNDMScheduler,
     StableDiffusionPipeline,
     UNet2DConditionModel,
+    logging as dlogging,
 )
 from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
@@ -795,8 +797,9 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     prediction_type:str=None,
     extract_ema:bool=True,
     upcast_attn:bool=False,
-    vae:AutoencoderKL=None
-)->StableDiffusionGeneratorPipeline:
+    vae:AutoencoderKL=None,
+    return_generator_pipeline:bool=False,
+)->Union[StableDiffusionPipeline,StableDiffusionGeneratorPipeline]:
     '''
     Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
     config file.
@@ -823,166 +826,173 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     :param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
         running stable diffusion 2.1.
     '''
-    checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
-    cache_dir = global_cache_dir('hub')
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        verbosity = dlogging.get_verbosity()
+        dlogging.set_verbosity_error()
+
+        checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
+        cache_dir = global_cache_dir('hub')
+        pipeline_class = StableDiffusionGeneratorPipeline if return_generator_pipeline else StableDiffusionPipeline
 
-    # Sometimes models don't have the global_step item
-    if "global_step" in checkpoint:
-        global_step = checkpoint["global_step"]
-    else:
-        print(" | global_step key not found in model")
-        global_step = None
+        # Sometimes models don't have the global_step item
+        if "global_step" in checkpoint:
+            global_step = checkpoint["global_step"]
+        else:
+            print(" | global_step key not found in model")
+            global_step = None
 
-    # sometimes there is a state_dict key and sometimes not
-    if 'state_dict' in checkpoint:
-        checkpoint = checkpoint["state_dict"]
+        # sometimes there is a state_dict key and sometimes not
+        if 'state_dict' in checkpoint:
+            checkpoint = checkpoint["state_dict"]
 
-    upcast_attention = False
-    if original_config_file is None:
-        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
+        upcast_attention = False
+        if original_config_file is None:
+            key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
 
-        if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml')
+            if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
+                original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml')
 
-            if global_step == 110000:
-                # v2.1 needs to upcast attention
-                upcast_attention = True
-        else:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml')
+                if global_step == 110000:
+                    # v2.1 needs to upcast attention
+                    upcast_attention = True
+            else:
+                original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml')
 
-    original_config = OmegaConf.load(original_config_file)
+        original_config = OmegaConf.load(original_config_file)
 
-    if num_in_channels is not None:
-        original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
+        if num_in_channels is not None:
+            original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
 
-    if (
-        "parameterization" in original_config["model"]["params"]
-        and original_config["model"]["params"]["parameterization"] == "v"
-    ):
-        if prediction_type is None:
-            # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
-            # as it relies on a brittle global step parameter here
-            prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
-        if image_size is None:
-            # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
-            # as it relies on a brittle global step parameter here
-            image_size = 512 if global_step == 875000 else 768
-    else:
-        if prediction_type is None:
-            prediction_type = "epsilon"
-        if image_size is None:
-            image_size = 512
+        if (
+            "parameterization" in original_config["model"]["params"]
+            and original_config["model"]["params"]["parameterization"] == "v"
+        ):
+            if prediction_type is None:
+                # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
+                # as it relies on a brittle global step parameter here
+                prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
+            if image_size is None:
+                # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
+                # as it relies on a brittle global step parameter here
+                image_size = 512 if global_step == 875000 else 768
+        else:
+            if prediction_type is None:
+                prediction_type = "epsilon"
+            if image_size is None:
+                image_size = 512
 
-    num_train_timesteps = original_config.model.params.timesteps
-    beta_start = original_config.model.params.linear_start
-    beta_end = original_config.model.params.linear_end
+        num_train_timesteps = original_config.model.params.timesteps
+        beta_start = original_config.model.params.linear_start
+        beta_end = original_config.model.params.linear_end
 
-    scheduler = DDIMScheduler(
-        beta_end=beta_end,
-        beta_schedule="scaled_linear",
-        beta_start=beta_start,
-        num_train_timesteps=num_train_timesteps,
-        steps_offset=1,
-        clip_sample=False,
-        set_alpha_to_one=False,
-        prediction_type=prediction_type,
-    )
-    # make sure scheduler works correctly with DDIM
-    scheduler.register_to_config(clip_sample=False)
+        scheduler = DDIMScheduler(
+            beta_end=beta_end,
+            beta_schedule="scaled_linear",
+            beta_start=beta_start,
+            num_train_timesteps=num_train_timesteps,
+            steps_offset=1,
+            clip_sample=False,
+            set_alpha_to_one=False,
+            prediction_type=prediction_type,
+        )
+        # make sure scheduler works correctly with DDIM
+        scheduler.register_to_config(clip_sample=False)
 
-    if scheduler_type == "pndm":
-        config = dict(scheduler.config)
-        config["skip_prk_steps"] = True
-        scheduler = PNDMScheduler.from_config(config)
-    elif scheduler_type == "lms":
-        scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "heun":
-        scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "euler":
-        scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "euler-ancestral":
-        scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "dpm":
-        scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
-    elif scheduler_type == "ddim":
-        scheduler = scheduler
-    else:
-        raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+        if scheduler_type == "pndm":
+            config = dict(scheduler.config)
+            config["skip_prk_steps"] = True
+            scheduler = PNDMScheduler.from_config(config)
+        elif scheduler_type == "lms":
+            scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "heun":
+            scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "euler":
+            scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "euler-ancestral":
+            scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "dpm":
+            scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
+        elif scheduler_type == "ddim":
+            scheduler = scheduler
+        else:
+            raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
 
-    # Convert the UNet2DConditionModel model.
-    unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
-    unet_config["upcast_attention"] = upcast_attention
-    unet = UNet2DConditionModel(**unet_config)
+        # Convert the UNet2DConditionModel model.
+        unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
+        unet_config["upcast_attention"] = upcast_attention
+        unet = UNet2DConditionModel(**unet_config)
 
-    converted_unet_checkpoint = convert_ldm_unet_checkpoint(
-        checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
-    )
+        converted_unet_checkpoint = convert_ldm_unet_checkpoint(
+            checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
+        )
 
-    unet.load_state_dict(converted_unet_checkpoint)
+        unet.load_state_dict(converted_unet_checkpoint)
 
-    # Convert the VAE model, or use the one passed
-    if not vae:
-        print(f' | Using checkpoint model\'s original VAE')
-        vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
-        converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
+        # Convert the VAE model, or use the one passed
+        if not vae:
+            print(' | Using checkpoint model\'s original VAE')
+            vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
+            converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
 
-        vae = AutoencoderKL(**vae_config)
-        vae.load_state_dict(converted_vae_checkpoint)
-    else:
-        print(f' | Using external VAE specified in config')
+            vae = AutoencoderKL(**vae_config)
+            vae.load_state_dict(converted_vae_checkpoint)
+        else:
+            print(' | Using external VAE specified in config')
 
-    # Convert the text model.
-    model_type = pipeline_type
-    if model_type is None:
-        model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
+        # Convert the text model.
+        model_type = pipeline_type
+        if model_type is None:
+            model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
 
-    if model_type == "FrozenOpenCLIPEmbedder":
-        text_model = convert_open_clip_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
-                                                  subfolder="tokenizer",
-                                                  cache_dir=global_cache_dir('diffusers')
-                                                  )
-        pipe = StableDiffusionGeneratorPipeline(
-            vae=vae,
-            text_encoder=text_model,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=None,
-            requires_safety_checker=False,
-        )
+        if model_type == "FrozenOpenCLIPEmbedder":
+            text_model = convert_open_clip_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
+                                                      subfolder="tokenizer",
+                                                      cache_dir=global_cache_dir('diffusers')
+                                                      )
+            pipe = pipeline_class(
+                vae=vae,
+                text_encoder=text_model,
+                tokenizer=tokenizer,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=None,
+                requires_safety_checker=False,
+            )
 
-    elif model_type == "PaintByExample":
-        vision_model = convert_paint_by_example_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
-        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
-        pipe = PaintByExamplePipeline(
-            vae=vae,
-            image_encoder=vision_model,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=feature_extractor,
-        )
+        elif model_type == "PaintByExample":
+            vision_model = convert_paint_by_example_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
+            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
+            pipe = PaintByExamplePipeline(
+                vae=vae,
+                image_encoder=vision_model,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=feature_extractor,
+            )
 
-    elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
-        text_model = convert_ldm_clip_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
-        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
-        pipe = StableDiffusionGeneratorPipeline(
-            vae=vae,
-            text_encoder=text_model,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=feature_extractor,
-        )
+        elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
+            text_model = convert_ldm_clip_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
+            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
+            pipe = pipeline_class(
+                vae=vae,
+                text_encoder=text_model,
+                tokenizer=tokenizer,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=feature_extractor,
+            )
 
-    else:
-        text_config = create_ldm_bert_config(original_config)
-        text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
-        pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
+        else:
+            text_config = create_ldm_bert_config(original_config)
+            text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
+            tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
+            pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
 
+        dlogging.set_verbosity(verbosity)
     return pipe
@@ -1000,6 +1010,7 @@ def convert_ckpt_to_diffuser(
         checkpoint_path,
         **kwargs
     )
     pipe.save_pretrained(
         dump_path,
         safe_serialization=is_safetensors_available(),
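
The widened signature above lets a caller choose between the stock diffusers class and InvokeAI's generator pipeline. A minimal usage sketch, assuming the converter function is imported from the module patched above; the checkpoint and config paths are hypothetical placeholders:

    # Hypothetical paths; the import of load_pipeline_from_original_stable_diffusion_ckpt is elided.
    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path='models/ldm/stable-diffusion-v1/model.ckpt',       # placeholder .ckpt
        original_config_file='configs/stable-diffusion/v1-inference.yaml', # placeholder config
        return_generator_pipeline=True,  # request InvokeAI's StableDiffusionGeneratorPipeline
    )
    # With the default return_generator_pipeline=False, a plain diffusers StableDiffusionPipeline
    # is returned instead, matching the new Union[...] return annotation.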

View File

@@ -8,13 +8,14 @@ import argparse
 import curses
 import os
 import sys
+import traceback
+import warnings
 from argparse import Namespace
 from pathlib import Path
 from typing import List, Union
 
 import npyscreen
-import warnings
-from diffusers import DiffusionPipeline
+from diffusers import DiffusionPipeline, logging as dlogging
 from omegaconf import OmegaConf
 
 from ldm.invoke.globals import (
@@ -46,18 +47,24 @@ def merge_diffusion_models(
     **kwargs - the default DiffusionPipeline.get_config_dict kwargs:
          cache_dir, resume_download, force_download, proxies, local_files_only, use_auth_token, revision, torch_dtype, device_map
     """
-    pipe = DiffusionPipeline.from_pretrained(
-        model_ids_or_paths[0],
-        cache_dir=kwargs.get("cache_dir", global_cache_dir()),
-        custom_pipeline="checkpoint_merger",
-    )
-    merged_pipe = pipe.merge(
-        pretrained_model_name_or_path_list=model_ids_or_paths,
-        alpha=alpha,
-        interp=interp,
-        force=force,
-        **kwargs,
-    )
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        verbosity = dlogging.get_verbosity()
+        dlogging.set_verbosity_error()
+
+        pipe = DiffusionPipeline.from_pretrained(
+            model_ids_or_paths[0],
+            cache_dir=kwargs.get("cache_dir", global_cache_dir()),
+            custom_pipeline="checkpoint_merger",
+        )
+        merged_pipe = pipe.merge(
+            pretrained_model_name_or_path_list=model_ids_or_paths,
+            alpha=alpha,
+            interp=interp,
+            force=force,
+            **kwargs,
+        )
+        dlogging.set_verbosity(verbosity)
     return merged_pipe
@@ -443,22 +450,5 @@ def main():
     ] = cache_dir # because not clear the merge pipeline is honoring cache_dir
     args.cache_dir = cache_dir
 
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore')
-        try:
-            if args.front_end:
-                run_gui(args)
-            else:
-                run_cli(args)
-            print(f'>> Conversion successful.')
-        except Exception as e:
-            if str(e).startswith('Not enough space'):
-                print('** Not enough horizontal space! Try making the window wider, or relaunch with a smaller starting size.')
-            else:
-                print(f"** An error occurred while merging the pipelines: {str(e)}")
-            sys.exit(-1)
-        except KeyboardInterrupt:
-            sys.exit(-1)
-
 if __name__ == "__main__":
     main()
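
Both converters now use the same silencing pattern: Python warnings are suppressed and the diffusers log level is dropped to ERROR for the duration of the call, then restored. A self-contained sketch of that pattern as a reusable context manager (the quiet_diffusers helper is hypothetical, not part of this commit; it only restates the calls shown in the hunks above):

    import warnings
    from contextlib import contextmanager

    from diffusers import logging as dlogging

    @contextmanager
    def quiet_diffusers():
        """Temporarily silence Python warnings and diffusers logging, then restore verbosity."""
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            verbosity = dlogging.get_verbosity()
            dlogging.set_verbosity_error()
            try:
                yield
            finally:
                dlogging.set_verbosity(verbosity)

    # Usage sketch: any noisy diffusers call can run inside the guard, e.g.
    # with quiet_diffusers():
    #     pipe = DiffusionPipeline.from_pretrained(model_id, custom_pipeline="checkpoint_merger")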

View File

@@ -356,6 +356,7 @@ class ModelManager(object):
             checkpoint_path = weights,
             original_config_file = config,
             vae = vae,
+            return_generator_pipeline=True,
         )
         return (
             pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),