diff --git a/installer/templates/invoke.sh.in b/installer/templates/invoke.sh.in
index 29e95d0ba0..5c08d7c23b 100644
--- a/installer/templates/invoke.sh.in
+++ b/installer/templates/invoke.sh.in
@@ -47,11 +47,11 @@ if [ "$0" != "bash" ]; then
             ;;
         3)
             echo "Starting Textual Inversion:"
-            exec textual_inversion --gui $@
+            exec invokeai-ti --gui $@
             ;;
         4)
             echo "Merging Models:"
-            exec merge_models --gui $@
+            exec invokeai-merge --gui $@
             ;;
         5)
             echo "Developer Console:"
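Note: the launcher menu now execs the renamed `invokeai-ti` and `invokeai-merge` console scripts in place of the old `textual_inversion` and `merge_models` names. For illustration only, commands like these are normally declared as setuptools `console_scripts` entry points; the module paths below are assumptions, not the project's actual setup file:

```python
# Hypothetical setup.py excerpt -- shows how a console-script name such as
# "invokeai-ti" maps onto a Python callable. Module paths are assumed here.
from setuptools import setup

setup(
    name="InvokeAI",
    entry_points={
        "console_scripts": [
            "invokeai-ti=ldm.invoke.textual_inversion:main",   # assumed target module
            "invokeai-merge=ldm.invoke.merge_diffusers:main",  # main() exists in the diff below
        ]
    },
)
```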
diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py
index fe56051aa3..8f4dc3961c 100644
--- a/ldm/invoke/ckpt_to_diffuser.py
+++ b/ldm/invoke/ckpt_to_diffuser.py
@@ -20,6 +20,7 @@ import os
 import re
 import torch
+import warnings
 from pathlib import Path
 from ldm.invoke.globals import Globals, global_cache_dir
 from safetensors.torch import load_file
@@ -44,6 +45,7 @@ from diffusers import (
     PNDMScheduler,
     StableDiffusionPipeline,
     UNet2DConditionModel,
+    logging as dlogging,
 )
 from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
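The new `warnings` import and the `diffusers` logging module (aliased `dlogging`) support the change in the hunks that follow: conversion now runs with Python warnings and diffusers log output silenced, and the previous verbosity restored afterwards. A minimal, self-contained sketch of the pattern; the `do_convert` callable is a stand-in for the conversion body, not a function from this codebase:

```python
# Minimal sketch of the silencing pattern applied below; `do_convert` is a
# placeholder for the real conversion work.
import warnings
from diffusers import logging as dlogging

def run_quietly(do_convert):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")        # suppress Python-level warnings
        verbosity = dlogging.get_verbosity()   # remember the current diffusers level
        dlogging.set_verbosity_error()         # show only errors while converting
        result = do_convert()
        dlogging.set_verbosity(verbosity)      # restore the saved level
    return result
```

As in the patch itself, verbosity is restored only on the success path; wrapping the restore in `try`/`finally` would also cover the case where conversion raises.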
@@ -795,8 +797,9 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     prediction_type:str=None,
     extract_ema:bool=True,
     upcast_attn:bool=False,
-    vae:AutoencoderKL=None
-)->StableDiffusionGeneratorPipeline:
+    vae:AutoencoderKL=None,
+    return_generator_pipeline:bool=False,
+)->Union[StableDiffusionPipeline,StableDiffusionGeneratorPipeline]:
     '''
     Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
     config file.
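The signature change adds `return_generator_pipeline`: `False` (the default) yields a stock diffusers `StableDiffusionPipeline`, suitable for `save_pretrained()`; `True` yields InvokeAI's `StableDiffusionGeneratorPipeline` for in-process generation. A hedged usage sketch; the checkpoint path is a placeholder:

```python
# Usage sketch for the new flag; the .ckpt path below is illustrative.
from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt

# Default: a plain diffusers StableDiffusionPipeline (used when converting to disk).
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path="models/ldm/stable-diffusion-v1/model.ckpt",
)

# Opt in to InvokeAI's generator pipeline (what ModelManager does; see the last hunk).
gen_pipe = load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path="models/ldm/stable-diffusion-v1/model.ckpt",
    return_generator_pipeline=True,
)
```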
@@ -823,166 +826,173 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     :param upcast_attention: Whether the attention computation should always be upcasted. This
        is necessary when running stable diffusion 2.1.
     '''
+
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        verbosity = dlogging.get_verbosity()
+        dlogging.set_verbosity_error()
 
-    checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
-    cache_dir = global_cache_dir('hub')
+        checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
+        cache_dir = global_cache_dir('hub')
+        pipeline_class = StableDiffusionGeneratorPipeline if return_generator_pipeline else StableDiffusionPipeline
 
-    # Sometimes models don't have the global_step item
-    if "global_step" in checkpoint:
-        global_step = checkpoint["global_step"]
-    else:
-        print(" | global_step key not found in model")
-        global_step = None
-
-    # sometimes there is a state_dict key and sometimes not
-    if 'state_dict' in checkpoint:
-        checkpoint = checkpoint["state_dict"]
-
-    upcast_attention = False
-    if original_config_file is None:
-        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
-
-        if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml')
-
-            if global_step == 110000:
-                # v2.1 needs to upcast attention
-                upcast_attention = True
+        # Sometimes models don't have the global_step item
+        if "global_step" in checkpoint:
+            global_step = checkpoint["global_step"]
         else:
-            original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml')
+            print(" | global_step key not found in model")
+            global_step = None
 
-    original_config = OmegaConf.load(original_config_file)
+        # sometimes there is a state_dict key and sometimes not
+        if 'state_dict' in checkpoint:
+            checkpoint = checkpoint["state_dict"]
 
-    if num_in_channels is not None:
-        original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
+        upcast_attention = False
+        if original_config_file is None:
+            key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
 
-    if (
-        "parameterization" in original_config["model"]["params"]
-        and original_config["model"]["params"]["parameterization"] == "v"
-    ):
-        if prediction_type is None:
-            # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
-            # as it relies on a brittle global step parameter here
-            prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
-        if image_size is None:
-            # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
-            # as it relies on a brittle global step parameter here
-            image_size = 512 if global_step == 875000 else 768
-    else:
-        if prediction_type is None:
-            prediction_type = "epsilon"
-        if image_size is None:
-            image_size = 512
+            if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
+                original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml')
 
-    num_train_timesteps = original_config.model.params.timesteps
-    beta_start = original_config.model.params.linear_start
-    beta_end = original_config.model.params.linear_end
+                if global_step == 110000:
+                    # v2.1 needs to upcast attention
+                    upcast_attention = True
+            else:
+                original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml')
 
-    scheduler = DDIMScheduler(
-        beta_end=beta_end,
-        beta_schedule="scaled_linear",
-        beta_start=beta_start,
-        num_train_timesteps=num_train_timesteps,
-        steps_offset=1,
-        clip_sample=False,
-        set_alpha_to_one=False,
-        prediction_type=prediction_type,
-    )
-    # make sure scheduler works correctly with DDIM
-    scheduler.register_to_config(clip_sample=False)
+        original_config = OmegaConf.load(original_config_file)
 
-    if scheduler_type == "pndm":
-        config = dict(scheduler.config)
-        config["skip_prk_steps"] = True
-        scheduler = PNDMScheduler.from_config(config)
-    elif scheduler_type == "lms":
-        scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "heun":
-        scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "euler":
-        scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "euler-ancestral":
-        scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
-    elif scheduler_type == "dpm":
-        scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
-    elif scheduler_type == "ddim":
-        scheduler = scheduler
-    else:
-        raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+        if num_in_channels is not None:
+            original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
 
-    # Convert the UNet2DConditionModel model.
-    unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
-    unet_config["upcast_attention"] = upcast_attention
-    unet = UNet2DConditionModel(**unet_config)
+        if (
+            "parameterization" in original_config["model"]["params"]
+            and original_config["model"]["params"]["parameterization"] == "v"
+        ):
+            if prediction_type is None:
+                # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"`
+                # as it relies on a brittle global step parameter here
+                prediction_type = "epsilon" if global_step == 875000 else "v_prediction"
+            if image_size is None:
+                # NOTE: For stable diffusion 2 base one has to pass `image_size==512`
+                # as it relies on a brittle global step parameter here
+                image_size = 512 if global_step == 875000 else 768
+        else:
+            if prediction_type is None:
+                prediction_type = "epsilon"
+            if image_size is None:
+                image_size = 512
 
-    converted_unet_checkpoint = convert_ldm_unet_checkpoint(
-        checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
-    )
+        num_train_timesteps = original_config.model.params.timesteps
+        beta_start = original_config.model.params.linear_start
+        beta_end = original_config.model.params.linear_end
 
-    unet.load_state_dict(converted_unet_checkpoint)
-
-    # Convert the VAE model, or use the one passed
-    if not vae:
-        print(f' | Using checkpoint model\'s original VAE')
-        vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
-        converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
-
-        vae = AutoencoderKL(**vae_config)
-        vae.load_state_dict(converted_vae_checkpoint)
-    else:
-        print(f' | Using external VAE specified in config')
-
-    # Convert the text model.
-    model_type = pipeline_type
-    if model_type is None:
-        model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
-
-    if model_type == "FrozenOpenCLIPEmbedder":
-        text_model = convert_open_clip_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
-                                                  subfolder="tokenizer",
-                                                  cache_dir=global_cache_dir('diffusers')
-                                                  )
-        pipe = StableDiffusionGeneratorPipeline(
-            vae=vae,
-            text_encoder=text_model,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=None,
-            requires_safety_checker=False,
+        scheduler = DDIMScheduler(
+            beta_end=beta_end,
+            beta_schedule="scaled_linear",
+            beta_start=beta_start,
+            num_train_timesteps=num_train_timesteps,
+            steps_offset=1,
+            clip_sample=False,
+            set_alpha_to_one=False,
+            prediction_type=prediction_type,
         )
-    elif model_type == "PaintByExample":
-        vision_model = convert_paint_by_example_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
-        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
-        pipe = PaintByExamplePipeline(
-            vae=vae,
-            image_encoder=vision_model,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=feature_extractor,
+        # make sure scheduler works correctly with DDIM
+        scheduler.register_to_config(clip_sample=False)
+
+        if scheduler_type == "pndm":
+            config = dict(scheduler.config)
+            config["skip_prk_steps"] = True
+            scheduler = PNDMScheduler.from_config(config)
+        elif scheduler_type == "lms":
+            scheduler = LMSDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "heun":
+            scheduler = HeunDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "euler":
+            scheduler = EulerDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "euler-ancestral":
+            scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config)
+        elif scheduler_type == "dpm":
+            scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config)
+        elif scheduler_type == "ddim":
+            scheduler = scheduler
+        else:
+            raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!")
+
+        # Convert the UNet2DConditionModel model.
+        unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
+        unet_config["upcast_attention"] = upcast_attention
+        unet = UNet2DConditionModel(**unet_config)
+
+        converted_unet_checkpoint = convert_ldm_unet_checkpoint(
+            checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
         )
-    elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
-        text_model = convert_ldm_clip_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
-        feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
-        pipe = StableDiffusionGeneratorPipeline(
-            vae=vae,
-            text_encoder=text_model,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=None,
-            feature_extractor=feature_extractor,
-        )
-    else:
-        text_config = create_ldm_bert_config(original_config)
-        text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
-        pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
+
+        unet.load_state_dict(converted_unet_checkpoint)
+
+        # Convert the VAE model, or use the one passed
+        if not vae:
+            print(' | Using checkpoint model\'s original VAE')
+            vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
+            converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
+
+            vae = AutoencoderKL(**vae_config)
+            vae.load_state_dict(converted_vae_checkpoint)
+        else:
+            print(' | Using external VAE specified in config')
+
+        # Convert the text model.
+        model_type = pipeline_type
+        if model_type is None:
+            model_type = original_config.model.params.cond_stage_config.target.split(".")[-1]
+
+        if model_type == "FrozenOpenCLIPEmbedder":
+            text_model = convert_open_clip_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
+                                                      subfolder="tokenizer",
+                                                      cache_dir=global_cache_dir('diffusers')
+                                                      )
+            pipe = pipeline_class(
+                vae=vae,
+                text_encoder=text_model,
+                tokenizer=tokenizer,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=None,
+                requires_safety_checker=False,
+            )
+        elif model_type == "PaintByExample":
+            vision_model = convert_paint_by_example_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
+            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
+            pipe = PaintByExamplePipeline(
+                vae=vae,
+                image_encoder=vision_model,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=feature_extractor,
+            )
+        elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
+            text_model = convert_ldm_clip_checkpoint(checkpoint)
+            tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
+            feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
+            pipe = pipeline_class(
+                vae=vae,
+                text_encoder=text_model,
+                tokenizer=tokenizer,
+                unet=unet,
+                scheduler=scheduler,
+                safety_checker=None,
+                feature_extractor=feature_extractor,
+            )
+        else:
+            text_config = create_ldm_bert_config(original_config)
+            text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
+            tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
+            pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
+        dlogging.set_verbosity(verbosity)
 
     return pipe
@@ -1000,6 +1010,7 @@ def convert_ckpt_to_diffuser(
         checkpoint_path,
         **kwargs
     )
+
    pipe.save_pretrained(
        dump_path,
        safe_serialization=is_safetensors_available(),
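The re-indented block above keeps the original scheduler logic intact: a `DDIMScheduler` is first built from the checkpoint's beta schedule, and the requested `scheduler_type` is then re-instantiated from that shared config. A standalone sketch of the idiom, using the customary SD-v1 beta values as assumptions rather than values read from a config file:

```python
# Standalone sketch of the scheduler-selection idiom used above; the beta
# values are the common SD-v1 numbers, assumed here for illustration.
from diffusers import DDIMScheduler, PNDMScheduler

scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    steps_offset=1,
    clip_sample=False,
    set_alpha_to_one=False,
    prediction_type="epsilon",
)

# PNDM needs skip_prk_steps added on top of the shared config; the other
# types ("lms", "euler", "dpm", ...) are built from scheduler.config unchanged.
config = dict(scheduler.config)
config["skip_prk_steps"] = True
scheduler = PNDMScheduler.from_config(config)
```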
diff --git a/ldm/invoke/merge_diffusers.py b/ldm/invoke/merge_diffusers.py
index 00d2599e69..50b1f473f5 100644
--- a/ldm/invoke/merge_diffusers.py
+++ b/ldm/invoke/merge_diffusers.py
@@ -8,13 +8,14 @@ import argparse
 import curses
 import os
 import sys
+import traceback
+import warnings
 from argparse import Namespace
 from pathlib import Path
 from typing import List, Union
 
 import npyscreen
-import warnings
-from diffusers import DiffusionPipeline
+from diffusers import DiffusionPipeline, logging as dlogging
 from omegaconf import OmegaConf
 
 from ldm.invoke.globals import (
@@ -46,18 +47,24 @@ def merge_diffusion_models(
     **kwargs - the default DiffusionPipeline.get_config_dict kwargs:
          cache_dir, resume_download, force_download, proxies, local_files_only, use_auth_token, revision, torch_dtype, device_map
     """
-    pipe = DiffusionPipeline.from_pretrained(
-        model_ids_or_paths[0],
-        cache_dir=kwargs.get("cache_dir", global_cache_dir()),
-        custom_pipeline="checkpoint_merger",
-    )
-    merged_pipe = pipe.merge(
-        pretrained_model_name_or_path_list=model_ids_or_paths,
-        alpha=alpha,
-        interp=interp,
-        force=force,
-        **kwargs,
-    )
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        verbosity = dlogging.get_verbosity()
+        dlogging.set_verbosity_error()
+
+        pipe = DiffusionPipeline.from_pretrained(
+            model_ids_or_paths[0],
+            cache_dir=kwargs.get("cache_dir", global_cache_dir()),
+            custom_pipeline="checkpoint_merger",
+        )
+        merged_pipe = pipe.merge(
+            pretrained_model_name_or_path_list=model_ids_or_paths,
+            alpha=alpha,
+            interp=interp,
+            force=force,
+            **kwargs,
+        )
+        dlogging.set_verbosity(verbosity)
 
     return merged_pipe
@@ -443,22 +450,5 @@ def main():
         ] = cache_dir # because not clear the merge pipeline is honoring cache_dir
         args.cache_dir = cache_dir
 
-    with warnings.catch_warnings():
-        warnings.simplefilter('ignore')
-        try:
-            if args.front_end:
-                run_gui(args)
-            else:
-                run_cli(args)
-            print(f'>> Conversion successful.')
-        except Exception as e:
-            if str(e).startswith('Not enough space'):
-                print('** Not enough horizontal space! Try making the window wider, or relaunch with a smaller starting size.')
-            else:
-                print(f"** An error occurred while merging the pipelines: {str(e)}")
-            sys.exit(-1)
-        except KeyboardInterrupt:
-            sys.exit(-1)
-
 if __name__ == "__main__":
     main()
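With the warnings suppression and error handling removed from `main()` here, and the suppression now living inside `merge_diffusion_models()` itself, callers get a quiet merge by default. A hedged call sketch; the model IDs, `alpha`, and `interp` values are illustrative, with semantics following the diffusers `checkpoint_merger` community pipeline:

```python
# Illustrative call; model IDs, alpha, interp, and the save path are placeholders.
from ldm.invoke.merge_diffusers import merge_diffusion_models

merged = merge_diffusion_models(
    model_ids_or_paths=["runwayml/stable-diffusion-v1-5", "prompthero/openjourney"],
    alpha=0.5,          # blend ratio between the two models
    interp="sigmoid",   # checkpoint_merger also accepts e.g. "inv_sigmoid", "add_diff"
    force=False,
)
merged.save_pretrained("models/merged")  # assumed destination
```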
diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py
index ea8bc8d83c..466b2f25b8 100644
--- a/ldm/invoke/model_manager.py
+++ b/ldm/invoke/model_manager.py
@@ -356,6 +356,7 @@ class ModelManager(object):
             checkpoint_path = weights,
             original_config_file = config,
             vae = vae,
+            return_generator_pipeline=True,
         )
         return (
             pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),
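`ModelManager` opts into the generator pipeline because it loads checkpoints for in-process inference, while the disk-conversion path in `convert_ckpt_to_diffuser()` keeps the default `StableDiffusionPipeline` for `save_pretrained()` compatibility. For completeness, a hedged sketch of that conversion entry point; the parameter names appear in the hunks above, and the paths are made up:

```python
# Hedged sketch of converting a legacy .ckpt into a diffusers folder;
# checkpoint_path and dump_path are taken from the hunks above, paths invented.
from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser

convert_ckpt_to_diffuser(
    checkpoint_path="models/ldm/stable-diffusion-v1/model.ckpt",
    dump_path="models/converted/stable-diffusion-v1",
)
```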