From 72c519c6ad21bfebae4e25e94655b233f6f47398 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 13:51:47 -0400 Subject: [PATCH 1/6] fix incorrect key construction --- invokeai/backend/model_management/convert_ckpt_to_diffusers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index 2c62b8b192..5a3228658e 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ -422,7 +422,7 @@ def convert_ldm_unet_checkpoint( ) for key in keys: if key.startswith("model.diffusion_model"): - flat_ema_key = "model_ema." + "".join(key.split(".")[1:]) + flat_ema_key = "model_ema." + ".".join(key.split(".")[1:]) unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) else: if sum(k.startswith("model_ema") for k in keys) > 100: From 2a2d988928e2606e24f56deebdc262e101029d32 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 15:28:39 -0400 Subject: [PATCH 2/6] convert script handles more ckpt variants --- .../convert_ckpt_to_diffusers.py | 45 ++++++++++++------- .../model_management/models/controlnet.py | 10 ++--- .../models/stable_diffusion.py | 1 + 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index 5a3228658e..d6d61ee71d 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ -422,8 +422,11 @@ def convert_ldm_unet_checkpoint( ) for key in keys: if key.startswith("model.diffusion_model"): - flat_ema_key = "model_ema." 
+ ".".join(key.split(".")[1:]) - unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) + for delimiter in ['','.']: + flat_ema_key = "model_ema." + delimiter.join(key.split(".")[1:]) + if checkpoint.get(flat_ema_key) is not None: + unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) + break else: if sum(k.startswith("model_ema") for k in keys) > 100: logger.warning( @@ -1070,7 +1073,7 @@ def convert_controlnet_checkpoint( extract_ema, use_linear_projection=None, cross_attention_dim=None, - precision: torch.dtype = torch.float32, + precision: torch.dtype = None, ): ctrlnet_config = create_unet_diffusers_config(original_config, image_size=image_size, controlnet=True) ctrlnet_config["upcast_attention"] = upcast_attention @@ -1121,7 +1124,7 @@ def download_from_original_stable_diffusion_ckpt( prediction_type: str = None, model_type: str = None, extract_ema: bool = False, - precision: torch.dtype = torch.float32, + precision: torch.dtype = None, scheduler_type: str = "pndm", num_in_channels: Optional[int] = None, upcast_attention: Optional[bool] = None, @@ -1194,6 +1197,8 @@ def download_from_original_stable_diffusion_ckpt( [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer) to use. If this parameter is `None`, the function will load a new instance of [CLIPTokenizer] by itself, if needed. + precision (`torch.dtype`, *optional*, defaults to `None`): + If not provided, the precision will be set to the precision of the original file. return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file. 
""" @@ -1251,6 +1256,10 @@ def download_from_original_stable_diffusion_ckpt( checkpoint = checkpoint["state_dict"] logger.debug(f"model_type = {model_type}; original_config_file = {original_config_file}") + + precision_probing_key = "model.diffusion_model.input_blocks.0.0.bias" + logger.debug(f"original checkpoint precision == {checkpoint[precision_probing_key].dtype}") + precision = precision or checkpoint[precision_probing_key].dtype if original_config_file is None: key_name_v2_1 = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight" @@ -1281,7 +1290,7 @@ def download_from_original_stable_diffusion_ckpt( original_config = OmegaConf.load(original_config_file) if ( model_version == BaseModelType.StableDiffusion2 - and original_config["model"]["params"]["parameterization"] == "v" + and original_config["model"]["params"].get("parameterization") == "v" ): prediction_type = "v_prediction" upcast_attention = True @@ -1447,7 +1456,7 @@ def download_from_original_stable_diffusion_ckpt( if controlnet: pipe = pipeline_class( vae=vae.to(precision), - text_encoder=text_model, + text_encoder=text_model.to(precision), tokenizer=tokenizer, unet=unet.to(precision), scheduler=scheduler, @@ -1459,7 +1468,7 @@ def download_from_original_stable_diffusion_ckpt( else: pipe = pipeline_class( vae=vae.to(precision), - text_encoder=text_model, + text_encoder=text_model.to(precision), tokenizer=tokenizer, unet=unet.to(precision), scheduler=scheduler, @@ -1484,8 +1493,8 @@ def download_from_original_stable_diffusion_ckpt( image_noising_scheduler=image_noising_scheduler, # regular denoising components tokenizer=tokenizer, - text_encoder=text_model, - unet=unet, + text_encoder=text_model.to(precision), + unet=unet.to(precision), scheduler=scheduler, # vae vae=vae, @@ -1560,7 +1569,7 @@ def download_from_original_stable_diffusion_ckpt( if controlnet: pipe = pipeline_class( vae=vae.to(precision), - text_encoder=text_model, + text_encoder=text_model.to(precision), 
tokenizer=tokenizer, unet=unet.to(precision), controlnet=controlnet, @@ -1571,7 +1580,7 @@ def download_from_original_stable_diffusion_ckpt( else: pipe = pipeline_class( vae=vae.to(precision), - text_encoder=text_model, + text_encoder=text_model.to(precision), tokenizer=tokenizer, unet=unet.to(precision), scheduler=scheduler, @@ -1594,9 +1603,9 @@ def download_from_original_stable_diffusion_ckpt( pipe = StableDiffusionXLPipeline( vae=vae.to(precision), - text_encoder=text_encoder, + text_encoder=text_encoder.to(precision), tokenizer=tokenizer, - text_encoder_2=text_encoder_2, + text_encoder_2=text_encoder_2.to(precision), tokenizer_2=tokenizer_2, unet=unet.to(precision), scheduler=scheduler, @@ -1639,7 +1648,7 @@ def download_controlnet_from_original_ckpt( original_config_file: str, image_size: int = 512, extract_ema: bool = False, - precision: torch.dtype = torch.float32, + precision: torch.dtype = None, num_in_channels: Optional[int] = None, upcast_attention: Optional[bool] = None, device: str = None, @@ -1680,6 +1689,12 @@ def download_controlnet_from_original_ckpt( while "state_dict" in checkpoint: checkpoint = checkpoint["state_dict"] + # use original precision + precision_probing_key = 'input_blocks.0.0.bias' + ckpt_precision = checkpoint[precision_probing_key].dtype + logger.debug(f'original controlnet precision = {ckpt_precision}') + precision = precision or ckpt_precision + original_config = OmegaConf.load(original_config_file) if num_in_channels is not None: @@ -1699,7 +1714,7 @@ def download_controlnet_from_original_ckpt( cross_attention_dim=cross_attention_dim, ) - return controlnet + return controlnet.to(precision) def convert_ldm_vae_to_diffusers(checkpoint, vae_config: DictConfig, image_size: int) -> AutoencoderKL: diff --git a/invokeai/backend/model_management/models/controlnet.py b/invokeai/backend/model_management/models/controlnet.py index e075843a56..ed1e7316dc 100644 --- a/invokeai/backend/model_management/models/controlnet.py +++ 
b/invokeai/backend/model_management/models/controlnet.py @@ -17,7 +17,7 @@ from .base import ( ModelNotFoundException, ) from invokeai.app.services.config import InvokeAIAppConfig - +import invokeai.backend.util.logging as logger class ControlNetModelFormat(str, Enum): Checkpoint = "checkpoint" @@ -66,7 +66,7 @@ class ControlNetModel(ModelBase): child_type: Optional[SubModelType] = None, ): if child_type is not None: - raise Exception("There is no child models in controlnet model") + raise Exception("There are no child models in controlnet model") model = None for variant in ["fp16", None]: @@ -123,10 +123,7 @@ class ControlNetModel(ModelBase): else: return model_path - -@classmethod def _convert_controlnet_ckpt_and_cache( - cls, model_path: str, output_path: str, base_model: BaseModelType, @@ -140,7 +137,8 @@ def _convert_controlnet_ckpt_and_cache( app_config = InvokeAIAppConfig.get_config() weights = app_config.root_path / model_path output_path = Path(output_path) - + + logger.info(f"Converting {weights} to diffusers format") # return cached version if it exists if output_path.exists(): return output_path diff --git a/invokeai/backend/model_management/models/stable_diffusion.py b/invokeai/backend/model_management/models/stable_diffusion.py index 76b4833f9c..e4396a9582 100644 --- a/invokeai/backend/model_management/models/stable_diffusion.py +++ b/invokeai/backend/model_management/models/stable_diffusion.py @@ -123,6 +123,7 @@ class StableDiffusion1Model(DiffusersModel): return _convert_ckpt_and_cache( version=BaseModelType.StableDiffusion1, model_config=config, + load_safety_checker=False, output_path=output_path, ) else: From 3f9105be5005639f8e5ad7518d4f94f0ac37a3d8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 17:17:45 -0400 Subject: [PATCH 3/6] make convert script respect setting of use_ema in config file --- .../model_management/convert_ckpt_to_diffusers.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index d6d61ee71d..b371fc96e8 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ -422,11 +422,8 @@ def convert_ldm_unet_checkpoint( ) for key in keys: if key.startswith("model.diffusion_model"): - for delimiter in ['','.']: - flat_ema_key = "model_ema." + delimiter.join(key.split(".")[1:]) - if checkpoint.get(flat_ema_key) is not None: - unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) - break + flat_ema_key = "model_ema." + "".join(key.split(".")[2:]) + unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) else: if sum(k.startswith("model_ema") for k in keys) > 100: logger.warning( @@ -1114,7 +1111,6 @@ def convert_controlnet_checkpoint( return controlnet.to(precision) -# TO DO - PASS PRECISION def download_from_original_stable_diffusion_ckpt( checkpoint_path: str, model_version: BaseModelType, @@ -1288,6 +1284,9 @@ def download_from_original_stable_diffusion_ckpt( original_config_file = BytesIO(requests.get(config_url).content) original_config = OmegaConf.load(original_config_file) + if original_config['model']['params'].get('use_ema') is not None: + extract_ema = original_config['model']['params']['use_ema'] + if ( model_version == BaseModelType.StableDiffusion2 and original_config["model"]["params"].get("parameterization") == "v" From 1de783b1cede6cac409e3599f9204a067b322c2d Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 17:20:26 -0400 Subject: [PATCH 4/6] fix mistake in indexing flat_ema_key --- invokeai/backend/model_management/convert_ckpt_to_diffusers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index 
b371fc96e8..4a95d1d980 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ -422,7 +422,7 @@ def convert_ldm_unet_checkpoint( ) for key in keys: if key.startswith("model.diffusion_model"): - flat_ema_key = "model_ema." + "".join(key.split(".")[2:]) + flat_ema_key = "model_ema." + "".join(key.split(".")[1:]) unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) else: if sum(k.startswith("model_ema") for k in keys) > 100: From e82eb0b9fc6a61d52087d4d327a05b476dfe57e5 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 17:30:21 -0400 Subject: [PATCH 5/6] add correct optional annotation to precision arg --- .../backend/model_management/convert_ckpt_to_diffusers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index 4a95d1d980..8565c1aef7 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ -1070,7 +1070,7 @@ def convert_controlnet_checkpoint( extract_ema, use_linear_projection=None, cross_attention_dim=None, - precision: torch.dtype = None, + precision: Optional[torch.dtype] = None, ): ctrlnet_config = create_unet_diffusers_config(original_config, image_size=image_size, controlnet=True) ctrlnet_config["upcast_attention"] = upcast_attention @@ -1120,7 +1120,7 @@ def download_from_original_stable_diffusion_ckpt( prediction_type: str = None, model_type: str = None, extract_ema: bool = False, - precision: torch.dtype = None, + precision: Optional[torch.dtype] = None, scheduler_type: str = "pndm", num_in_channels: Optional[int] = None, upcast_attention: Optional[bool] = None, @@ -1647,7 +1647,7 @@ def download_controlnet_from_original_ckpt( original_config_file: str, image_size: int = 512, 
extract_ema: bool = False, - precision: torch.dtype = None, + precision: Optional[torch.dtype] = None, num_in_channels: Optional[int] = None, upcast_attention: Optional[bool] = None, device: str = None, From 348bee89817df4e59c058e349f63e46a71bcc92d Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Jul 2023 17:30:54 -0400 Subject: [PATCH 6/6] blackified --- invokeai/app/invocations/params.py | 6 +++--- .../model_management/convert_ckpt_to_diffusers.py | 12 ++++++------ .../backend/model_management/models/controlnet.py | 4 +++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/params.py b/invokeai/app/invocations/params.py index 1a03baa7cc..513eb8762f 100644 --- a/invokeai/app/invocations/params.py +++ b/invokeai/app/invocations/params.py @@ -6,8 +6,7 @@ from pydantic import Field from invokeai.app.invocations.prompt import PromptOutput -from .baseinvocation import (BaseInvocation, BaseInvocationOutput, - InvocationConfig, InvocationContext) +from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext from .math import FloatOutput, IntOutput # Pass-through parameter nodes - used by subgraphs @@ -68,6 +67,7 @@ class ParamStringInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> StringOutput: return StringOutput(text=self.text) + class ParamPromptInvocation(BaseInvocation): """A prompt input parameter""" @@ -80,4 +80,4 @@ class ParamPromptInvocation(BaseInvocation): } def invoke(self, context: InvocationContext) -> PromptOutput: - return PromptOutput(prompt=self.prompt) \ No newline at end of file + return PromptOutput(prompt=self.prompt) diff --git a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py index 8565c1aef7..3893081b39 100644 --- a/invokeai/backend/model_management/convert_ckpt_to_diffusers.py +++ b/invokeai/backend/model_management/convert_ckpt_to_diffusers.py @@ 
-1252,7 +1252,7 @@ def download_from_original_stable_diffusion_ckpt( checkpoint = checkpoint["state_dict"] logger.debug(f"model_type = {model_type}; original_config_file = {original_config_file}") - + precision_probing_key = "model.diffusion_model.input_blocks.0.0.bias" logger.debug(f"original checkpoint precision == {checkpoint[precision_probing_key].dtype}") precision = precision or checkpoint[precision_probing_key].dtype @@ -1284,9 +1284,9 @@ def download_from_original_stable_diffusion_ckpt( original_config_file = BytesIO(requests.get(config_url).content) original_config = OmegaConf.load(original_config_file) - if original_config['model']['params'].get('use_ema') is not None: - extract_ema = original_config['model']['params']['use_ema'] - + if original_config["model"]["params"].get("use_ema") is not None: + extract_ema = original_config["model"]["params"]["use_ema"] + if ( model_version == BaseModelType.StableDiffusion2 and original_config["model"]["params"].get("parameterization") == "v" @@ -1689,9 +1689,9 @@ def download_controlnet_from_original_ckpt( checkpoint = checkpoint["state_dict"] # use original precision - precision_probing_key = 'input_blocks.0.0.bias' + precision_probing_key = "input_blocks.0.0.bias" ckpt_precision = checkpoint[precision_probing_key].dtype - logger.debug(f'original controlnet precision = {ckpt_precision}') + logger.debug(f"original controlnet precision = {ckpt_precision}") precision = precision or ckpt_precision original_config = OmegaConf.load(original_config_file) diff --git a/invokeai/backend/model_management/models/controlnet.py b/invokeai/backend/model_management/models/controlnet.py index ed1e7316dc..061be7ae49 100644 --- a/invokeai/backend/model_management/models/controlnet.py +++ b/invokeai/backend/model_management/models/controlnet.py @@ -19,6 +19,7 @@ from .base import ( from invokeai.app.services.config import InvokeAIAppConfig import invokeai.backend.util.logging as logger + class ControlNetModelFormat(str, Enum): 
Checkpoint = "checkpoint" Diffusers = "diffusers" @@ -123,6 +124,7 @@ class ControlNetModel(ModelBase): else: return model_path + def _convert_controlnet_ckpt_and_cache( model_path: str, output_path: str, @@ -137,7 +139,7 @@ def _convert_controlnet_ckpt_and_cache( app_config = InvokeAIAppConfig.get_config() weights = app_config.root_path / model_path output_path = Path(output_path) - + logger.info(f"Converting {weights} to diffusers format") # return cached version if it exists if output_path.exists():