mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

Merge branch 'main' into dev/installer

This commit is contained in:
commit 2202288eb2
@@ -44,11 +44,13 @@ def main():
        print('--max_loaded_models must be >= 1; using 1')
        args.max_loaded_models = 1

# alert - setting a global here
# alert - setting a few globals here
Globals.try_patchmatch = args.patchmatch
Globals.always_use_cpu = args.always_use_cpu
Globals.internet_available = args.internet_available and check_internet()
Globals.disable_xformers = not args.xformers
Globals.ckpt_convert = args.ckpt_convert

print(f'>> Internet connectivity is {Globals.internet_available}')

if not args.conf:
@@ -717,11 +719,16 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
    print(f'** {model_name_or_path} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.')
    return

vae = None
if input('Replace this model\'s VAE with "stabilityai/sd-vae-ft-mse"? [n] ').strip() in ('y','Y'):
    vae = dict(repo_id='stabilityai/sd-vae-ft-mse')

new_config = gen.model_manager.convert_and_import(
    ckpt_path,
    diffuser_path,
    model_name=model_name,
    model_description=model_description,
    vae = vae,
    commit_to_conf=opt.conf,
)
if not new_config:
@@ -503,6 +503,13 @@ class Args(object):
    help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}',
    default='auto',
)
model_group.add_argument(
    '--ckpt_convert',
    action=argparse.BooleanOptionalAction,
    dest='ckpt_convert',
    default=False,
    help='Load legacy ckpt files as diffusers. Pass --no-ckpt-convert to inhibit this behavior',
)
model_group.add_argument(
    '--internet',
    action=argparse.BooleanOptionalAction,
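A minimal sketch (not part of the commit) of how argparse.BooleanOptionalAction behaves for the new flag; it requires Python 3.9+ and generates the negative form automatically from the option string:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--ckpt_convert',
        action=argparse.BooleanOptionalAction,
        default=False,
        help='Load legacy ckpt files as diffusers.',
    )

    print(parser.parse_args([]).ckpt_convert)                     # False (the default)
    print(parser.parse_args(['--ckpt_convert']).ckpt_convert)     # True
    print(parser.parse_args(['--no-ckpt_convert']).ckpt_convert)  # False (generated negative form keeps the underscore)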
@@ -23,6 +23,7 @@ import torch
from pathlib import Path
from ldm.invoke.globals import Globals, global_cache_dir
from safetensors.torch import load_file
from typing import Union

try:
    from omegaconf import OmegaConf
@@ -46,9 +47,11 @@ from diffusers import (
)
from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.utils import is_safetensors_available
from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextModel, CLIPTokenizer, CLIPVisionConfig

from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline

def shave_segments(path, n_shave_prefix_segments=1):
    """
    Removes segments. Positive values shave the first segments, negative shave the last segments.
@@ -318,11 +321,10 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
unet_key = "model.diffusion_model."
# at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
if sum(k.startswith("model_ema") for k in keys) > 100:
    print(f"Checkpoint {path} has both EMA and non-EMA weights.")
    print(f" | Checkpoint {path} has both EMA and non-EMA weights.")
    if extract_ema:
        print(
            "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
            " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
            ' | Extracting EMA weights (usually better for inference)'
        )
        for key in keys:
            if key.startswith("model.diffusion_model"):
@@ -330,8 +332,7 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
                unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
    else:
        print(
            "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
            " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
            ' | Extracting only the non-EMA weights (usually better for fine-tuning)'
        )

for key in keys:
@@ -784,17 +785,44 @@ def convert_open_clip_checkpoint(checkpoint):

    return text_model

def convert_ckpt_to_diffuser(checkpoint_path:str,
    dump_path:str,
def load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path:str,
    original_config_file:str=None,
    num_in_channels:int=None,
    scheduler_type:str='pndm',
    pipeline_type:str=None,
    image_size:int=None,
    prediction_type:str=None,
    extract_ema:bool=False,
    extract_ema:bool=True,
    upcast_attn:bool=False,
):
    vae:AutoencoderKL=None
)->StableDiffusionGeneratorPipeline:
    '''
    Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
    config file.

    Although many of the arguments can be automatically inferred, some of these rely on brittle checks against the
    global step count, which will likely fail for models that have undergone further fine-tuning. Therefore, it is
    recommended that you override the default values and/or supply an `original_config_file` wherever possible.

    :param checkpoint_path: Path to `.ckpt` file.
    :param original_config_file: Path to `.yaml` config file corresponding to the original architecture.
        If `None`, will be automatically inferred by looking for a key that only exists in SD2.0 models.
    :param image_size: The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2
        Base. Use 768 for Stable Diffusion v2.
    :param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion
        v1.X and Stable Diffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
    :param num_in_channels: The number of input channels. If `None` number of input channels will be automatically
        inferred.
    :param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler",
        "euler-ancestral", "dpm", "ddim"]`. :param model_type: The pipeline type. `None` to automatically infer, or one of
        `["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder", "PaintByExample"]`. :param extract_ema: Only relevant for
        checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights
        or not. Defaults to `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher
        quality images for inference. Non-EMA weights are usually better to continue fine-tuning.
    :param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
        running stable diffusion 2.1.
    '''

    checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
    cache_dir = global_cache_dir('hub')
@@ -803,7 +831,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
if "global_step" in checkpoint:
    global_step = checkpoint["global_step"]
else:
    print("global_step key not found in model")
    print(" | global_step key not found in model")
    global_step = None

# sometimes there is a state_dict key and sometimes not
@@ -893,12 +921,16 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,

unet.load_state_dict(converted_unet_checkpoint)

# Convert the VAE model.
# Convert the VAE model, or use the one passed
if not vae:
    print(f' | Using checkpoint model\'s original VAE')
    vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
    converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)

    vae = AutoencoderKL(**vae_config)
    vae.load_state_dict(converted_vae_checkpoint)
else:
    print(f' | Using external VAE specified in config')

# Convert the text model.
model_type = pipeline_type
@@ -907,8 +939,11 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,

if model_type == "FrozenOpenCLIPEmbedder":
    text_model = convert_open_clip_checkpoint(checkpoint)
    tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer",cache_dir=global_cache_dir('diffusers'))
    pipe = StableDiffusionPipeline(
    tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
        subfolder="tokenizer",
        cache_dir=global_cache_dir('diffusers')
    )
    pipe = StableDiffusionGeneratorPipeline(
        vae=vae,
        text_encoder=text_model,
        tokenizer=tokenizer,
@@ -933,15 +968,14 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
    text_model = convert_ldm_clip_checkpoint(checkpoint)
    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
    safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
    feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
    pipe = StableDiffusionPipeline(
    pipe = StableDiffusionGeneratorPipeline(
        vae=vae,
        text_encoder=text_model,
        tokenizer=tokenizer,
        unet=unet,
        scheduler=scheduler,
        safety_checker=safety_checker,
        safety_checker=None,
        feature_extractor=feature_extractor,
    )
else:
@@ -950,7 +984,23 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
        tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
        pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)

    return pipe

def convert_ckpt_to_diffuser(
    checkpoint_path:Union[str,Path],
    dump_path:Union[str,Path],
    **kwargs,
):
    '''
    Takes all the arguments of load_pipeline_from_original_stable_diffusion_ckpt(),
    and in addition a path-like object indicating the location of the desired diffusers
    model to be written.
    '''
    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path,
        **kwargs
    )
    pipe.save_pretrained(
        dump_path,
        safe_serialization=1,
        safe_serialization=is_safetensors_available(),
    )
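A hedged usage sketch of the two entry points defined above, based only on the signatures shown in this diff; the file paths are hypothetical and stand in for real checkpoint and config locations.

    from ldm.invoke.ckpt_to_diffuser import (
        convert_ckpt_to_diffuser,
        load_pipeline_from_original_stable_diffusion_ckpt,
    )

    # Build an in-memory StableDiffusionGeneratorPipeline straight from legacy weights.
    pipe = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path='models/ldm/stable-diffusion-v1/model.ckpt',        # hypothetical path
        original_config_file='configs/stable-diffusion/v1-inference.yaml',  # hypothetical path
        extract_ema=True,
    )

    # Or convert once and write a diffusers folder to disk; extra kwargs are forwarded.
    convert_ckpt_to_diffuser(
        'models/ldm/stable-diffusion-v1/model.ckpt',   # hypothetical path
        'models/diffusers/converted-model',            # hypothetical output directory
        extract_ema=True,
    )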
@@ -51,6 +51,9 @@ Globals.disable_xformers = False
# whether we are forcing full precision
Globals.full_precision = False

# whether we should convert ckpt files into diffusers models on the fly
Globals.ckpt_convert = False

def global_config_file()->Path:
    return Path(Globals.root, Globals.config_dir, Globals.models_file)
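A minimal sketch (not part of the commit) of how the new module-level flag is toggled at startup and read elsewhere; only the Globals namespace and attribute name come from the hunks above.

    from ldm.invoke.globals import Globals

    # In the CLI this is set from the parsed arguments: Globals.ckpt_convert = args.ckpt_convert
    Globals.ckpt_convert = True
    if Globals.ckpt_convert:
        print('>> Legacy .ckpt files will be loaded as diffusers models on the fly')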
@@ -150,6 +150,10 @@ class ModelManager(object):
'''
Return true if this is a legacy (.ckpt) model
'''
# if we are converting legacy files automatically, then
# there are no legacy ckpts!
if Globals.ckpt_convert:
    return False
info = self.model_info(model_name)
if 'weights' in info and info['weights'].endswith(('.ckpt','.safetensors')):
    return True
@@ -340,6 +344,26 @@ class ModelManager(object):
    config = os.path.join(Globals.root,config)
if not os.path.isabs(weights):
    weights = os.path.normpath(os.path.join(Globals.root,weights))

# if converting automatically to diffusers, then we do the conversion and return
# a diffusers pipeline
if Globals.ckpt_convert:
    print(f'>> Converting legacy checkpoint {model_name} into a diffusers model...')
    from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
    if vae_config := self._choose_diffusers_vae(model_name):
        vae = self._load_vae(vae_config)
    pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
        checkpoint_path = weights,
        original_config_file = config,
        vae = vae,
    )
    return (
        pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),
        width,
        height,
        'NOHASH'
    )

# scan model
self.scan_model(model_name, weights)
@@ -484,7 +508,7 @@ class ModelManager(object):
    return pipeline, width, height, model_hash

def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path:
    if isinstance(model_name,DictConfig):
    if isinstance(model_name,DictConfig) or isinstance(model_name,dict):
        mconfig = model_name
    elif model_name in self.config:
        mconfig = self.config[model_name]
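A small sketch of why the isinstance check above was widened: with this commit a VAE spec can arrive as a plain dict (for example {'repo_id': ...} from _choose_diffusers_vae) rather than an OmegaConf DictConfig. The values below are illustrative.

    from omegaconf import OmegaConf

    inline_vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}                    # plain dict, new in this change
    legacy_vae = OmegaConf.create({'repo_id': 'stabilityai/sd-vae-ft-mse'})  # DictConfig from the models file

    for cfg in (inline_vae, legacy_vae):
        # DictConfig is not a dict subclass, so both checks are needed.
        print(type(cfg).__name__, isinstance(cfg, dict))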
@@ -664,6 +688,7 @@ class ModelManager(object):
    diffusers_path:Path,
    model_name=None,
    model_description=None,
    vae= None,
    commit_to_conf:Path=None,
)->dict:
    '''
@@ -681,38 +706,23 @@ class ModelManager(object):
model_description = model_description or 'Optimized version of {model_name}'
print(f'>> Optimizing {model_name} (30-60s)')
try:
    verbosity =transformers.logging.get_verbosity()
    transformers.logging.set_verbosity_error()
    convert_ckpt_to_diffuser(ckpt_path, diffusers_path,extract_ema=True)
    transformers.logging.set_verbosity(verbosity)
    print(f'>> Success. Optimized model is now located at {str(diffusers_path)}')
    print(f'>> Writing new config file entry for {model_name}')
    # By passing the specified VAE too the conversion function, the autoencoder
    # will be built into the model rather than tacked on afterward via the config file
    vae_model = self._load_vae(vae) if vae else None
    convert_ckpt_to_diffuser(
        ckpt_path,
        diffusers_path,
        extract_ema = True,
        vae = vae_model,
    )
    print(f' | Success. Optimized model is now located at {str(diffusers_path)}')
    print(f' | Writing new config file entry for {model_name}')
    new_config = dict(
        path=str(diffusers_path),
        description=model_description,
        format='diffusers',
    )

    # HACK (LS): in the event that the original entry is using a custom ckpt VAE, we try to
    # map that VAE onto a diffuser VAE using a hard-coded dictionary.
    # I would prefer to do this differently: We load the ckpt model into memory, swap the
    # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
    # VAE is built into the model. However, when I tried this I got obscure key errors.
    if model_name in self.config and (vae_ckpt_path := self.model_info(model_name)['vae']):
        vae_basename = Path(vae_ckpt_path).stem
        diffusers_vae = None
        if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
            print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
            new_config.update(
                vae = {'repo_id': diffusers_vae}
            )
        else:
            print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
            print(f'** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
            new_config.update(
                vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
            )

    if model_name in self.config:
        self.del_model(model_name)
    self.add_model(model_name, new_config, True)
    if commit_to_conf:
@@ -742,6 +752,27 @@ class ModelManager(object):

    return search_folder, found_models

def _choose_diffusers_vae(self, model_name:str, vae:str=None)->Union[dict,str]:

    # In the event that the original entry is using a custom ckpt VAE, we try to
    # map that VAE onto a diffuser VAE using a hard-coded dictionary.
    # I would prefer to do this differently: We load the ckpt model into memory, swap the
    # VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
    # VAE is built into the model. However, when I tried this I got obscure key errors.
    if vae:
        return vae
    if model_name in self.config and (vae_ckpt_path := self.model_info(model_name).get('vae',None)):
        vae_basename = Path(vae_ckpt_path).stem
        diffusers_vae = None
        if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
            print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
            vae = {'repo_id': diffusers_vae}
        else:
            print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
            print('** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
            vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
    return vae

def _make_cache_room(self) -> None:
    num_loaded_models = len(self.models)
    if num_loaded_models >= self.max_loaded_models:
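A minimal sketch of the lookup pattern used by _choose_diffusers_vae above; the VAE_TO_REPO_ID entry and the checkpoint path are assumptions for illustration, while the fallback repo id comes from the hunk itself.

    from pathlib import Path

    VAE_TO_REPO_ID = {'vae-ft-mse-840000-ema-pruned': 'stabilityai/sd-vae-ft-mse'}  # assumed mapping entry

    vae_ckpt_path = 'models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt'  # hypothetical
    vae_basename = Path(vae_ckpt_path).stem
    if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename, None)):
        vae = {'repo_id': diffusers_vae}                # known diffusers equivalent
    else:
        vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}  # documented fallback
    print(vae)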
@@ -976,7 +1007,7 @@ class ModelManager(object):
        f.write(hash)
    return hash

def _load_vae(self, vae_config):
def _load_vae(self, vae_config)->AutoencoderKL:
    vae_args = {}
    name_or_path = self.model_name_or_path(vae_config)
    using_fp16 = self.precision == 'float16'