Merge branch 'main' into dev/installer

commit 2202288eb2
Lincoln Stein, 2023-02-02 15:17:40 -05:00 (committed by GitHub)
5 changed files with 158 additions and 60 deletions

View File

@@ -44,11 +44,13 @@ def main():
print('--max_loaded_models must be >= 1; using 1')
args.max_loaded_models = 1
# alert - setting a global here
# alert - setting a few globals here
Globals.try_patchmatch = args.patchmatch
Globals.always_use_cpu = args.always_use_cpu
Globals.internet_available = args.internet_available and check_internet()
Globals.disable_xformers = not args.xformers
Globals.ckpt_convert = args.ckpt_convert
print(f'>> Internet connectivity is {Globals.internet_available}')
if not args.conf:
@@ -717,11 +719,16 @@ def optimize_model(model_name_or_path:str, gen, opt, completer):
print(f'** {model_name_or_path} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.')
return
vae = None
if input('Replace this model\'s VAE with "stabilityai/sd-vae-ft-mse"? [n] ').strip() in ('y','Y'):
vae = dict(repo_id='stabilityai/sd-vae-ft-mse')
new_config = gen.model_manager.convert_and_import(
ckpt_path,
diffuser_path,
model_name=model_name,
model_description=model_description,
vae = vae,
commit_to_conf=opt.conf,
)
if not new_config:

View File

@@ -503,6 +503,13 @@ class Args(object):
help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}',
default='auto',
)
model_group.add_argument(
'--ckpt_convert',
action=argparse.BooleanOptionalAction,
dest='ckpt_convert',
default=False,
help='Load legacy ckpt files as diffusers. Pass --no-ckpt_convert to inhibit this behavior',
)
model_group.add_argument(
'--internet',
action=argparse.BooleanOptionalAction,
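
Because --ckpt_convert is declared with argparse.BooleanOptionalAction (available in Python 3.9+), argparse derives the negative form from the option string itself, so the single declaration above accepts both --ckpt_convert and --no-ckpt_convert. A minimal stand-alone sketch of that behavior (the parser below is illustrative, not InvokeAI's own):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--ckpt_convert',
    action=argparse.BooleanOptionalAction,
    default=False,
    help='Load legacy ckpt files as diffusers',
)

print(parser.parse_args([]))                     # Namespace(ckpt_convert=False)
print(parser.parse_args(['--ckpt_convert']))     # Namespace(ckpt_convert=True)
print(parser.parse_args(['--no-ckpt_convert']))  # Namespace(ckpt_convert=False)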

View File

@@ -23,6 +23,7 @@ import torch
from pathlib import Path
from ldm.invoke.globals import Globals, global_cache_dir
from safetensors.torch import load_file
from typing import Union
try:
from omegaconf import OmegaConf
@@ -46,9 +47,11 @@ from diffusers import (
)
from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from diffusers.utils import is_safetensors_available
from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextModel, CLIPTokenizer, CLIPVisionConfig
from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline
def shave_segments(path, n_shave_prefix_segments=1):
"""
Removes segments. Positive values shave the first segments, negative shave the last segments.
@@ -318,11 +321,10 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
unet_key = "model.diffusion_model."
# at least 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
if sum(k.startswith("model_ema") for k in keys) > 100:
print(f"Checkpoint {path} has both EMA and non-EMA weights.")
print(f" | Checkpoint {path} has both EMA and non-EMA weights.")
if extract_ema:
print(
"In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
" weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
' | Extracting EMA weights (usually better for inference)'
)
for key in keys:
if key.startswith("model.diffusion_model"):
@@ -330,8 +332,7 @@ def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False
unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
else:
print(
"In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
" weights (usually better for inference), please make sure to add the `--extract_ema` flag."
' | Extracting only the non-EMA weights (usually better for fine-tuning)'
)
for key in keys:
@@ -784,17 +785,44 @@ def convert_open_clip_checkpoint(checkpoint):
return text_model
def convert_ckpt_to_diffuser(checkpoint_path:str,
dump_path:str,
original_config_file:str=None,
num_in_channels:int=None,
scheduler_type:str='pndm',
pipeline_type:str=None,
image_size:int=None,
prediction_type:str=None,
extract_ema:bool=False,
upcast_attn:bool=False,
):
def load_pipeline_from_original_stable_diffusion_ckpt(
checkpoint_path:str,
original_config_file:str=None,
num_in_channels:int=None,
scheduler_type:str='pndm',
pipeline_type:str=None,
image_size:int=None,
prediction_type:str=None,
extract_ema:bool=True,
upcast_attn:bool=False,
vae:AutoencoderKL=None
)->StableDiffusionGeneratorPipeline:
'''
Load a Stable Diffusion pipeline object from a CompVis-style `.ckpt`/`.safetensors` file and (ideally) a `.yaml`
config file.
Although many of the arguments can be automatically inferred, some of these rely on brittle checks against the
global step count, which will likely fail for models that have undergone further fine-tuning. Therefore, it is
recommended that you override the default values and/or supply an `original_config_file` wherever possible.
:param checkpoint_path: Path to `.ckpt` file.
:param original_config_file: Path to `.yaml` config file corresponding to the original architecture.
If `None`, will be automatically inferred by looking for a key that only exists in SD2.0 models.
:param image_size: The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2
Base. Use 768 for Stable Diffusion v2.
:param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion
v1.X and Stable Diffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
:param num_in_channels: The number of input channels. If `None` number of input channels will be automatically
inferred.
:param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler",
"euler-ancestral", "dpm", "ddim"]`.
:param model_type: The pipeline type. `None` to automatically infer, or one of
`["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder", "PaintByExample"]`.
:param extract_ema: Only relevant for checkpoints that have both EMA and non-EMA weights. Whether to extract
the EMA weights or not. Defaults to `True`. EMA weights usually yield higher quality images for inference;
non-EMA weights are usually better for continued fine-tuning.
:param vae: An externally loaded AutoencoderKL to build into the pipeline in place of the checkpoint's own VAE.
If `None`, the VAE is converted from the checkpoint.
:param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
running stable diffusion 2.1.
'''
checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
cache_dir = global_cache_dir('hub')
@@ -803,7 +831,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
if "global_step" in checkpoint:
global_step = checkpoint["global_step"]
else:
print("global_step key not found in model")
print(" | global_step key not found in model")
global_step = None
# sometimes there is a state_dict key and sometimes not
@@ -893,12 +921,16 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
unet.load_state_dict(converted_unet_checkpoint)
# Convert the VAE model.
vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
# Convert the VAE model, or use the one passed
if not vae:
print(f' | Using checkpoint model\'s original VAE')
vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
vae = AutoencoderKL(**vae_config)
vae.load_state_dict(converted_vae_checkpoint)
vae = AutoencoderKL(**vae_config)
vae.load_state_dict(converted_vae_checkpoint)
else:
print(f' | Using external VAE specified in config')
# Convert the text model.
model_type = pipeline_type
@@ -907,8 +939,11 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
if model_type == "FrozenOpenCLIPEmbedder":
text_model = convert_open_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer",cache_dir=global_cache_dir('diffusers'))
pipe = StableDiffusionPipeline(
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
subfolder="tokenizer",
cache_dir=global_cache_dir('diffusers')
)
pipe = StableDiffusionGeneratorPipeline(
vae=vae,
text_encoder=text_model,
tokenizer=tokenizer,
@@ -933,15 +968,14 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
text_model = convert_ldm_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
pipe = StableDiffusionPipeline(
pipe = StableDiffusionGeneratorPipeline(
vae=vae,
text_encoder=text_model,
tokenizer=tokenizer,
unet=unet,
scheduler=scheduler,
safety_checker=safety_checker,
safety_checker=None,
feature_extractor=feature_extractor,
)
else:
@@ -950,7 +984,23 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
return pipe
def convert_ckpt_to_diffuser(
checkpoint_path:Union[str,Path],
dump_path:Union[str,Path],
**kwargs,
):
'''
Takes all the arguments of load_pipeline_from_original_stable_diffusion_ckpt(),
plus a path-like object (dump_path) indicating where the converted diffusers
model should be written.
'''
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
checkpoint_path,
**kwargs
)
pipe.save_pretrained(
dump_path,
safe_serialization=1,
safe_serialization=is_safetensors_available(),
)
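
The conversion code is now split into two layers: load_pipeline_from_original_stable_diffusion_ckpt() builds the pipeline in memory (optionally with an externally supplied VAE), while convert_ckpt_to_diffuser() wraps it and writes the result to disk. A rough usage sketch under those assumptions; the file paths and the v1-inference.yaml name below are placeholders, not values from this commit:

from ldm.invoke.ckpt_to_diffuser import (
    convert_ckpt_to_diffuser,
    load_pipeline_from_original_stable_diffusion_ckpt,
)

# Load a legacy checkpoint straight into memory as a diffusers-style pipeline,
# converting the VAE embedded in the checkpoint (vae=None is the default).
pipe = load_pipeline_from_original_stable_diffusion_ckpt(
    checkpoint_path='models/my-model.ckpt',            # placeholder path
    original_config_file='configs/v1-inference.yaml',  # placeholder path
    extract_ema=True,
)

# Or convert and write the diffusers model to disk in one call;
# convert_ckpt_to_diffuser() forwards its keyword arguments to the loader
# and then calls save_pretrained() on the dump_path.
convert_ckpt_to_diffuser(
    'models/my-model.ckpt',       # placeholder checkpoint path
    'models/my-model-diffusers',  # placeholder output directory
    extract_ema=True,
)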

View File

@@ -51,6 +51,9 @@ Globals.disable_xformers = False
# whether we are forcing full precision
Globals.full_precision = False
# whether we should convert ckpt files into diffusers models on the fly
Globals.ckpt_convert = False
def global_config_file()->Path:
return Path(Globals.root, Globals.config_dir, Globals.models_file)

View File

@@ -150,6 +150,10 @@ class ModelManager(object):
'''
Return true if this is a legacy (.ckpt) model
'''
# if we are converting legacy files automatically, then
# there are no legacy ckpts!
if Globals.ckpt_convert:
return False
info = self.model_info(model_name)
if 'weights' in info and info['weights'].endswith(('.ckpt','.safetensors')):
return True
@@ -340,6 +344,26 @@ class ModelManager(object):
config = os.path.join(Globals.root,config)
if not os.path.isabs(weights):
weights = os.path.normpath(os.path.join(Globals.root,weights))
# if converting automatically to diffusers, then we do the conversion and return
# a diffusers pipeline
if Globals.ckpt_convert:
print(f'>> Converting legacy checkpoint {model_name} into a diffusers model...')
from ldm.invoke.ckpt_to_diffuser import load_pipeline_from_original_stable_diffusion_ckpt
if vae_config := self._choose_diffusers_vae(model_name):
vae = self._load_vae(vae_config)
pipeline = load_pipeline_from_original_stable_diffusion_ckpt(
checkpoint_path = weights,
original_config_file = config,
vae = vae,
)
return (
pipeline.to(self.device).to(torch.float16 if self.precision == 'float16' else torch.float32),
width,
height,
'NOHASH'
)
# scan model
self.scan_model(model_name, weights)
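
Taken together with the Args and Globals changes, the new flag short-circuits the legacy code path entirely. A hedged sketch of the effect (the model name is a placeholder, and is_legacy() is the presumed name of the method whose body appears in the first hunk of this file, since its def line is not shown):

from ldm.invoke.globals import Globals

# Normally set from the --ckpt_convert CLI argument in main().
Globals.ckpt_convert = True

# With the flag on, ModelManager reports that there are no legacy models:
#   manager.is_legacy('stable-diffusion-1.5')  # -> False  (placeholder name)
# and any entry whose weights point at a .ckpt/.safetensors file is converted
# in memory at load time via load_pipeline_from_original_stable_diffusion_ckpt(),
# with the VAE selected by _choose_diffusers_vae() built into the pipeline.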
@@ -484,7 +508,7 @@ class ModelManager(object):
return pipeline, width, height, model_hash
def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path:
if isinstance(model_name,DictConfig):
if isinstance(model_name,DictConfig) or isinstance(model_name,dict):
mconfig = model_name
elif model_name in self.config:
mconfig = self.config[model_name]
@@ -664,6 +688,7 @@ class ModelManager(object):
diffusers_path:Path,
model_name=None,
model_description=None,
vae= None,
commit_to_conf:Path=None,
)->dict:
'''
@@ -681,39 +706,24 @@ class ModelManager(object):
model_description = model_description or 'Optimized version of {model_name}'
print(f'>> Optimizing {model_name} (30-60s)')
try:
verbosity =transformers.logging.get_verbosity()
transformers.logging.set_verbosity_error()
convert_ckpt_to_diffuser(ckpt_path, diffusers_path,extract_ema=True)
transformers.logging.set_verbosity(verbosity)
print(f'>> Success. Optimized model is now located at {str(diffusers_path)}')
print(f'>> Writing new config file entry for {model_name}')
# By passing the specified VAE to the conversion function, the autoencoder
# will be built into the model rather than tacked on afterward via the config file
vae_model = self._load_vae(vae) if vae else None
convert_ckpt_to_diffuser(
ckpt_path,
diffusers_path,
extract_ema = True,
vae = vae_model,
)
print(f' | Success. Optimized model is now located at {str(diffusers_path)}')
print(f' | Writing new config file entry for {model_name}')
new_config = dict(
path=str(diffusers_path),
description=model_description,
format='diffusers',
)
# HACK (LS): in the event that the original entry is using a custom ckpt VAE, we try to
# map that VAE onto a diffuser VAE using a hard-coded dictionary.
# I would prefer to do this differently: We load the ckpt model into memory, swap the
# VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
# VAE is built into the model. However, when I tried this I got obscure key errors.
if model_name in self.config and (vae_ckpt_path := self.model_info(model_name)['vae']):
vae_basename = Path(vae_ckpt_path).stem
diffusers_vae = None
if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
new_config.update(
vae = {'repo_id': diffusers_vae}
)
else:
print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
print(f'** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
new_config.update(
vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
)
self.del_model(model_name)
if model_name in self.config:
self.del_model(model_name)
self.add_model(model_name, new_config, True)
if commit_to_conf:
self.commit(commit_to_conf)
@@ -742,6 +752,27 @@ class ModelManager(object):
return search_folder, found_models
def _choose_diffusers_vae(self, model_name:str, vae:str=None)->Union[dict,str]:
# In the event that the original entry is using a custom ckpt VAE, we try to
# map that VAE onto a diffuser VAE using a hard-coded dictionary.
# I would prefer to do this differently: We load the ckpt model into memory, swap the
# VAE in memory, and then pass that to convert_ckpt_to_diffuser() so that the swapped
# VAE is built into the model. However, when I tried this I got obscure key errors.
if vae:
return vae
if model_name in self.config and (vae_ckpt_path := self.model_info(model_name).get('vae',None)):
vae_basename = Path(vae_ckpt_path).stem
diffusers_vae = None
if (diffusers_vae := VAE_TO_REPO_ID.get(vae_basename,None)):
print(f'>> {vae_basename} VAE corresponds to known {diffusers_vae} diffusers version')
vae = {'repo_id': diffusers_vae}
else:
print(f'** Custom VAE "{vae_basename}" found, but corresponding diffusers model unknown')
print('** Using "stabilityai/sd-vae-ft-mse"; If this isn\'t right, please edit the model config')
vae = {'repo_id': 'stabilityai/sd-vae-ft-mse'}
return vae
def _make_cache_room(self) -> None:
num_loaded_models = len(self.models)
if num_loaded_models >= self.max_loaded_models:
@@ -976,7 +1007,7 @@ class ModelManager(object):
f.write(hash)
return hash
def _load_vae(self, vae_config):
def _load_vae(self, vae_config)->AutoencoderKL:
vae_args = {}
name_or_path = self.model_name_or_path(vae_config)
using_fp16 = self.precision == 'float16'