Merge branch 'main' into lstein-import-safetensors

Lincoln Stein, 2023-01-23 21:58:07 -05:00, committed by GitHub
59 changed files with 2415 additions and 745 deletions

@@ -45,6 +45,7 @@ def main():
Globals.try_patchmatch = args.patchmatch
Globals.always_use_cpu = args.always_use_cpu
Globals.internet_available = args.internet_available and check_internet()
Globals.disable_xformers = not args.xformers
print(f'>> Internet connectivity is {Globals.internet_available}')
if not args.conf:
@@ -124,7 +125,7 @@ def main():
# preload the model
try:
gen.load_model()
except KeyError as e:
except KeyError:
pass
except Exception as e:
report_model_error(opt, e)
@@ -731,11 +732,6 @@ def del_config(model_name:str, gen, opt, completer):
completer.update_models(gen.model_manager.list_models())
def edit_model(model_name:str, gen, opt, completer):
current_model = gen.model_name
# if model_name == current_model:
# print("** Can't edit the active model. !switch to another model first. **")
# return
manager = gen.model_manager
if not (info := manager.model_info(model_name)):
print(f'** Unknown model {model_name}')
@@ -887,7 +883,7 @@ def prepare_image_metadata(
try:
filename = opt.fnformat.format(**wildcards)
except KeyError as e:
print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use \'{{prefix}}.{{seed}}.png\' instead')
print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use {{prefix}}.{{seed}}.png\' instead')
filename = f'{prefix}.{seed}.png'
except IndexError:
print(f'** The filename format is broken or incomplete. Will use \'{{prefix}}.{{seed}}.png\' instead')
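For context, a minimal illustrative sketch of the filename templating these messages guard (names are placeholders, not part of this commit): opt.fnformat is expanded with str.format() against a dict of wildcards, and an unrecognized key raises KeyError, which triggers the fallback name.

wildcards = {'prefix': '000001', 'seed': 42}          # illustrative wildcard values
fnformat = '{prefix}.{seed}.png'                      # user-supplied filename template
try:
    filename = fnformat.format(**wildcards)           # -> '000001.42.png'
except KeyError:
    # template referenced an unknown key: fall back to the default pattern
    filename = f"{wildcards['prefix']}.{wildcards['seed']}.png"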

@@ -482,6 +482,12 @@ class Args(object):
action='store_true',
help='Force free gpu memory before final decoding',
)
model_group.add_argument(
'--xformers',
action=argparse.BooleanOptionalAction,
default=True,
help='Enable/disable xformers support (default enabled if installed)',
)
model_group.add_argument(
"--always_use_cpu",
dest="always_use_cpu",

@@ -56,9 +56,11 @@ class CkptGenerator():
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
attention_maps_callback = None,
free_gpu_mem: bool=False,
**kwargs):
scope = choose_autocast(self.precision)
self.safety_checker = safety_checker
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
make_image = self.get_make_image(

@@ -21,7 +21,7 @@ import os
import re
import torch
from pathlib import Path
from ldm.invoke.globals import Globals
from ldm.invoke.globals import Globals, global_cache_dir
from safetensors.torch import load_file
try:
@@ -637,7 +637,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
def convert_ldm_clip_checkpoint(checkpoint):
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14",cache_dir=global_cache_dir('hub'))
keys = list(checkpoint.keys())
@@ -677,7 +677,8 @@ textenc_pattern = re.compile("|".join(protected.keys()))
def convert_paint_by_example_checkpoint(checkpoint):
config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14")
cache_dir = global_cache_dir('hub')
config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
model = PaintByExampleImageEncoder(config)
keys = list(checkpoint.keys())
@@ -744,7 +745,8 @@ def convert_paint_by_example_checkpoint(checkpoint):
def convert_open_clip_checkpoint(checkpoint):
text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
cache_dir=global_cache_dir('hub')
text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder", cache_dir=cache_dir)
keys = list(checkpoint.keys())
@@ -795,6 +797,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
):
checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
cache_dir = global_cache_dir('hub')
# Sometimes models don't have the global_step item
if "global_step" in checkpoint:
@@ -904,7 +907,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
if model_type == "FrozenOpenCLIPEmbedder":
text_model = convert_open_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer",cache_dir=global_cache_dir('diffusers'))
pipe = StableDiffusionPipeline(
vae=vae,
text_encoder=text_model,
@@ -917,8 +920,8 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
)
elif model_type == "PaintByExample":
vision_model = convert_paint_by_example_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
pipe = PaintByExamplePipeline(
vae=vae,
image_encoder=vision_model,
@@ -929,9 +932,9 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
)
elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
text_model = convert_ldm_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker")
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
pipe = StableDiffusionPipeline(
vae=vae,
text_encoder=text_model,
@@ -944,7 +947,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
else:
text_config = create_ldm_bert_config(original_config)
text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
pipe.save_pretrained(
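The recurring change in this file is a single pattern: each from_pretrained() call now receives a cache_dir so downloads land in InvokeAI's managed cache rather than the default Hugging Face location. A minimal sketch of the pattern (the model id is one already used above):

from transformers import CLIPTokenizer
from ldm.invoke.globals import global_cache_dir

tokenizer = CLIPTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14",
    cache_dir=global_cache_dir('hub'),   # route the download into InvokeAI's cache
)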

@@ -62,9 +62,11 @@ class Generator:
def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None,
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
free_gpu_mem: bool=False,
**kwargs):
scope = nullcontext
self.safety_checker = safety_checker
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
make_image = self.get_make_image(

@@ -39,6 +39,7 @@ from diffusers.utils.outputs import BaseOutput
from torchvision.transforms.functional import resize as tv_resize
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from ldm.invoke.globals import Globals
from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent, ThresholdSettings
from ldm.modules.textual_inversion_manager import TextualInversionManager
@@ -306,7 +307,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
textual_inversion_manager=self.textual_inversion_manager
)
if is_xformers_available():
if is_xformers_available() and not Globals.disable_xformers:
self.enable_xformers_memory_efficient_attention()
def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int,
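A short sketch of the guard this hunk introduces, written as a hypothetical helper (the import path for is_xformers_available is assumed from diffusers): memory-efficient attention is enabled only when xformers is importable and the user has not passed --no-xformers.

from diffusers.utils import is_xformers_available
from ldm.invoke.globals import Globals

def maybe_enable_xformers(pipe):
    # pipe is assumed to be a diffusers StableDiffusionPipeline (or subclass)
    if is_xformers_available() and not Globals.disable_xformers:
        pipe.enable_xformers_memory_efficient_attention()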

@@ -3,6 +3,7 @@ ldm.invoke.generator.txt2img inherits from ldm.invoke.generator
'''
import math
from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error
from typing import Callable, Optional
import torch
@@ -66,6 +67,8 @@ class Txt2Img2Img(Generator):
second_pass_noise = self.get_noise_like(resized_latents)
verbosity = get_verbosity()
set_verbosity_error()
pipeline_output = pipeline.img2img_from_latents_and_embeddings(
resized_latents,
num_inference_steps=steps,
@@ -73,6 +76,7 @@ class Txt2Img2Img(Generator):
strength=strength,
noise=second_pass_noise,
callback=step_callback)
set_verbosity(verbosity)
return pipeline.numpy_to_pil(pipeline_output.images)[0]
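The added lines bracket the second-pass call with diffusers' logging helpers so its warnings are silenced and the previous verbosity is restored afterwards. A sketch of the same pattern as a hypothetical reusable wrapper (the diff inlines it instead of using a helper):

from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error

def quiet_call(fn, *args, **kwargs):
    # Run fn with diffusers warnings silenced, then restore the prior verbosity.
    verbosity = get_verbosity()
    set_verbosity_error()
    try:
        return fn(*args, **kwargs)
    finally:
        set_verbosity(verbosity)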

@@ -29,6 +29,7 @@ else:
# Where to look for the initialization file
Globals.initfile = 'invokeai.init'
Globals.models_file = 'models.yaml'
Globals.models_dir = 'models'
Globals.config_dir = 'configs'
Globals.autoscan_dir = 'weights'
@@ -43,9 +44,15 @@ Globals.always_use_cpu = False
# The CLI will test connectivity at startup time.
Globals.internet_available = True
# Whether to disable xformers
Globals.disable_xformers = False
# whether we are forcing full precision
Globals.full_precision = False
def global_config_file()->Path:
return Path(Globals.root, Globals.config_dir, Globals.models_file)
def global_config_dir()->Path:
return Path(Globals.root, Globals.config_dir)

@@ -0,0 +1,62 @@
'''
ldm.invoke.merge_diffusers exports a single function call merge_diffusion_models()
used to merge 2-3 models together and create a new InvokeAI-registered diffusion model.
'''
import os
from typing import List
from diffusers import DiffusionPipeline
from ldm.invoke.globals import global_config_file, global_models_dir, global_cache_dir
from ldm.invoke.model_manager import ModelManager
from omegaconf import OmegaConf
def merge_diffusion_models(models:List['str'],
merged_model_name:str,
alpha:float=0.5,
interp:str=None,
force:bool=False,
**kwargs):
'''
models - up to three models, designated by their InvokeAI models.yaml model name
merged_model_name = name for new model
alpha - The interpolation parameter. Ranges from 0 to 1. It affects the ratio in which the checkpoints are merged. A 0.8 alpha
would mean that the first model checkpoints would affect the final result far less than an alpha of 0.2
interp - The interpolation method to use for the merging. Supports "sigmoid", "inv_sigmoid", "add_difference" and None.
Passing None uses the default interpolation which is weighted sum interpolation. For merging three checkpoints, only "add_difference" is supported.
force - Whether to ignore mismatch in model_config.json for the current models. Defaults to False.
**kwargs - the default DiffusionPipeline.get_config_dict kwargs:
cache_dir, resume_download, force_download, proxies, local_files_only, use_auth_token, revision, torch_dtype, device_map
'''
config_file = global_config_file()
model_manager = ModelManager(OmegaConf.load(config_file))
for mod in models:
assert (mod in model_manager.model_names()), f'** Unknown model "{mod}"'
assert (model_manager.model_info(mod).get('format',None) == 'diffusers'), f'** {mod} is not a diffusers model. It must be optimized before merging.'
model_ids_or_paths = [model_manager.model_name_or_path(x) for x in models]
pipe = DiffusionPipeline.from_pretrained(model_ids_or_paths[0],
cache_dir=kwargs.get('cache_dir',global_cache_dir()),
custom_pipeline='checkpoint_merger')
merged_pipe = pipe.merge(pretrained_model_name_or_path_list=model_ids_or_paths,
alpha=alpha,
interp=interp,
force=force,
**kwargs)
dump_path = global_models_dir() / 'merged_diffusers'
os.makedirs(dump_path,exist_ok=True)
dump_path = dump_path / merged_model_name
merged_pipe.save_pretrained (
dump_path,
safe_serialization=1
)
model_manager.import_diffuser_model(
dump_path,
model_name = merged_model_name,
description = f'Merge of models {", ".join(models)}'
)
print('REMINDER: When PR 2369 is merged, replace merge_diffusers.py line 56 with vae= argument to impormodel()')
if vae := model_manager.config[models[0]].get('vae',None):
print(f'>> Using configured VAE assigned to {models[0]}')
model_manager.config[merged_model_name]['vae'] = vae
model_manager.commit(config_file)
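A hypothetical usage sketch of the new helper; the model names are placeholders and must already be registered as diffusers models in models.yaml.

from ldm.invoke.merge_diffusers import merge_diffusion_models

merge_diffusion_models(
    models=['stable-diffusion-1.5', 'my-finetune'],   # placeholder model names
    merged_model_name='sd15-plus-finetune',           # placeholder name for the merged model
    alpha=0.5,                                        # equal weighting of the two checkpoints
    interp=None,                                      # None selects weighted-sum interpolation
)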

@@ -25,6 +25,7 @@ import torch
import safetensors
import transformers
from diffusers import AutoencoderKL, logging as dlogging
from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error
from omegaconf import OmegaConf
from omegaconf.dictconfig import DictConfig
from picklescan.scanner import scan_file_path
@@ -36,7 +37,11 @@ from ldm.util import instantiate_from_config, ask_user
DEFAULT_MAX_MODELS=2
class ModelManager(object):
def __init__(self, config:OmegaConf, device_type:str, precision:str, max_loaded_models=DEFAULT_MAX_MODELS):
def __init__(self,
config:OmegaConf,
device_type:str='cpu',
precision:str='float16',
max_loaded_models=DEFAULT_MAX_MODELS):
'''
Initialize with the path to the models.yaml config file,
the torch device type, and precision. The optional
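With device_type and precision now defaulted, a ModelManager can be constructed from just the models.yaml config. A minimal sketch, assuming a standard InvokeAI root:

from omegaconf import OmegaConf
from ldm.invoke.globals import global_config_file
from ldm.invoke.model_manager import ModelManager

manager = ModelManager(OmegaConf.load(global_config_file()))   # device_type='cpu', precision='float16' by default
print(manager.list_models())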
@@ -541,7 +546,7 @@ class ModelManager(object):
format='diffusers',
)
if isinstance(repo_or_path,Path) and repo_or_path.exists():
new_config.update(path=repo_or_path)
new_config.update(path=str(repo_or_path))
else:
new_config.update(repo_id=repo_or_path)
@@ -833,11 +838,11 @@ class ModelManager(object):
return model
# diffusers really really doesn't like us moving a float16 model onto CPU
import logging
logging.getLogger('diffusers.pipeline_utils').setLevel(logging.CRITICAL)
verbosity = get_verbosity()
set_verbosity_error()
model.cond_stage_model.device = 'cpu'
model.to('cpu')
logging.getLogger('pipeline_utils').setLevel(logging.INFO)
set_verbosity(verbosity)
for submodel in ('first_stage_model','cond_stage_model','model'):
try: