Merge branch 'main' into lstein-import-safetensors

This commit is contained in:
Lincoln Stein
2023-01-23 21:58:07 -05:00
committed by GitHub
59 changed files with 2415 additions and 745 deletions

View File

@ -146,7 +146,7 @@ class Generate:
gfpgan=None,
codeformer=None,
esrgan=None,
free_gpu_mem=False,
free_gpu_mem: bool=False,
safety_checker:bool=False,
max_loaded_models:int=2,
# these are deprecated; if present they override values in the conf file
@ -460,10 +460,13 @@ class Generate:
init_image = None
mask_image = None
if self.free_gpu_mem and self.model.cond_stage_model.device != self.model.device:
self.model.cond_stage_model.device = self.model.device
self.model.cond_stage_model.to(self.model.device)
try:
if self.free_gpu_mem and self.model.cond_stage_model.device != self.model.device:
self.model.cond_stage_model.device = self.model.device
self.model.cond_stage_model.to(self.model.device)
except AttributeError:
print(">> Warning: '--free_gpu_mem' is not yet supported when generating image using model based on HuggingFace Diffuser.")
pass
try:
uc, c, extra_conditioning_info = get_uc_and_c_and_ec(
@ -531,6 +534,7 @@ class Generate:
inpaint_height = inpaint_height,
inpaint_width = inpaint_width,
enable_image_debugging = enable_image_debugging,
free_gpu_mem=self.free_gpu_mem,
)
if init_color:

View File

@ -45,6 +45,7 @@ def main():
Globals.try_patchmatch = args.patchmatch
Globals.always_use_cpu = args.always_use_cpu
Globals.internet_available = args.internet_available and check_internet()
Globals.disable_xformers = not args.xformers
print(f'>> Internet connectivity is {Globals.internet_available}')
if not args.conf:
@ -124,7 +125,7 @@ def main():
# preload the model
try:
gen.load_model()
except KeyError as e:
except KeyError:
pass
except Exception as e:
report_model_error(opt, e)
@ -731,11 +732,6 @@ def del_config(model_name:str, gen, opt, completer):
completer.update_models(gen.model_manager.list_models())
def edit_model(model_name:str, gen, opt, completer):
current_model = gen.model_name
# if model_name == current_model:
# print("** Can't edit the active model. !switch to another model first. **")
# return
manager = gen.model_manager
if not (info := manager.model_info(model_name)):
print(f'** Unknown model {model_name}')
@ -887,7 +883,7 @@ def prepare_image_metadata(
try:
filename = opt.fnformat.format(**wildcards)
except KeyError as e:
print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use \'{{prefix}}.{{seed}}.png\' instead')
print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use {{prefix}}.{{seed}}.png\' instead')
filename = f'{prefix}.{seed}.png'
except IndexError:
print(f'** The filename format is broken or complete. Will use \'{{prefix}}.{{seed}}.png\' instead')

View File

@ -482,6 +482,12 @@ class Args(object):
action='store_true',
help='Force free gpu memory before final decoding',
)
model_group.add_argument(
'--xformers',
action=argparse.BooleanOptionalAction,
default=True,
help='Enable/disable xformers support (default enabled if installed)',
)
model_group.add_argument(
"--always_use_cpu",
dest="always_use_cpu",

View File

@ -56,9 +56,11 @@ class CkptGenerator():
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
attention_maps_callback = None,
free_gpu_mem: bool=False,
**kwargs):
scope = choose_autocast(self.precision)
self.safety_checker = safety_checker
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
make_image = self.get_make_image(

View File

@ -21,7 +21,7 @@ import os
import re
import torch
from pathlib import Path
from ldm.invoke.globals import Globals
from ldm.invoke.globals import Globals, global_cache_dir
from safetensors.torch import load_file
try:
@ -637,7 +637,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
def convert_ldm_clip_checkpoint(checkpoint):
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14",cache_dir=global_cache_dir('hub'))
keys = list(checkpoint.keys())
@ -677,7 +677,8 @@ textenc_pattern = re.compile("|".join(protected.keys()))
def convert_paint_by_example_checkpoint(checkpoint):
config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14")
cache_dir = global_cache_dir('hub')
config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
model = PaintByExampleImageEncoder(config)
keys = list(checkpoint.keys())
@ -744,7 +745,8 @@ def convert_paint_by_example_checkpoint(checkpoint):
def convert_open_clip_checkpoint(checkpoint):
text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
cache_dir=global_cache_dir('hub')
text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder", cache_dir=cache_dir)
keys = list(checkpoint.keys())
@ -795,6 +797,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
):
checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else torch.load(checkpoint_path)
cache_dir = global_cache_dir('hub')
# Sometimes models don't have the global_step item
if "global_step" in checkpoint:
@ -904,7 +907,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
if model_type == "FrozenOpenCLIPEmbedder":
text_model = convert_open_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer",cache_dir=global_cache_dir('diffusers'))
pipe = StableDiffusionPipeline(
vae=vae,
text_encoder=text_model,
@ -917,8 +920,8 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
)
elif model_type == "PaintByExample":
vision_model = convert_paint_by_example_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
pipe = PaintByExamplePipeline(
vae=vae,
image_encoder=vision_model,
@ -929,9 +932,9 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
)
elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']:
text_model = convert_ldm_clip_checkpoint(checkpoint)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker")
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14",cache_dir=cache_dir)
safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker",cache_dir=cache_dir)
pipe = StableDiffusionPipeline(
vae=vae,
text_encoder=text_model,
@ -944,7 +947,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str,
else:
text_config = create_ldm_bert_config(original_config)
text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased",cache_dir=cache_dir)
pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler)
pipe.save_pretrained(

View File

@ -62,9 +62,11 @@ class Generator:
def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None,
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
free_gpu_mem: bool=False,
**kwargs):
scope = nullcontext
self.safety_checker = safety_checker
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
make_image = self.get_make_image(

View File

@ -39,6 +39,7 @@ from diffusers.utils.outputs import BaseOutput
from torchvision.transforms.functional import resize as tv_resize
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from ldm.invoke.globals import Globals
from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent, ThresholdSettings
from ldm.modules.textual_inversion_manager import TextualInversionManager
@ -306,7 +307,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
textual_inversion_manager=self.textual_inversion_manager
)
if is_xformers_available():
if is_xformers_available() and not Globals.disable_xformers:
self.enable_xformers_memory_efficient_attention()
def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int,

View File

@ -3,6 +3,7 @@ ldm.invoke.generator.txt2img inherits from ldm.invoke.generator
'''
import math
from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error
from typing import Callable, Optional
import torch
@ -66,6 +67,8 @@ class Txt2Img2Img(Generator):
second_pass_noise = self.get_noise_like(resized_latents)
verbosity = get_verbosity()
set_verbosity_error()
pipeline_output = pipeline.img2img_from_latents_and_embeddings(
resized_latents,
num_inference_steps=steps,
@ -73,6 +76,7 @@ class Txt2Img2Img(Generator):
strength=strength,
noise=second_pass_noise,
callback=step_callback)
set_verbosity(verbosity)
return pipeline.numpy_to_pil(pipeline_output.images)[0]

View File

@ -29,6 +29,7 @@ else:
# Where to look for the initialization file
Globals.initfile = 'invokeai.init'
Globals.models_file = 'models.yaml'
Globals.models_dir = 'models'
Globals.config_dir = 'configs'
Globals.autoscan_dir = 'weights'
@ -43,9 +44,15 @@ Globals.always_use_cpu = False
# The CLI will test connectivity at startup time.
Globals.internet_available = True
# Whether to disable xformers
Globals.disable_xformers = False
# whether we are forcing full precision
Globals.full_precision = False
def global_config_file()->Path:
return Path(Globals.root, Globals.config_dir, Globals.models_file)
def global_config_dir()->Path:
return Path(Globals.root, Globals.config_dir)

View File

@ -0,0 +1,62 @@
'''
ldm.invoke.merge_diffusers exports a single function call merge_diffusion_models()
used to merge 2-3 models together and create a new InvokeAI-registered diffusion model.
'''
import os
from typing import List
from diffusers import DiffusionPipeline
from ldm.invoke.globals import global_config_file, global_models_dir, global_cache_dir
from ldm.invoke.model_manager import ModelManager
from omegaconf import OmegaConf
def merge_diffusion_models(models:List['str'],
merged_model_name:str,
alpha:float=0.5,
interp:str=None,
force:bool=False,
**kwargs):
'''
models - up to three models, designated by their InvokeAI models.yaml model name
merged_model_name = name for new model
alpha - The interpolation parameter. Ranges from 0 to 1. It affects the ratio in which the checkpoints are merged. A 0.8 alpha
would mean that the first model checkpoints would affect the final result far less than an alpha of 0.2
interp - The interpolation method to use for the merging. Supports "sigmoid", "inv_sigmoid", "add_difference" and None.
Passing None uses the default interpolation which is weighted sum interpolation. For merging three checkpoints, only "add_difference" is supported.
force - Whether to ignore mismatch in model_config.json for the current models. Defaults to False.
**kwargs - the default DiffusionPipeline.get_config_dict kwargs:
cache_dir, resume_download, force_download, proxies, local_files_only, use_auth_token, revision, torch_dtype, device_map
'''
config_file = global_config_file()
model_manager = ModelManager(OmegaConf.load(config_file))
for mod in models:
assert (mod in model_manager.model_names()), f'** Unknown model "{mod}"'
assert (model_manager.model_info(mod).get('format',None) == 'diffusers'), f'** {mod} is not a diffusers model. It must be optimized before merging.'
model_ids_or_paths = [model_manager.model_name_or_path(x) for x in models]
pipe = DiffusionPipeline.from_pretrained(model_ids_or_paths[0],
cache_dir=kwargs.get('cache_dir',global_cache_dir()),
custom_pipeline='checkpoint_merger')
merged_pipe = pipe.merge(pretrained_model_name_or_path_list=model_ids_or_paths,
alpha=alpha,
interp=interp,
force=force,
**kwargs)
dump_path = global_models_dir() / 'merged_diffusers'
os.makedirs(dump_path,exist_ok=True)
dump_path = dump_path / merged_model_name
merged_pipe.save_pretrained (
dump_path,
safe_serialization=1
)
model_manager.import_diffuser_model(
dump_path,
model_name = merged_model_name,
description = f'Merge of models {", ".join(models)}'
)
print('REMINDER: When PR 2369 is merged, replace merge_diffusers.py line 56 with vae= argument to impormodel()')
if vae := model_manager.config[models[0]].get('vae',None):
print(f'>> Using configured VAE assigned to {models[0]}')
model_manager.config[merged_model_name]['vae'] = vae
model_manager.commit(config_file)

View File

@ -25,6 +25,7 @@ import torch
import safetensors
import transformers
from diffusers import AutoencoderKL, logging as dlogging
from diffusers.utils.logging import get_verbosity, set_verbosity, set_verbosity_error
from omegaconf import OmegaConf
from omegaconf.dictconfig import DictConfig
from picklescan.scanner import scan_file_path
@ -36,7 +37,11 @@ from ldm.util import instantiate_from_config, ask_user
DEFAULT_MAX_MODELS=2
class ModelManager(object):
def __init__(self, config:OmegaConf, device_type:str, precision:str, max_loaded_models=DEFAULT_MAX_MODELS):
def __init__(self,
config:OmegaConf,
device_type:str='cpu',
precision:str='float16',
max_loaded_models=DEFAULT_MAX_MODELS):
'''
Initialize with the path to the models.yaml config file,
the torch device type, and precision. The optional
@ -541,7 +546,7 @@ class ModelManager(object):
format='diffusers',
)
if isinstance(repo_or_path,Path) and repo_or_path.exists():
new_config.update(path=repo_or_path)
new_config.update(path=str(repo_or_path))
else:
new_config.update(repo_id=repo_or_path)
@ -833,11 +838,11 @@ class ModelManager(object):
return model
# diffusers really really doesn't like us moving a float16 model onto CPU
import logging
logging.getLogger('diffusers.pipeline_utils').setLevel(logging.CRITICAL)
verbosity = get_verbosity()
set_verbosity_error()
model.cond_stage_model.device = 'cpu'
model.to('cpu')
logging.getLogger('pipeline_utils').setLevel(logging.INFO)
set_verbosity(verbosity)
for submodel in ('first_stage_model','cond_stage_model','model'):
try:

View File

@ -1,18 +1,16 @@
import math
import os.path
from functools import partial
from typing import Optional
import clip
import kornia
import torch
import torch.nn as nn
from functools import partial
import clip
from einops import rearrange, repeat
from einops import repeat
from transformers import CLIPTokenizer, CLIPTextModel
import kornia
from ldm.invoke.devices import choose_torch_device
from ldm.invoke.globals import Globals, global_cache_dir
#from ldm.modules.textual_inversion_manager import TextualInversionManager
from ldm.invoke.devices import choose_torch_device
from ldm.invoke.globals import global_cache_dir
from ldm.modules.x_transformer import (
Encoder,
TransformerWrapper,
@ -654,21 +652,22 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder):
per_token_weights += [weight] * len(this_fragment_token_ids)
# leave room for bos/eos
if len(all_token_ids) > self.max_length - 2:
excess_token_count = len(all_token_ids) - self.max_length - 2
max_token_count_without_bos_eos_markers = self.max_length - 2
if len(all_token_ids) > max_token_count_without_bos_eos_markers:
excess_token_count = len(all_token_ids) - max_token_count_without_bos_eos_markers
# TODO build nice description string of how the truncation was applied
# this should be done by calling self.tokenizer.convert_ids_to_tokens() then passing the result to
# self.tokenizer.convert_tokens_to_string() for the token_ids on each side of the truncation limit.
print(f">> Prompt is {excess_token_count} token(s) too long and has been truncated")
all_token_ids = all_token_ids[0:self.max_length]
per_token_weights = per_token_weights[0:self.max_length]
all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
# pad out to a 77-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
# pad out to a 77-entry array: [bos_token, <prompt tokens>, eos_token, pad_token]
# (77 = self.max_length)
all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
per_token_weights = [1.0] + per_token_weights + [1.0]
pad_length = self.max_length - len(all_token_ids)
all_token_ids += [self.tokenizer.eos_token_id] * pad_length
all_token_ids += [self.tokenizer.pad_token_id] * pad_length
per_token_weights += [1.0] * pad_length
all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long).to(self.device)

View File

@ -3,8 +3,9 @@ import math
import torch
from transformers import CLIPTokenizer, CLIPTextModel
from ldm.modules.textual_inversion_manager import TextualInversionManager
from ldm.invoke.devices import torch_dtype
from ldm.modules.textual_inversion_manager import TextualInversionManager
class WeightedPromptFragmentsToEmbeddingsConverter():
@ -22,8 +23,8 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
return self.tokenizer.model_max_length
def get_embeddings_for_weighted_prompt_fragments(self,
text: list[str],
fragment_weights: list[float],
text: list[list[str]],
fragment_weights: list[list[float]],
should_return_tokens: bool = False,
device='cpu'
) -> torch.Tensor:
@ -198,12 +199,12 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
all_token_ids = all_token_ids[0:max_token_count_without_bos_eos_markers]
per_token_weights = per_token_weights[0:max_token_count_without_bos_eos_markers]
# pad out to a self.max_length-entry array: [eos_token, <prompt tokens>, eos_token, ..., eos_token]
# pad out to a self.max_length-entry array: [bos_token, <prompt tokens>, eos_token, pad_token]
# (typically self.max_length == 77)
all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id]
per_token_weights = [1.0] + per_token_weights + [1.0]
pad_length = self.max_length - len(all_token_ids)
all_token_ids += [self.tokenizer.eos_token_id] * pad_length
all_token_ids += [self.tokenizer.pad_token_id] * pad_length
per_token_weights += [1.0] * pad_length
all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device)