Merge branch 'main' into patch-1

Lincoln Stein 2023-01-17 23:52:13 -05:00 committed by GitHub
commit 6e2365f835
23 changed files with 143 additions and 73 deletions


@@ -119,6 +119,7 @@ jobs:
run: >
configure_invokeai.py
--yes
--default_only
--full-precision # can't use fp16 weights without a GPU
- name: Run the tests


@@ -1,18 +1,32 @@
stable-diffusion-2.1-768:
description: Stable Diffusion version 2.1 diffusers model, trained on 768x768 images (5.21 GB)
repo_id: stabilityai/stable-diffusion-2-1
format: diffusers
recommended: True
stable-diffusion-2.1-base:
description: Stable Diffusion version 2.1 diffusers base model, trained on 512x512 images (5.21 GB)
repo_id: stabilityai/stable-diffusion-2-1-base
format: diffusers
recommended: False
stable-diffusion-1.5:
description: Stable Diffusion version 1.5 weight file (4.27 GB)
repo_id: runwayml/stable-diffusion-v1-5
format: diffusers
recommended: True
default: True
vae:
repo_id: stabilityai/sd-vae-ft-mse
default: True
stable-diffusion-2.1:
description: Stable Diffusion version 2.1 diffusers model (5.21 GB)
repo_id: stabilityai/stable-diffusion-2-1
stable-diffusion-1.4:
description: The original Stable Diffusion version 1.4 weight file (4.27 GB)
repo_id: CompVis/stable-diffusion-v1-4
recommended: False
format: diffusers
recommended: True
vae:
repo_id: stabilityai/sd-vae-ft-mse
width: 512
height: 512
inpainting-1.5:
description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB)
description: RunwayML SD 1.5 model optimized for inpainting (ckpt version) (4.27 GB)
repo_id: runwayml/stable-diffusion-inpainting
config: v1-inpainting-inference.yaml
file: sd-v1-5-inpainting.ckpt
@@ -23,19 +37,13 @@ inpainting-1.5:
recommended: True
width: 512
height: 512
stable-diffusion-1.4:
description: The original Stable Diffusion version 1.4 weight file (4.27 GB)
repo_id: CompVis/stable-diffusion-v1-4
recommended: False
format: diffusers
vae:
repo_id: stabilityai/sd-vae-ft-mse
waifu-diffusion-1.4:
description: Waifu diffusion 1.4
description: Latest waifu diffusion 1.4 (diffusers version)
format: diffusers
repo_id: hakurei/waifu-diffusion
recommended: True
waifu-diffusion-1.3:
description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB)
description: Stable Diffusion 1.4 fine tuned on anime-styled images (ckpt version) (4.27 GB)
repo_id: hakurei/waifu-diffusion-v1-3
config: v1-inference.yaml
file: model-epoch09-float32.ckpt
@@ -53,8 +61,8 @@ trinart-2.0:
recommended: False
vae:
repo_id: stabilityai/sd-vae-ft-mse
trinart_characters-2.0:
description: An SD model finetuned with 19.2M anime/manga style images (4.27 GB)
trinart_characters-2_0:
description: An SD model finetuned with 19.2M anime/manga style images (ckpt version) (4.27 GB)
repo_id: naclbit/trinart_derrida_characters_v2_stable_diffusion
config: v1-inference.yaml
file: derrida_final.ckpt
@@ -65,6 +73,11 @@ trinart_characters-2.0:
recommended: False
width: 512
height: 512
anything-4.0:
description: High-quality, highly detailed anime style images with just a few prompts
format: diffusers
repo_id: andite/anything-v4.0
recommended: False
papercut-1.0:
description: SD 1.5 fine-tuned for papercut art (use "PaperCut" in your prompts) (2.13 GB)
repo_id: Fictiverse/Stable_Diffusion_PaperCut_Model
@@ -72,8 +85,6 @@ papercut-1.0:
vae:
repo_id: stabilityai/sd-vae-ft-mse
recommended: False
width: 512
height: 512
voxel_art-1.0:
description: Stable Diffusion trained on voxel art (use "VoxelArt" in your prompts) (4.27 GB)
repo_id: Fictiverse/Stable_Diffusion_VoxelArt_Model
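Each stanza above is a model the configure script can offer to download: diffusers entries carry only a repo_id, ckpt entries add a config/file pair, and the new default: True key marks the single model that --default_only installs. A minimal sketch of how such a file can be read and filtered with OmegaConf (the loader used elsewhere in this commit); the path is illustrative, not necessarily the project's canonical location:

from omegaconf import OmegaConf

models = OmegaConf.load('configs/INITIAL_MODELS.yaml')   # hypothetical path
defaults    = [name for name, info in models.items() if info.get('default', False)]
recommended = [name for name, info in models.items() if info.get('recommended', False)]
print(defaults)      # e.g. ['stable-diffusion-1.5']
print(recommended)   # everything flagged recommended: True above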


@@ -1,6 +1,6 @@
-r environments-and-requirements/requirements-base.txt
# Get hardware-appropriate torch/torchvision
--extra-index-url https://download.pytorch.org/whl/rocm5.1.1 --trusted-host https://download.pytorch.org
--extra-index-url https://download.pytorch.org/whl/rocm5.2 --trusted-host https://download.pytorch.org
torch>=1.13.1
torchvision>=0.14.1
-e .


@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cu116 --trusted-host https://download.pytorch.org
--trusted-host https://download.pytorch.org
-r environments-and-requirements/requirements-base.txt
torch>=1.13.1
torchvision>=0.14.1


@@ -1,6 +1,6 @@
-r environments-and-requirements/requirements-base.txt
# Get hardware-appropriate torch/torchvision
--extra-index-url https://download.pytorch.org/whl/cu116 --trusted-host https://download.pytorch.org
--extra-index-url https://download.pytorch.org/whl/cu117 --trusted-host https://download.pytorch.org
torch==1.13.1
torchvision==0.14.1
-e .
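The index-URL bumps above only change where pip finds the torch wheels (ROCm 5.2 and CUDA 11.7 builds respectively); a quick, hedged sanity check that the intended build was installed:

import torch

print(torch.__version__)           # e.g. 1.13.1+cu117
print(torch.version.cuda)          # e.g. 11.7, or None on a ROCm/CPU build
print(torch.cuda.is_available())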


@@ -29,7 +29,7 @@ from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary
from ldm.invoke.conditioning import get_uc_and_c_and_ec
from ldm.invoke.devices import choose_torch_device, choose_precision
from ldm.invoke.generator.inpaint import infill_methods
from ldm.invoke.globals import global_cache_dir
from ldm.invoke.globals import global_cache_dir, Globals
from ldm.invoke.image_util import InitImageResizer
from ldm.invoke.model_manager import ModelManager
from ldm.invoke.pngwriter import PngWriter
@@ -201,6 +201,7 @@ class Generate:
self.precision = 'float32'
if self.precision == 'auto':
self.precision = choose_precision(self.device)
Globals.full_precision = self.precision=='float32'
# model caching system for fast switching
self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models)
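Mirroring the resolved precision into Globals.full_precision lets code far away from the Generate object (notably the new torch_dtype() helper in devices.py further down) honour --full_precision without threading the setting through every call. A minimal sketch of that handshake, using only names introduced or imported in this commit:

import torch
from ldm.invoke.globals import Globals
from ldm.invoke.devices import torch_dtype

Globals.full_precision = True                               # what Generate does when precision == 'float32'
assert torch_dtype(torch.device('cuda')) == torch.float32   # full precision wins even on a CUDA device

The cost of the convenience is a piece of module-level state, which is why the flag lives alongside the other Globals defaults added below.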


@@ -613,8 +613,6 @@ def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str:
description = model_description):
print('** model failed to import')
return None
if input('Make this the default model? [n] ').startswith(('y','Y')):
manager.set_default_model(model_name)
return model_name
def import_ckpt_model(path_or_url:str, gen, opt, completer)->str:
@@ -647,8 +645,6 @@ def import_ckpt_model(path_or_url:str, gen, opt, completer)->str:
print('** model failed to import')
return None
if input('Make this the default model? [n] ').startswith(('y','Y')):
manager.set_model_default(model_name)
return model_name
def _verify_load(model_name:str, gen)->bool:
@@ -726,6 +722,9 @@ def del_config(model_name:str, gen, opt, completer):
if model_name == current_model:
print("** Can't delete active model. !switch to another model first. **")
return
if model_name not in gen.model_manager.config:
print(f"** Unknown model {model_name}")
return
gen.model_manager.del_model(model_name)
gen.model_manager.commit(opt.conf)
print(f'** {model_name} deleted')


@@ -335,4 +335,5 @@ class CkptGenerator():
os.makedirs(dirname, exist_ok=True)
image.save(filepath,'PNG')
def torch_dtype(self)->torch.dtype:
return torch.float16 if self.precision == 'float16' else torch.float32


@@ -75,12 +75,14 @@ class CkptTxt2Img(CkptGenerator):
self.latent_channels,
height // self.downsampling_factor,
width // self.downsampling_factor],
dtype=self.torch_dtype(),
device='cpu').to(device)
else:
x = torch.randn([1,
self.latent_channels,
height // self.downsampling_factor,
width // self.downsampling_factor],
dtype=self.torch_dtype(),
device=device)
if self.perlin > 0.0:
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
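The added dtype=self.torch_dtype() arguments make the starting noise match the model's weights. Without them torch.randn returns float32, and mixing that with half-precision latents either silently promotes the result or raises the 'expected scalar type Half but found Float' error quoted in the devices.py comment below. A hedged illustration with arbitrary shapes:

import torch

latents    = torch.zeros(1, 4, 64, 64, dtype=torch.float16)
noise_fp32 = torch.randn_like(latents, dtype=torch.float32)
noise_fp16 = torch.randn_like(latents, dtype=torch.float16)

print((latents + noise_fp32).dtype)   # torch.float32 -- silently promoted
print((latents + noise_fp16).dtype)   # torch.float16 -- stays in half precision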


@@ -21,10 +21,19 @@ def choose_precision(device) -> str:
return 'float16'
return 'float32'
def torch_dtype(device) -> torch.dtype:
if Globals.full_precision:
return torch.float32
if choose_precision(device) == 'float16':
return torch.float16
else:
return torch.float32
def choose_autocast(precision):
'''Returns an autocast context or nullcontext for the given precision string'''
# float16 currently requires autocast to avoid errors like:
# 'expected scalar type Half but found Float'
print(f'DEBUG: choose_autocast() called')
if precision == 'autocast' or precision == 'float16':
return autocast
return nullcontext
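With the new helper, a caller asks for the working dtype in one place instead of re-deriving it from the precision string. A short usage sketch, assuming only names that already exist in this module and in globals.py:

import torch
from ldm.invoke.globals import Globals
from ldm.invoke.devices import choose_torch_device, torch_dtype

Globals.full_precision = False                 # set by Generate from the user's precision
device = torch.device(choose_torch_device())   # cuda / mps / cpu
dtype  = torch_dtype(device)                   # typically float16 on CUDA, float32 otherwise
x = torch.randn(1, 4, 64, 64, dtype=dtype, device=device)
print(x.dtype)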


@@ -8,6 +8,7 @@ import os
import os.path as osp
import random
import traceback
from contextlib import nullcontext
import cv2
import numpy as np
@@ -18,8 +19,6 @@ from einops import rearrange
from pytorch_lightning import seed_everything
from tqdm import trange
from ldm.invoke.devices import choose_autocast
from ldm.models.diffusion.cross_attention_map_saving import AttentionMapSaver
from ldm.models.diffusion.ddpm import DiffusionWrapper
from ldm.util import rand_perlin_2d
@@ -64,7 +63,7 @@ class Generator:
image_callback=None, step_callback=None, threshold=0.0, perlin=0.0,
safety_checker:dict=None,
**kwargs):
scope = choose_autocast(self.precision)
scope = nullcontext
self.safety_checker = safety_checker
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
@@ -236,7 +235,8 @@ class Generator:
def get_perlin_noise(self,width,height):
fixdevice = 'cpu' if (self.model.device.type == 'mps') else self.model.device
return torch.stack([rand_perlin_2d((height, width), (8, 8), device = self.model.device).to(fixdevice) for _ in range(self.latent_channels)], dim=0).to(self.model.device)
noise = torch.stack([rand_perlin_2d((height, width), (8, 8), device = self.model.device).to(fixdevice) for _ in range(self.latent_channels)], dim=0).to(self.model.device)
return noise
def new_seed(self):
self.seed = random.randrange(0, np.iinfo(np.uint32).max)
@@ -341,3 +341,6 @@ class Generator:
image.save(filepath,'PNG')
def torch_dtype(self)->torch.dtype:
return torch.float16 if self.precision == 'float16' else torch.float32
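Swapping choose_autocast(self.precision) for nullcontext means generation no longer leans on autocast to coerce dtypes on the fly; instead every tensor is created with the dtype from torch_dtype(), which is why explicit dtype= arguments appear throughout the generators in this commit. A compressed before/after sketch, for illustration only:

from contextlib import nullcontext
import torch

# before: rely on an autocast scope to downcast float32 intermediates
# with torch.autocast('cuda'):
#     noise = torch.randn(1, 4, 64, 64, device='cuda')

# after: no-op scope, dtype chosen up front (here hard-coded to float16)
scope = nullcontext
with scope():
    noise = torch.randn(1, 4, 64, 64, dtype=torch.float16)
print(noise.dtype)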


@@ -391,7 +391,9 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
for i, t in enumerate(self.progress_bar(timesteps)):
batched_t.fill_(t)
step_output = self.step(batched_t, latents, conditioning_data,
i, additional_guidance=additional_guidance)
step_index=i,
total_step_count=len(timesteps),
additional_guidance=additional_guidance)
latents = step_output.prev_sample
predicted_original = getattr(step_output, 'pred_original_sample', None)
@@ -410,7 +412,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
@torch.inference_mode()
def step(self, t: torch.Tensor, latents: torch.Tensor,
conditioning_data: ConditioningData,
step_index:int | None = None, additional_guidance: List[Callable] = None):
step_index:int, total_step_count:int,
additional_guidance: List[Callable] = None):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0]
@@ -427,6 +430,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
conditioning_data.unconditioned_embeddings, conditioning_data.text_embeddings,
conditioning_data.guidance_scale,
step_index=step_index,
total_step_count=total_step_count,
threshold=conditioning_data.threshold
)


@@ -36,10 +36,9 @@ class Txt2Img(Generator):
threshold = ThresholdSettings(threshold, warmup=0.2) if threshold else None)
.add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta))
def make_image(x_T) -> PIL.Image.Image:
pipeline_output = pipeline.image_from_embeddings(
latents=torch.zeros_like(x_T),
latents=torch.zeros_like(x_T,dtype=self.torch_dtype()),
noise=x_T,
num_inference_steps=steps,
conditioning_data=conditioning_data,
@@ -62,12 +61,14 @@ class Txt2Img(Generator):
input_channels,
height // self.downsampling_factor,
width // self.downsampling_factor],
dtype=self.torch_dtype(),
device='cpu').to(device)
else:
x = torch.randn([1,
input_channels,
height // self.downsampling_factor,
width // self.downsampling_factor],
dtype=self.torch_dtype(),
device=device)
if self.perlin > 0.0:
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)


@@ -90,9 +90,9 @@ class Txt2Img2Img(Generator):
def get_noise_like(self, like: torch.Tensor):
device = like.device
if device.type == 'mps':
x = torch.randn_like(like, device='cpu').to(device)
x = torch.randn_like(like, device='cpu', dtype=self.torch_dtype()).to(device)
else:
x = torch.randn_like(like, device=device)
x = torch.randn_like(like, device=device, dtype=self.torch_dtype())
if self.perlin > 0.0:
shape = like.shape
x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2])
@@ -117,10 +117,12 @@ class Txt2Img2Img(Generator):
self.latent_channels,
scaled_height // self.downsampling_factor,
scaled_width // self.downsampling_factor],
dtype=self.torch_dtype(),
device='cpu').to(device)
else:
return torch.randn([1,
self.latent_channels,
scaled_height // self.downsampling_factor,
scaled_width // self.downsampling_factor],
dtype=self.torch_dtype(),
device=device)


@@ -43,6 +43,9 @@ Globals.always_use_cpu = False
# The CLI will test connectivity at startup time.
Globals.internet_available = True
# whether we are forcing full precision
Globals.full_precision = False
def global_config_dir()->Path:
return Path(Globals.root, Globals.config_dir)


@@ -230,6 +230,9 @@ class ModelManager(object):
Delete the named model.
'''
omega = self.config
if model_name not in omega:
print(f'** Unknown model {model_name}')
return
del omega[model_name]
if model_name in self.stack:
self.stack.remove(model_name)
@@ -253,9 +256,8 @@ class ModelManager(object):
assert (clobber or model_name not in omega), f'attempt to overwrite existing model definition "{model_name}"'
if model_name not in omega:
omega[model_name] = dict()
OmegaConf.update(omega,model_name,model_attributes,merge=False)
omega[model_name] = model_attributes
if 'weights' in omega[model_name]:
omega[model_name]['weights'].replace('\\','/')
@@ -349,7 +351,7 @@ class ModelManager(object):
if self.precision == 'float16':
print(' | Using faster float16 precision')
model.to(torch.float16)
model = model.to(torch.float16)
else:
print(' | Using more accurate float32 precision')
@@ -753,16 +755,20 @@ class ModelManager(object):
print('** Legacy version <= 2.2.5 model directory layout detected. Reorganizing.')
print('** This is a quick one-time operation.')
from shutil import move
from shutil import move, rmtree
# transformer files get moved into the hub directory
hub = models_dir / 'hub'
os.makedirs(hub, exist_ok=True)
for model in legacy_locations:
source = models_dir / model
dest = hub / model.stem
print(f'** {source} => {dest}')
if source.exists():
print(f'DEBUG: Moving {models_dir / model} into hub')
move(models_dir / model, hub)
if dest.exists():
rmtree(source)
else:
move(source, dest)
# anything else gets moved into the diffusers directory
diffusers = models_dir / 'diffusers'
@@ -773,7 +779,12 @@ class ModelManager(object):
if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
continue
if Path(dir).match('models--*--*'):
move(full_path,diffusers)
dest = diffusers / dir
print(f'** {full_path} => {dest}')
if dest.exists():
rmtree(full_path)
else:
move(full_path,dest)
# now clean up by removing any empty directories
empty = [root for root, dirs, files, in os.walk(models_dir) if not len(dirs) and not len(files)]
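The reorganization now computes an explicit destination for each legacy directory and, if an earlier (possibly interrupted) run already created it, removes the stale source rather than calling move() into an existing directory, which would nest or fail. A hedged sketch of that idempotent step with a hypothetical helper name:

from pathlib import Path
from shutil import move, rmtree

def move_or_clean(source: Path, dest: Path):
    # safe to re-run: a second pass just cleans up leftovers
    if not source.exists():
        return
    if dest.exists():
        rmtree(source)        # already migrated; drop the duplicate copy
    else:
        move(source, dest)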


@@ -7,6 +7,7 @@ import torch
import diffusers
from torch import nn
from diffusers.models.unet_2d_condition import UNet2DConditionModel
from ldm.invoke.devices import torch_dtype
# adapted from bloc97's CrossAttentionControl colab
# https://github.com/bloc97/CrossAttentionControl
@@ -383,7 +384,7 @@ def inject_attention_function(unet, context: Context):
remapped_saved_attention_slice = torch.index_select(saved_attention_slice, -1, index_map)
this_attention_slice = suggested_attention_slice
mask = context.cross_attention_mask
mask = context.cross_attention_mask.to(torch_dtype(suggested_attention_slice.device))
saved_mask = mask
this_mask = 1 - mask
attention_slice = remapped_saved_attention_slice * saved_mask + \


@@ -89,6 +89,7 @@ class InvokeAIDiffuserComponent:
conditioning: Union[torch.Tensor,dict],
unconditional_guidance_scale: float,
step_index: Optional[int]=None,
total_step_count: Optional[int]=None,
threshold: Optional[ThresholdSettings]=None,
):
"""
@@ -106,6 +107,14 @@ class InvokeAIDiffuserComponent:
cross_attention_control_types_to_do = []
context: Context = self.cross_attention_control_context
if self.cross_attention_control_context is not None:
if step_index is not None and total_step_count is not None:
# 🧨diffusers codepath
percent_through = step_index / total_step_count # will never reach 1.0 - this is deliberate
else:
# legacy compvis codepath
# TODO remove when compvis codepath support is dropped
if step_index is None and sigma is None:
raise ValueError(f"Either step_index or sigma is required when doing cross attention control, but both are None.")
percent_through = self.estimate_percent_through(step_index, sigma)
cross_attention_control_types_to_do = context.get_active_cross_attention_control_types_for_step(percent_through)
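On the diffusers codepath the fraction of sampling completed is now simply step_index / total_step_count, handed down from the pipeline's enumerate(timesteps) loop, rather than being estimated from sigma. A worked example of the arithmetic:

total_step_count = 50
for step_index in (0, 12, 49):
    print(step_index / total_step_count)   # 0.0, 0.24, 0.98 -- deliberately never 1.0

The last step yields (total_step_count - 1) / total_step_count, so cross-attention-control schedules keyed on percent_through always see a value below 1.0.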


@@ -4,7 +4,7 @@ import torch
from transformers import CLIPTokenizer, CLIPTextModel
from ldm.modules.textual_inversion_manager import TextualInversionManager
from ldm.invoke.devices import torch_dtype
class WeightedPromptFragmentsToEmbeddingsConverter():
@@ -207,7 +207,7 @@ class WeightedPromptFragmentsToEmbeddingsConverter():
per_token_weights += [1.0] * pad_length
all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device)
per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch.float32, device=device)
per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch_dtype(self.text_encoder.device), device=device)
#print(f"assembled all_token_ids_tensor with shape {all_token_ids_tensor.shape}")
return all_token_ids_tensor, per_token_weights_tensor


@@ -111,7 +111,6 @@ class TextualInversionManager():
if ti.trigger_token_id is not None:
raise ValueError(f"Tokens already injected for textual inversion with trigger '{ti.trigger_string}'")
print(f'DEBUG: Injecting token {ti.trigger_string}')
trigger_token_id = self._get_or_create_token_id_and_assign_embedding(ti.trigger_string, ti.embedding[0])
if ti.embedding_vector_length > 1:


@@ -8,6 +8,7 @@ from threading import Thread
from urllib import request
from tqdm import tqdm
from pathlib import Path
from ldm.invoke.devices import torch_dtype
import numpy as np
import torch
@@ -235,7 +236,8 @@ def rand_perlin_2d(shape, res, device, fade = lambda t: 6*t**5 - 15*t**4 + 10*t*
n01 = dot(tile_grads([0, -1],[1, None]), [0, -1]).to(device)
n11 = dot(tile_grads([1, None], [1, None]), [-1,-1]).to(device)
t = fade(grid[:shape[0], :shape[1]])
return math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1]).to(device)
noise = math.sqrt(2) * torch.lerp(torch.lerp(n00, n10, t[..., 0]), torch.lerp(n01, n11, t[..., 0]), t[..., 1]).to(device)
return noise.to(dtype=torch_dtype(device))
def ask_user(question: str, answers: list):
from itertools import chain, repeat
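Casting the perlin noise to torch_dtype(device) keeps the blend performed by the generators (x = (1-perlin)*x + perlin*noise) from mixing float32 noise into half-precision latents. A hedged restatement of that blend with made-up values:

import torch

perlin = 0.1                                            # the user's perlin strength
x     = torch.randn(4, 64, 64, dtype=torch.float16)
noise = torch.randn(4, 64, 64, dtype=torch.float16)     # rand_perlin_2d now returns this dtype
x = (1 - perlin) * x + perlin * noise
print(x.dtype)                                          # torch.float16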


@@ -197,6 +197,14 @@ def recommended_datasets()->dict:
datasets[ds]=True
return datasets
#---------------------------------------------
def default_dataset()->dict:
datasets = dict()
for ds in Datasets.keys():
if Datasets[ds].get('default',False):
datasets[ds]=True
return datasets
#---------------------------------------------
def all_datasets()->dict:
datasets = dict()
@@ -646,7 +654,7 @@ def download_weights(opt:dict) -> Union[str, None]:
precision = 'float32' if opt.full_precision else choose_precision(torch.device(choose_torch_device()))
if opt.yes_to_all:
models = recommended_datasets()
models = default_dataset() if opt.default_only else recommended_datasets()
access_token = authenticate(opt.yes_to_all)
if len(models)>0:
successfully_downloaded = download_weight_datasets(models, access_token, precision=precision)
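Tied together with the default: True stanza in INITIAL_MODELS.yaml and the --default_only argument registered below, --yes --default_only makes the CI job in the first hunk of this commit fetch a single model instead of every recommended one. A simplified, self-contained sketch of the selection logic, using a hypothetical two-entry Datasets table in place of the real YAML:

Datasets = {
    'stable-diffusion-1.5':     {'recommended': True, 'default': True},
    'stable-diffusion-2.1-768': {'recommended': True},
}

def default_dataset() -> dict:
    return {name: True for name, info in Datasets.items() if info.get('default', False)}

def recommended_datasets() -> dict:
    return {name: True for name, info in Datasets.items() if info.get('recommended', False)}

default_only = True                       # i.e. opt.default_only from argparse
models = default_dataset() if default_only else recommended_datasets()
print(models)                             # {'stable-diffusion-1.5': True}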
@@ -808,6 +816,9 @@ def main():
dest='yes_to_all',
action='store_true',
help='answer "yes" to all prompts')
parser.add_argument('--default_only',
action='store_true',
help='when --yes specified, only install the default model')
parser.add_argument('--config_file',
'-c',
dest='config_file',