From 4f44b64052223418c5b99ee913f713f3169fc079 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Thu, 23 Feb 2023 15:43:58 -0500
Subject: [PATCH 1/8] fix ckpt_convert module to work with dreambooth v2
 models

- Discord member @marcus.llewellyn reported that some civitai 2.1-derived
  checkpoints (probably dreambooth-generated) were not converting properly:
  https://discord.com/channels/1020123559063990373/1078386197589655582/1078387806122025070

- @blessedcoolant tracked this down to a missing key that is used to derive
  the vector length of the CLIP model, by fetching the second dimension of
  the tensor at "cond_stage_model.model.text_projection". His proposed
  solution was to hardcode a value of 1024.

- On inspection, I found that the same second dimension can be recovered
  from the key "cond_stage_model.model.ln_final.bias", and used that
  instead. I believe this is correct; I tested multiple v1, v2 and
  inpainting models, and all of them converted correctly.

- While debugging this, I found and fixed several other issues:

  - the model download script was not pre-downloading the OpenCLIP
    text_encoder or text_tokenizer; this is now fixed
  - got rid of legacy code in `ckpt_to_diffuser.py` and replaced it with
    calls into `model_manager`
  - made status reporting in the CLI more consistent
---
 ldm/invoke/CLI.py                       | 13 ++++-------
 ldm/invoke/ckpt_to_diffuser.py          | 29 +++++++++++++++----------
 ldm/invoke/config/invokeai_configure.py | 15 ++++++++-----
 ldm/invoke/model_manager.py             | 26 +++++++++++-----------
 4 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py
index 1d76b68a66..b755eafed4 100644
--- a/ldm/invoke/CLI.py
+++ b/ldm/invoke/CLI.py
@@ -625,7 +625,7 @@ def set_default_output_dir(opt: Args, completer: Completer):
     completer.set_default_dir(opt.outdir)
 
 
-def import_model(model_path: str, gen, opt, completer, convert=False) -> str:
+def import_model(model_path: str, gen, opt, completer, convert=False):
     """
     model_path can be (1) a URL to a .ckpt file; (2) a local .ckpt file path;
     (3) a huggingface repository id; or (4) a local directory containing a
@@ -679,7 +679,7 @@ def _verify_load(model_name: str, gen) -> bool:
     current_model = gen.model_name
     try:
         if not gen.set_model(model_name):
-            return False
+            return
     except Exception as e:
         print(f"** model failed to load: {str(e)}")
         print(
@@ -706,7 +706,7 @@ def _get_model_name_and_desc(
     )
     return model_name, model_description
 
-def convert_model(model_name_or_path: Union[Path, str], gen, opt, completer) -> str:
+def convert_model(model_name_or_path: Union[Path, str], gen, opt, completer):
     model_name_or_path = model_name_or_path.replace("\\", "/")  # windows
     manager = gen.model_manager
     ckpt_path = None
@@ -740,19 +740,14 @@ def convert_model(model_name_or_path: Union[Path, str], gen, opt, completer) ->
         )
     else:
         try:
-            model_name = import_model(model_name_or_path, gen, opt, completer, convert=True)
+            import_model(model_name_or_path, gen, opt, completer, convert=True)
         except KeyboardInterrupt:
             return
 
-    if not model_name:
-        print("** Conversion failed. Aborting.")
-        return
-
     manager.commit(opt.conf)
     if click.confirm(f"Delete the original .ckpt file at {ckpt_path}?", default=False):
         ckpt_path.unlink(missing_ok=True)
         print(f"{ckpt_path} deleted")
-    return model_name
 
 
 def del_config(model_name: str, gen, opt, completer):
diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py
index 4ce01cd34e..82ba73b0a4 100644
--- a/ldm/invoke/ckpt_to_diffuser.py
+++ b/ldm/invoke/ckpt_to_diffuser.py
@@ -17,16 +17,15 @@
 # Original file at: https://github.com/huggingface/diffusers/blob/main/scripts/convert_ldm_original_checkpoint_to_diffusers.py
 """ Conversion script for the LDM checkpoints. """
 
-import os
 import re
 import torch
 import warnings
 from pathlib import Path
 from ldm.invoke.globals import (
-    Globals,
     global_cache_dir,
     global_config_dir,
 )
+from ldm.invoke.model_manager import ModelManager, SDLegacyType
 from safetensors.torch import load_file
 from typing import Union
@@ -760,7 +759,12 @@ def convert_open_clip_checkpoint(checkpoint):
 
     text_model_dict = {}
 
-    d_model = int(checkpoint["cond_stage_model.model.text_projection"].shape[0])
+    if 'cond_stage_model.model.text_projection' in keys:
+        d_model = int(checkpoint["cond_stage_model.model.text_projection"].shape[0])
+    elif 'cond_stage_model.model.ln_final.bias' in keys:
+        d_model = int(checkpoint['cond_stage_model.model.ln_final.bias'].shape[0])
+    else:
+        raise KeyError('Expected key "cond_stage_model.model.text_projection" not found in model')
 
     text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")
@@ -856,20 +860,23 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     upcast_attention = False
     if original_config_file is None:
-        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
-
-        if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
+        model_type = ModelManager.probe_model_type(checkpoint)
+
+        if model_type == SDLegacyType.V2:
             original_config_file = global_config_dir() / 'stable-diffusion' / 'v2-inference-v.yaml'
-
             if global_step == 110000:
                 # v2.1 needs to upcast attention
                 upcast_attention = True
-        elif str(checkpoint_path).lower().find('inpaint') >= 0:  # brittle - please pass original_config_file parameter!
-            print(f'  | checkpoint has "inpaint" in name, assuming an inpainting model')
+
+        elif model_type == SDLegacyType.V1_INPAINT:
             original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inpainting-inference.yaml'
-        else:
+
+        elif model_type == SDLegacyType.V1:
             original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inference.yaml'
 
+        else:
+            raise Exception('Unknown checkpoint type')
+
     original_config = OmegaConf.load(original_config_file)
 
     if num_in_channels is not None:
@@ -960,7 +967,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         text_model = convert_open_clip_checkpoint(checkpoint)
         tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2",
                                                   subfolder="tokenizer",
-                                                  cache_dir=global_cache_dir('diffusers')
+                                                  cache_dir=cache_dir,
                                                   )
         pipe = pipeline_class(
             vae=vae,
diff --git a/ldm/invoke/config/invokeai_configure.py b/ldm/invoke/config/invokeai_configure.py
index eb753f5c33..bb967fba37 100755
--- a/ldm/invoke/config/invokeai_configure.py
+++ b/ldm/invoke/config/invokeai_configure.py
@@ -191,14 +191,18 @@ def download_bert():
 
 
 # ---------------------------------------------
-def download_clip():
-    print("Installing CLIP model...", file=sys.stderr)
+def download_sd1_clip():
+    print("Installing SD1 clip model...", file=sys.stderr)
     version = "openai/clip-vit-large-patch14"
-    print("Tokenizer...", file=sys.stderr)
     download_from_hf(CLIPTokenizer, version)
-    print("Text model...", file=sys.stderr)
     download_from_hf(CLIPTextModel, version)
 
+# ---------------------------------------------
+def download_sd2_clip():
+    version = 'stabilityai/stable-diffusion-2'
+    print("Installing SD2 clip model...", file=sys.stderr)
+    download_from_hf(CLIPTokenizer, version, subfolder='tokenizer')
+    download_from_hf(CLIPTextModel, version, subfolder='text_encoder')
 
 # ---------------------------------------------
 def download_realesrgan():
@@ -832,7 +836,8 @@ def main():
         else:
             print("\n** DOWNLOADING SUPPORT MODELS **")
             download_bert()
-            download_clip()
+            download_sd1_clip()
+            download_sd2_clip()
             download_realesrgan()
             download_gfpgan()
             download_codeformer()
diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py
index 2efe494a19..694d65c1a7 100644
--- a/ldm/invoke/model_manager.py
+++ b/ldm/invoke/model_manager.py
@@ -725,7 +725,7 @@ class ModelManager(object):
         SDLegacyType.V1
         SDLegacyType.V1_INPAINT
         SDLegacyType.V2
-        UNKNOWN
+        SDLegacyType.UNKNOWN
         """
         key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
         if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
@@ -785,7 +785,7 @@ class ModelManager(object):
         print(f">> Probing {thing} for import")
 
         if thing.startswith(("http:", "https:", "ftp:")):
-            print(f" | {thing} appears to be a URL")
+            print(f"  | {thing} appears to be a URL")
             model_path = self._resolve_path(
                 thing, "models/ldm/stable-diffusion-v1"
             )  # _resolve_path does a download if needed
@@ -793,15 +793,15 @@ class ModelManager(object):
         elif Path(thing).is_file() and thing.endswith((".ckpt", ".safetensors")):
             if Path(thing).stem in ["model", "diffusion_pytorch_model"]:
                 print(
-                    f" | {Path(thing).name} appears to be part of a diffusers model. Skipping import"
+                    f"  | {Path(thing).name} appears to be part of a diffusers model. Skipping import"
                 )
                 return
             else:
-                print(f" | {thing} appears to be a checkpoint file on disk")
+                print(f"  | {thing} appears to be a checkpoint file on disk")
                 model_path = self._resolve_path(thing, "models/ldm/stable-diffusion-v1")
 
         elif Path(thing).is_dir() and Path(thing, "model_index.json").exists():
-            print(f" | {thing} appears to be a diffusers file on disk")
+            print(f"  | {thing} appears to be a diffusers file on disk")
             model_name = self.import_diffuser_model(
                 thing,
                 vae=dict(repo_id="stabilityai/sd-vae-ft-mse"),
@@ -812,13 +812,13 @@ class ModelManager(object):
 
         elif Path(thing).is_dir():
             if (Path(thing) / "model_index.json").exists():
-                print(f">> {thing} appears to be a diffusers model.")
+                print(f"  | {thing} appears to be a diffusers model.")
                 model_name = self.import_diffuser_model(
                     thing, commit_to_conf=commit_to_conf
                 )
             else:
                 print(
-                    f">> {thing} appears to be a directory. Will scan for models to import"
+                    f"  | {thing} appears to be a directory. Will scan for models to import"
                 )
                 for m in list(Path(thing).rglob("*.ckpt")) + list(
                     Path(thing).rglob("*.safetensors")
                 ):
@@ -830,7 +830,7 @@ class ModelManager(object):
             return model_name
 
         elif re.match(r"^[\w.+-]+/[\w.+-]+$", thing):
-            print(f" | {thing} appears to be a HuggingFace diffusers repo_id")
+            print(f"  | {thing} appears to be a HuggingFace diffusers repo_id")
             model_name = self.import_diffuser_model(
                 thing, commit_to_conf=commit_to_conf
             )
@@ -847,7 +847,7 @@ class ModelManager(object):
             return
 
         if model_path.stem in self.config:  # already imported
-            print(" | Already imported. Skipping")
+            print("  | Already imported. Skipping")
             return
 
         # another round of heuristics to guess the correct config file.
@@ -860,18 +860,18 @@ class ModelManager(object):
         model_config_file = None
         if model_type == SDLegacyType.V1:
-            print(" | SD-v1 model detected")
+            print("  | SD-v1 model detected")
             model_config_file = Path(
                 Globals.root, "configs/stable-diffusion/v1-inference.yaml"
             )
         elif model_type == SDLegacyType.V1_INPAINT:
-            print(" | SD-v1 inpainting model detected")
+            print("  | SD-v1 inpainting model detected")
             model_config_file = Path(
                 Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
             )
         elif model_type == SDLegacyType.V2:
             print(
-                " | SD-v2 model detected; model will be converted to diffusers format"
+                "  | SD-v2 model detected; model will be converted to diffusers format"
             )
             model_config_file = Path(
                 Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
             )
@@ -923,7 +923,7 @@ class ModelManager(object):
         vae=None,
         original_config_file: Path = None,
         commit_to_conf: Path = None,
-    ) -> dict:
+    ) -> str:
         """
         Convert a legacy ckpt weights file to diffuser model and import
         into models.yaml.
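Note on PATCH 1/8: the new dimension-probing logic in convert_open_clip_checkpoint() can be exercised in isolation. The sketch below is illustrative only; the helper name infer_clip_width() and the checkpoint filename are invented for the example and are not part of the patch.

import torch

def infer_clip_width(state_dict: dict) -> int:
    """Recover the OpenCLIP hidden size (d_model) from a legacy SD checkpoint."""
    # Preferred source: the square text-projection matrix carried by
    # standard SD-2.x checkpoints.
    if "cond_stage_model.model.text_projection" in state_dict:
        return int(state_dict["cond_stage_model.model.text_projection"].shape[0])
    # Fallback for dreambooth-derived checkpoints that omit text_projection:
    # the final LayerNorm bias is a 1-D tensor of the same length.
    if "cond_stage_model.model.ln_final.bias" in state_dict:
        return int(state_dict["cond_stage_model.model.ln_final.bias"].shape[0])
    raise KeyError("cannot infer CLIP width: neither text_projection nor ln_final.bias present")

if __name__ == "__main__":
    # Hypothetical checkpoint path; legacy .ckpt files usually nest their
    # weights under a "state_dict" key.
    ckpt = torch.load("my-dreambooth-v2-model.ckpt", map_location="cpu")
    sd = ckpt.get("state_dict", ckpt)
    print(infer_clip_width(sd))  # expect 1024 for SD-2.x models

The fallback is sound because "cond_stage_model.model.ln_final.bias" has length equal to the hidden size, the same value carried by the second dimension of the square "text_projection" matrix that some dreambooth-derived checkpoints omit.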
From b5b541c7479a4ae85575c58004081985f2aada9c Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Thu, 23 Feb 2023 17:47:36 -0500
Subject: [PATCH 2/8] bump version; use correct format for PyPI

---
 ldm/invoke/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ldm/invoke/_version.py b/ldm/invoke/_version.py
index 0ac7042811..92cc704e25 100644
--- a/ldm/invoke/_version.py
+++ b/ldm/invoke/_version.py
@@ -1 +1 @@
-__version__='2.3.1+rc3'
+__version__='2.3.1-rc4'

From 7fb2da8741b6f0c6e4050a5460e317ecc26591ce Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Thu, 23 Feb 2023 22:03:28 -0500
Subject: [PATCH 3/8] fix generate backend to generate "accurate" intermediate
 images

- Closes #2784
- Closes #2775
---
 ldm/invoke/generator/base.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py
index 4c73b997e7..21d6f271ca 100644
--- a/ldm/invoke/generator/base.py
+++ b/ldm/invoke/generator/base.py
@@ -137,17 +137,9 @@ class Generator:
         """
         Given samples returned from a sampler, converts it into a PIL Image
         """
-        x_samples = self.model.decode_first_stage(samples)
-        x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
-        if len(x_samples) != 1:
-            raise Exception(
-                f'>> expected to get a single image, but got {len(x_samples)}')
-        x_sample = 255.0 * rearrange(
-            x_samples[0].cpu().numpy(), 'c h w -> h w c'
-        )
-        return Image.fromarray(x_sample.astype(np.uint8))
-
-    # write an approximate RGB image from latent samples for a single step to PNG
+        with torch.inference_mode():
+            image = self.model.decode_latents(samples)
+        return self.model.numpy_to_pil(image)[0]
 
     def repaste_and_color_correct(self, result: Image.Image, init_image: Image.Image, init_mask: Image.Image, mask_blur_radius: int = 8) -> Image.Image:
         if init_image is None or init_mask is None:

From a540cc537f39a1a31e8d598b070b858c2b7c8fa0 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Fri, 24 Feb 2023 00:53:48 -0500
Subject: [PATCH 4/8] add curated set of HuggingFace diffusers models for
 2.3.1 release

- The final list can be found in invokeai/configs/INITIAL_MODELS.yaml

- After installing all the models, I discovered a bug in the file
  selection form that caused a crash when no uninstalled models
  remained, so I fixed that as well.
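The form fix follows a create-conditionally, guard-with-hasattr pattern. Below is a minimal, self-contained sketch of that pattern; the class and widget names are stand-ins for the real npyscreen form, not actual InvokeAI classes:

from dataclasses import dataclass, field

@dataclass
class MultiSelect:  # stand-in for npyscreen.MultiSelect
    values: list
    value: list = field(default_factory=list)  # indices the user checked

class StarterModelsForm:  # illustrative stand-in for addModelsForm
    def __init__(self, starter_model_list):
        self.starter_model_list = starter_model_list
        # Build the selection widget only when there is something to select.
        if len(starter_model_list) > 0:
            self.models_selected = MultiSelect(values=starter_model_list)

    def selections(self):
        # Later uses tolerate the widget's absence; before the fix, this
        # attribute access crashed once every starter model was installed.
        if hasattr(self, "models_selected"):
            return {self.starter_model_list[i]: True for i in self.models_selected.value}
        return {}

form = StarterModelsForm([])    # no uninstalled starter models remain
assert form.selections() == {}  # previously an AttributeError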
---
 invokeai/configs/INITIAL_MODELS.yaml | 77 +++++++++++++++++---------
 ldm/invoke/config/model_install.py   | 82 +++++++++++++++------------
 2 files changed, 96 insertions(+), 63 deletions(-)

diff --git a/invokeai/configs/INITIAL_MODELS.yaml b/invokeai/configs/INITIAL_MODELS.yaml
index 60ddbf3324..edc6285a38 100644
--- a/invokeai/configs/INITIAL_MODELS.yaml
+++ b/invokeai/configs/INITIAL_MODELS.yaml
@@ -6,53 +6,78 @@ stable-diffusion-1.5:
     repo_id: stabilityai/sd-vae-ft-mse
   recommended: True
   default: True
-inpainting-1.5:
+sd-inpainting-1.5:
   description: RunwayML SD 1.5 model optimized for inpainting, diffusers version (4.27 GB)
   repo_id: runwayml/stable-diffusion-inpainting
   format: diffusers
   vae:
     repo_id: stabilityai/sd-vae-ft-mse
   recommended: True
-dreamlike-diffusion-1.0:
-  description: An SD 1.5 model fine tuned on high quality art by dreamlike.art, diffusers version (2.13 BG)
-  format: diffusers
-  repo_id: dreamlike-art/dreamlike-diffusion-1.0
-  vae:
-    repo_id: stabilityai/sd-vae-ft-mse
-  recommended: True
-dreamlike-photoreal-2.0:
-  description: A photorealistic model trained on 768 pixel images based on SD 1.5 (2.13 GB)
-  format: diffusers
-  repo_id: dreamlike-art/dreamlike-photoreal-2.0
-  recommended: False
-stable-diffusion-2.1-768:
+stable-diffusion-2.1:
   description: Stable Diffusion version 2.1 diffusers model, trained on 768 pixel images (5.21 GB)
   repo_id: stabilityai/stable-diffusion-2-1
   format: diffusers
   recommended: True
-stable-diffusion-2.1-base:
-  description: Stable Diffusion version 2.1 diffusers base model, trained on 512 pixel images (5.21 GB)
-  repo_id: stabilityai/stable-diffusion-2-1-base
+sd-inpainting-2.0:
+  description: Stable Diffusion version 2.0 inpainting model (5.21 GB)
+  repo_id: stabilityai/stable-diffusion-2-inpainting
   format: diffusers
   recommended: False
+analog-diffusion-1.0:
+  description: An SD-1.5 model trained on diverse analog photographs (2.13 GB)
+  repo_id: wavymulder/Analog-Diffusion
+  format: diffusers
+  recommended: false
+deliberate-1.0:
+  description: Versatile model that produces detailed images up to 768px (4.27 GB)
+  format: diffusers
+  repo_id: XpucT/Deliberate
+  recommended: False
+d&d-diffusion-1.0:
+  description: Dungeons & Dragons characters (2.13 GB)
+  format: diffusers
+  repo_id: 0xJustin/Dungeons-and-Diffusion
+  recommended: False
+dreamlike-photoreal-2.0:
+  description: A photorealistic model trained on 768 pixel images based on SD 1.5 (2.13 GB)
+  format: diffusers
+  repo_id: dreamlike-art/dreamlike-photoreal-2.0
+  recommended: False
+inkpunk-1.0:
+  description: Stylized illustrations inspired by Gorillaz, FLCL and Shinkawa; prompt with "nvinkpunk" (4.27 GB)
+  format: diffusers
+  repo_id: Envvi/Inkpunk-Diffusion
+  recommended: False
 openjourney-4.0:
-  description: An SD 1.5 model fine tuned on Midjourney images by PromptHero - include "mdjrny-v4 style" in your prompts (2.13 GB)
-  format: diffusers
-  repo_id: prompthero/openjourney
-  vae:
+  description: An SD 1.5 model fine tuned on Midjourney; prompt with "mdjrny-v4 style" (2.13 GB)
+  format: diffusers
+  repo_id: prompthero/openjourney
+  vae:
     repo_id: stabilityai/sd-vae-ft-mse
-  recommended: False
-nitro-diffusion-1.0:
-  description: A SD 1.5 model trained on three artstyles - prompt with "archer style", "arcane style" and/or "modern disney style" (2.13 GB)
-  repo_id: nitrosocke/Nitro-Diffusion
+  recommended: False
+portrait-plus-1.0:
+  description: An SD-1.5 model trained on close range portraits of people; prompt with "portrait+" (2.13 GB)
+  format: diffusers
+  repo_id: wavymulder/portraitplus
+  recommended: False
+seek-art-mega-1.0:
+  description: A general use SD-1.5 "anything" model that supports multiple styles (2.1 GB)
+  repo_id: coreco/seek.art_MEGA
   format: diffusers
   vae:
     repo_id: stabilityai/sd-vae-ft-mse
   recommended: False
 trinart-2.0:
-  description: An SD model finetuned with ~40,000 assorted high resolution manga/anime-style pictures, diffusers version (2.13 GB)
+  description: An SD-1.5 model finetuned with ~40K assorted high resolution manga/anime-style images (2.13 GB)
   repo_id: naclbit/trinart_stable_diffusion_v2
   format: diffusers
   vae:
     repo_id: stabilityai/sd-vae-ft-mse
   recommended: False
+waifu-diffusion-1.4:
+  description: An SD-1.5 model trained on 680k anime/manga-style images (2.13 GB)
+  repo_id: hakurei/waifu-diffusion
+  format: diffusers
+  vae:
+    repo_id: stabilityai/sd-vae-ft-mse
+  recommended: False
diff --git a/ldm/invoke/config/model_install.py b/ldm/invoke/config/model_install.py
index 7dd5831707..287283ca27 100644
--- a/ldm/invoke/config/model_install.py
+++ b/ldm/invoke/config/model_install.py
@@ -114,37 +114,37 @@ class addModelsForm(npyscreen.FormMultiPage):
             relx=4,
         )
         self.nextrely += 1
-        self.add_widget_intelligent(
-            CenteredTitleText,
-            name="== STARTER MODELS (recommended ones selected) ==",
-            editable=False,
-            color="CONTROL",
-        )
-        self.nextrely -= 1
-        self.add_widget_intelligent(
-            CenteredTitleText,
-            name="Select from a starter set of Stable Diffusion models from HuggingFace:",
-            editable=False,
-            labelColor="CAUTION",
-        )
-
-        self.nextrely -= 1
-        # if user has already installed some initial models, then don't patronize them
-        # by showing more recommendations
-        show_recommended = not self.existing_models
-        self.models_selected = self.add_widget_intelligent(
-            npyscreen.MultiSelect,
-            name="Install Starter Models",
-            values=starter_model_labels,
-            value=[
-                self.starter_model_list.index(x)
-                for x in self.starter_model_list
-                if show_recommended and x in recommended_models
-            ],
-            max_height=len(starter_model_labels) + 1,
-            relx=4,
-            scroll_exit=True,
-        )
+        if len(self.starter_model_list) > 0:
+            self.add_widget_intelligent(
+                CenteredTitleText,
+                name="== STARTER MODELS (recommended ones selected) ==",
+                editable=False,
+                color="CONTROL",
+            )
+            self.nextrely -= 1
+            self.add_widget_intelligent(
+                CenteredTitleText,
+                name="Select from a starter set of Stable Diffusion models from HuggingFace.",
+                editable=False,
+                labelColor="CAUTION",
+            )
+            self.nextrely -= 1
+            # if user has already installed some initial models, then don't patronize them
+            # by showing more recommendations
+            show_recommended = not self.existing_models
+            self.models_selected = self.add_widget_intelligent(
+                npyscreen.MultiSelect,
+                name="Install Starter Models",
+                values=starter_model_labels,
+                value=[
+                    self.starter_model_list.index(x)
+                    for x in self.starter_model_list
+                    if show_recommended and x in recommended_models
+                ],
+                max_height=len(starter_model_labels) + 1,
+                relx=4,
+                scroll_exit=True,
+            )
         self.add_widget_intelligent(
             CenteredTitleText,
             name='== IMPORT LOCAL AND REMOTE MODELS ==',
@@ -166,7 +166,11 @@ class addModelsForm(npyscreen.FormMultiPage):
         )
         self.nextrely -= 1
         self.import_model_paths = self.add_widget_intelligent(
-            TextBox, max_height=5, scroll_exit=True, editable=True, relx=4
+            TextBox,
+            max_height=7,
+            scroll_exit=True,
+            editable=True,
+            relx=4
         )
         self.nextrely += 1
         self.show_directory_fields = self.add_widget_intelligent(
@@ -241,7 +245,8 @@ class addModelsForm(npyscreen.FormMultiPage):
 
     def resize(self):
         super().resize()
-        self.models_selected.values = self._get_starter_model_labels()
+        if hasattr(self,'models_selected'):
+            self.models_selected.values = self._get_starter_model_labels()
 
     def _clear_scan_directory(self):
         if not self.show_directory_fields.value:
@@ -320,11 +325,14 @@ class addModelsForm(npyscreen.FormMultiPage):
         selections = self.parentApp.user_selections
 
         # starter models to install/remove
-        starter_models = dict(
-            map(
-                lambda x: (self.starter_model_list[x], True), self.models_selected.value
+        if hasattr(self,'models_selected'):
+            starter_models = dict(
+                map(
+                    lambda x: (self.starter_model_list[x], True), self.models_selected.value
+                )
             )
-        )
+        else:
+            starter_models = dict()
         selections.purge_deleted_models = False
         if hasattr(self, "previously_installed_models"):
             unchecked = [

From ec2890c19b2c6aad9869899e27da9f9c0d4180cd Mon Sep 17 00:00:00 2001
From: Jonathan <34005131+JPPhoto@users.noreply.github.com>
Date: Fri, 24 Feb 2023 07:48:54 -0600
Subject: [PATCH 5/8] Run garbage collection to allow the CUDA cache to
 completely empty. (#2791)

---
 ldm/generate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ldm/generate.py b/ldm/generate.py
index 7695c3a0bc..413a1e25cb 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -650,6 +650,8 @@ class Generate:
     def clear_cuda_cache(self):
         if self._has_cuda():
             self.gather_cuda_stats()
+            # Run garbage collection prior to emptying the CUDA cache
+            gc.collect()
             torch.cuda.empty_cache()
 
     def clear_cuda_stats(self):

From 230d3a496d0f2a85198fdbcb6826c75423d99ac3 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Fri, 24 Feb 2023 09:33:07 -0500
Subject: [PATCH 6/8] document starter models

- add new script `scripts/make_models_markdown_table.py` that parses
  INITIAL_MODELS.yaml and creates a markdown table for the model
  installation documentation file

- update 050_INSTALLING_MODELS.md with the above table, and add a
  warning about additional license terms that apply to some of the
  models.
---
 docs/installation/050_INSTALLING_MODELS.md | 40 +++++++++++++---------
 scripts/make_models_markdown_table.py      | 23 +++++++++++++
 2 files changed, 46 insertions(+), 17 deletions(-)
 create mode 100755 scripts/make_models_markdown_table.py

diff --git a/docs/installation/050_INSTALLING_MODELS.md b/docs/installation/050_INSTALLING_MODELS.md
index 5621075506..10589098d2 100644
--- a/docs/installation/050_INSTALLING_MODELS.md
+++ b/docs/installation/050_INSTALLING_MODELS.md
@@ -43,25 +43,31 @@ InvokeAI comes with support for a good set of starter models. You'll
 find them listed in the master models file
 `configs/INITIAL_MODELS.yaml` in the InvokeAI root directory. The
 subset that are currently installed are found in
-`configs/models.yaml`. The current list is:
+`configs/models.yaml`. As of v2.3.1, the list of starter models is:
 
-| Model                | HuggingFace Repo ID            | Description                                         | URL |
-| -------------------- | ------------------------------ | --------------------------------------------------- | --- |
-| stable-diffusion-1.5 | runwayml/stable-diffusion-v1-5 | Most recent version of base Stable Diffusion model | https://huggingface.co/runwayml/stable-diffusion-v1-5 |
-| stable-diffusion-1.4 | runwayml/stable-diffusion-v1-4 | Previous version of base Stable Diffusion model | https://huggingface.co/runwayml/stable-diffusion-v1-4 |
-| inpainting-1.5 | runwayml/stable-diffusion-inpainting | Stable diffusion 1.5 optimized for inpainting | https://huggingface.co/runwayml/stable-diffusion-inpainting |
-| stable-diffusion-2.1-base | stabilityai/stable-diffusion-2-1-base | Stable Diffusion version 2.1 trained on 512 pixel images | https://huggingface.co/stabilityai/stable-diffusion-2-1-base |
-| stable-diffusion-2.1-768 | stabilityai/stable-diffusion-2-1 | Stable Diffusion version 2.1 trained on 768 pixel images | https://huggingface.co/stabilityai/stable-diffusion-2-1 |
-| dreamlike-diffusion-1.0 | dreamlike-art/dreamlike-diffusion-1.0 | An SD 1.5 model finetuned on high quality art | https://huggingface.co/dreamlike-art/dreamlike-diffusion-1.0 |
-| dreamlike-photoreal-2.0 | dreamlike-art/dreamlike-photoreal-2.0 | A photorealistic model trained on 768 pixel images | https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0 |
-| openjourney-4.0 | prompthero/openjourney | An SD 1.5 model finetuned on Midjourney images prompt with "mdjrny-v4 style" | https://huggingface.co/prompthero/openjourney |
-| nitro-diffusion-1.0 | nitrosocke/Nitro-Diffusion | An SD 1.5 model finetuned on three styles, prompt with "archer style", "arcane style" or "modern disney style" | https://huggingface.co/nitrosocke/Nitro-Diffusion |
-| trinart-2.0 | naclbit/trinart_stable_diffusion_v2 | An SD 1.5 model finetuned with ~40,000 assorted high resolution manga/anime-style pictures | https://huggingface.co/naclbit/trinart_stable_diffusion_v2 |
-| trinart-characters-2_0 | naclbit/trinart_derrida_characters_v2_stable_diffusion | An SD 1.5 model finetuned with 19.2M manga/anime-style pictures | https://huggingface.co/naclbit/trinart_derrida_characters_v2_stable_diffusion |
+|Model Name | HuggingFace Repo ID | Description | URL |
+|---------- | ---------- | ----------- | --- |
+|stable-diffusion-1.5|runwayml/stable-diffusion-v1-5|Stable Diffusion version 1.5 diffusers model (4.27 GB)|https://huggingface.co/runwayml/stable-diffusion-v1-5 |
+|sd-inpainting-1.5|runwayml/stable-diffusion-inpainting|RunwayML SD 1.5 model optimized for inpainting, diffusers version (4.27 GB)|https://huggingface.co/runwayml/stable-diffusion-inpainting |
+|stable-diffusion-2.1|stabilityai/stable-diffusion-2-1|Stable Diffusion version 2.1 diffusers model, trained on 768 pixel images (5.21 GB)|https://huggingface.co/stabilityai/stable-diffusion-2-1 |
+|sd-inpainting-2.0|stabilityai/stable-diffusion-2-inpainting|Stable Diffusion version 2.0 inpainting model (5.21 GB)|https://huggingface.co/stabilityai/stable-diffusion-2-inpainting |
+|analog-diffusion-1.0|wavymulder/Analog-Diffusion|An SD-1.5 model trained on diverse analog photographs (2.13 GB)|https://huggingface.co/wavymulder/Analog-Diffusion |
+|deliberate-1.0|XpucT/Deliberate|Versatile model that produces detailed images up to 768px (4.27 GB)|https://huggingface.co/XpucT/Deliberate |
+|d&d-diffusion-1.0|0xJustin/Dungeons-and-Diffusion|Dungeons & Dragons characters (2.13 GB)|https://huggingface.co/0xJustin/Dungeons-and-Diffusion |
+|dreamlike-photoreal-2.0|dreamlike-art/dreamlike-photoreal-2.0|A photorealistic model trained on 768 pixel images based on SD 1.5 (2.13 GB)|https://huggingface.co/dreamlike-art/dreamlike-photoreal-2.0 |
+|inkpunk-1.0|Envvi/Inkpunk-Diffusion|Stylized illustrations inspired by Gorillaz, FLCL and Shinkawa; prompt with "nvinkpunk" (4.27 GB)|https://huggingface.co/Envvi/Inkpunk-Diffusion |
+|openjourney-4.0|prompthero/openjourney|An SD 1.5 model fine tuned on Midjourney; prompt with "mdjrny-v4 style" (2.13 GB)|https://huggingface.co/prompthero/openjourney |
+|portrait-plus-1.0|wavymulder/portraitplus|An SD-1.5 model trained on close range portraits of people; prompt with "portrait+" (2.13 GB)|https://huggingface.co/wavymulder/portraitplus |
+|seek-art-mega-1.0|coreco/seek.art_MEGA|A general use SD-1.5 "anything" model that supports multiple styles (2.1 GB)|https://huggingface.co/coreco/seek.art_MEGA |
+|trinart-2.0|naclbit/trinart_stable_diffusion_v2|An SD-1.5 model finetuned with ~40K assorted high resolution manga/anime-style images (2.13 GB)|https://huggingface.co/naclbit/trinart_stable_diffusion_v2 |
+|waifu-diffusion-1.4|hakurei/waifu-diffusion|An SD-1.5 model trained on 680k anime/manga-style images (2.13 GB)|https://huggingface.co/hakurei/waifu-diffusion |
 
-Note that these files are covered by an "Ethical AI" license which forbids
-certain uses. When you initially download them, you are asked to
-accept the license terms.
+Note that these files are covered by an "Ethical AI" license which
+forbids certain uses. When you initially download them, you are asked
+to accept the license terms. In addition, some of these models carry
+additional license terms that limit their use in commercial
+applications or on public servers. Be sure to familiarize yourself
+with the model terms by visiting the URLs in the table above.
 
 ## Community-Contributed Models
diff --git a/scripts/make_models_markdown_table.py b/scripts/make_models_markdown_table.py
new file mode 100755
index 0000000000..128ced371d
--- /dev/null
+++ b/scripts/make_models_markdown_table.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+'''
+This script is used at release time to generate a markdown table describing the
+starter models. This text is then manually copied into 050_INSTALLING_MODELS.md.
+'''
+
+from omegaconf import OmegaConf
+from pathlib import Path
+
+
+def main():
+    initial_models_file = Path(__file__).parent / '../invokeai/configs/INITIAL_MODELS.yaml'
+    models = OmegaConf.load(initial_models_file)
+    print('|Model Name | HuggingFace Repo ID | Description | URL |')
+    print('|---------- | ---------- | ----------- | --- |')
+    for model in models:
+        repo_id = models[model].repo_id
+        url = f'https://huggingface.co/{repo_id}'
+        print(f'|{model}|{repo_id}|{models[model].description}|{url} |')
+
+if __name__ == '__main__':
+    main()

From d078941316f2bb227785222c5f3a8958a6bb2e27 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Fri, 24 Feb 2023 10:04:06 -0500
Subject: [PATCH 7/8] add low memory troubleshooting guide

---
 docs/installation/010_INSTALL_AUTOMATED.md | 51 +++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/docs/installation/010_INSTALL_AUTOMATED.md b/docs/installation/010_INSTALL_AUTOMATED.md
index 7f50e906da..228c0ae9a4 100644
--- a/docs/installation/010_INSTALL_AUTOMATED.md
+++ b/docs/installation/010_INSTALL_AUTOMATED.md
@@ -221,7 +221,10 @@ experimental versions later.
 - ***NSFW checker***
   If checked, InvokeAI will test images for potential sexual content
-  and blur them out if found.
+  and blur them out if found. Note that the NSFW checker consumes
+  an additional 0.6 GB of VRAM on top of the 2-3 GB of VRAM used
+  by most image models. If you have a low VRAM GPU (4-6 GB), you
+  can reduce out of memory errors by disabling the checker.
 
 - ***HuggingFace Access Token***
   InvokeAI has the ability to download embedded styles and subjects
@@ -440,6 +443,52 @@ the [InvokeAI Issues](https://github.com/invoke-ai/InvokeAI/issues) section, or
 visit our [Discord Server](https://discord.gg/ZmtBAhwWhy) for interactive
 assistance.
 
+### Out of Memory Issues
+
+The models are large, VRAM is expensive, and you may find yourself
+faced with Out of Memory errors when generating images. Here are some
+tips to reduce the problem:
+
+* **4 GB of VRAM**
+
+This should be adequate for 512x512 pixel images using Stable Diffusion 1.5
+and derived models, provided that you **disable** the NSFW checker. To
+disable the filter, do one of the following:
+
+  * Select option (6) "_change InvokeAI startup options_" from the
+    launcher. This will bring up the console-based startup settings
+    dialogue and allow you to unselect the "NSFW Checker" option.
+  * Start the startup settings dialogue directly by running
+    `invokeai-configure --skip-sd-weights --skip-support-models`
+    from the command line.
+  * Find the `invokeai.init` initialization file in the InvokeAI root
+    directory, open it in a text editor, and change `--nsfw_checker`
+    to `--no-nsfw_checker`
+
+If you are on a CUDA system, you can realize significant memory
+savings by activating the `xformers` library as described above. The
+downside is that `xformers` introduces non-deterministic behavior,
+such that images generated with exactly the same prompt and settings
+will be slightly different from each other. See above for more
+information.
+
+* **6 GB of VRAM**
+
+This is a borderline case. Using the SD 1.5 series you should be able
+to generate images up to 640x640 with the NSFW checker enabled, and up
+to 1024x1024 with it disabled and `xformers` activated.
+
+If you run into persistent memory issues there are a series of
+environment variables that you can set before launching InvokeAI that
+alter how the PyTorch machine learning library manages memory. See
+https://pytorch.org/docs/stable/notes/cuda.html#memory-management for
+a list of these tweaks.
+
+* **12 GB of VRAM**
+
+This should be sufficient to generate larger images up to about
+1280x1280. If you wish to push further, consider activating
+`xformers`.
+
 ### Other Problems
 
 If you run into problems during or after installation, the InvokeAI team is

From 4c93b514bbd0d2be2cdf5b5517f311677cd64f40 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Fri, 24 Feb 2023 10:04:41 -0500
Subject: [PATCH 8/8] bump version to final 2.3.1

---
 ldm/invoke/_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ldm/invoke/_version.py b/ldm/invoke/_version.py
index 92cc704e25..259b4f09e5 100644
--- a/ldm/invoke/_version.py
+++ b/ldm/invoke/_version.py
@@ -1 +1 @@
-__version__='2.3.1-rc4'
+__version__='2.3.1'
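The cache-clearing change in PATCH 5/8 and the PyTorch allocator tweaks referenced in PATCH 7/8's low-memory guide condense to a few lines. This is a hedged sketch, not InvokeAI code; in particular, max_split_size_mb:128 is one example setting taken from the PyTorch memory-management notes linked above, not a project default:

import gc
import os

# Allocator options are read when CUDA initializes, so set them before
# torch is imported (or at least before the first CUDA allocation).
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128")

import torch

def clear_cuda_cache() -> None:
    # Adapted from PATCH 5/8: collect Python garbage first so tensors with
    # no live references actually release their CUDA blocks, then return
    # the cached blocks to the driver.
    if torch.cuda.is_available():
        gc.collect()
        torch.cuda.empty_cache()

if __name__ == "__main__":
    clear_cuda_cache()

Setting PYTORCH_CUDA_ALLOC_CONF must happen before the first CUDA allocation, which is why the sketch exports it before importing torch.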