From ef68a419f13708a0e94955f63e51026df6ccc2b5 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Oct 2022 01:02:45 -0400 Subject: [PATCH 1/9] preload_models.py script downloads the weight files - user can select which weight files to download using huggingface cache - user must log in to huggingface, generate an access token, and accept license terms the very first time this is run. After that, everything works automatically. - added placeholder for docs for installing models - also got rid of unused config files. hopefully they weren't needed for textual inversion, but I don't think so. --- .gitignore | 8 +- .../autoencoder/autoencoder_kl_16x16x16.yaml | 54 ---- .../autoencoder/autoencoder_kl_32x32x4.yaml | 53 ---- .../autoencoder/autoencoder_kl_64x64x3.yaml | 54 ---- .../autoencoder/autoencoder_kl_8x8x64.yaml | 53 ---- .../latent-diffusion/celebahq-ldm-vq-4.yaml | 86 ------ configs/latent-diffusion/cin-ldm-vq-f8.yaml | 98 ------- configs/latent-diffusion/cin256-v2.yaml | 68 ----- configs/latent-diffusion/ffhq-ldm-vq-4.yaml | 85 ------ .../lsun_bedrooms-ldm-vq-4.yaml | 85 ------ .../lsun_churches-ldm-kl-8.yaml | 91 ------- .../latent-diffusion/txt2img-1p4B-eval.yaml | 71 ----- configs/models.yaml | 29 +- .../768x768.yaml | 68 ----- docs/features/INSTALLING_MODELS.md | 9 + ldm/invoke/model_cache.py | 2 +- models/first_stage_models/kl-f16/config.yaml | 44 --- models/first_stage_models/kl-f32/config.yaml | 46 ---- models/first_stage_models/kl-f4/config.yaml | 41 --- models/first_stage_models/kl-f8/config.yaml | 42 --- models/first_stage_models/vq-f16/config.yaml | 49 ---- .../vq-f4-noattn/config.yaml | 46 ---- models/first_stage_models/vq-f4/config.yaml | 45 ---- .../first_stage_models/vq-f8-n256/config.yaml | 48 ---- models/first_stage_models/vq-f8/config.yaml | 48 ---- models/ldm/bsr_sr/config.yaml | 80 ------ models/ldm/celeba256/config.yaml | 70 ----- models/ldm/cin256/config.yaml | 80 ------ models/ldm/ffhq256/config.yaml | 70 ----- models/ldm/inpainting_big/config.yaml | 67 ----- .../ldm/layout2img-openimages256/config.yaml | 81 ------ models/ldm/lsun_beds256/config.yaml | 70 ----- models/ldm/lsun_churches256/config.yaml | 92 ------- models/ldm/semantic_synthesis256/config.yaml | 59 ---- models/ldm/semantic_synthesis512/config.yaml | 78 ------ models/ldm/text2img256/config.yaml | 77 ------ scripts/preload_models.py | 254 +++++++++++++++++- 37 files changed, 286 insertions(+), 2115 deletions(-) delete mode 100644 configs/autoencoder/autoencoder_kl_16x16x16.yaml delete mode 100644 configs/autoencoder/autoencoder_kl_32x32x4.yaml delete mode 100644 configs/autoencoder/autoencoder_kl_64x64x3.yaml delete mode 100644 configs/autoencoder/autoencoder_kl_8x8x64.yaml delete mode 100644 configs/latent-diffusion/celebahq-ldm-vq-4.yaml delete mode 100644 configs/latent-diffusion/cin-ldm-vq-f8.yaml delete mode 100644 configs/latent-diffusion/cin256-v2.yaml delete mode 100644 configs/latent-diffusion/ffhq-ldm-vq-4.yaml delete mode 100644 configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml delete mode 100644 configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml delete mode 100644 configs/latent-diffusion/txt2img-1p4B-eval.yaml delete mode 100644 configs/retrieval-augmented-diffusion/768x768.yaml create mode 100644 docs/features/INSTALLING_MODELS.md delete mode 100644 models/first_stage_models/kl-f16/config.yaml delete mode 100644 models/first_stage_models/kl-f32/config.yaml delete mode 100644 models/first_stage_models/kl-f4/config.yaml delete mode 100644 
models/first_stage_models/kl-f8/config.yaml delete mode 100644 models/first_stage_models/vq-f16/config.yaml delete mode 100644 models/first_stage_models/vq-f4-noattn/config.yaml delete mode 100644 models/first_stage_models/vq-f4/config.yaml delete mode 100644 models/first_stage_models/vq-f8-n256/config.yaml delete mode 100644 models/first_stage_models/vq-f8/config.yaml delete mode 100644 models/ldm/bsr_sr/config.yaml delete mode 100644 models/ldm/celeba256/config.yaml delete mode 100644 models/ldm/cin256/config.yaml delete mode 100644 models/ldm/ffhq256/config.yaml delete mode 100644 models/ldm/inpainting_big/config.yaml delete mode 100644 models/ldm/layout2img-openimages256/config.yaml delete mode 100644 models/ldm/lsun_beds256/config.yaml delete mode 100644 models/ldm/lsun_churches256/config.yaml delete mode 100644 models/ldm/semantic_synthesis256/config.yaml delete mode 100644 models/ldm/semantic_synthesis512/config.yaml delete mode 100644 models/ldm/text2img256/config.yaml diff --git a/.gitignore b/.gitignore index ecef2713bc..33f0de4df2 100644 --- a/.gitignore +++ b/.gitignore @@ -199,7 +199,13 @@ checkpoints .scratch/ .vscode/ gfpgan/ -models/ldm/stable-diffusion-v1/model.sha256 +models/ldm/stable-diffusion-v1/*.sha256 # GFPGAN model files gfpgan/ + +# config file (will be created by installer) +configs/models.yaml + +# weights (will be created by installer) +models/ldm/stable-diffusion-v1/*.ckpt \ No newline at end of file diff --git a/configs/autoencoder/autoencoder_kl_16x16x16.yaml b/configs/autoencoder/autoencoder_kl_16x16x16.yaml deleted file mode 100644 index 5f1d10ec75..0000000000 --- a/configs/autoencoder/autoencoder_kl_16x16x16.yaml +++ /dev/null @@ -1,54 +0,0 @@ -model: - base_learning_rate: 4.5e-6 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: "val/rec_loss" - embed_dim: 16 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 0.000001 - disc_weight: 0.5 - - ddconfig: - double_z: True - z_channels: 16 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1 - num_res_blocks: 2 - attn_resolutions: [16] - dropout: 0.0 - - -data: - target: main.DataModuleFromConfig - params: - batch_size: 12 - wrap: True - train: - target: ldm.data.imagenet.ImageNetSRTrain - params: - size: 256 - degradation: pil_nearest - validation: - target: ldm.data.imagenet.ImageNetSRValidation - params: - size: 256 - degradation: pil_nearest - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 1000 - max_images: 8 - increase_log_steps: True - - trainer: - benchmark: True - accumulate_grad_batches: 2 diff --git a/configs/autoencoder/autoencoder_kl_32x32x4.yaml b/configs/autoencoder/autoencoder_kl_32x32x4.yaml deleted file mode 100644 index ab8b36fe6e..0000000000 --- a/configs/autoencoder/autoencoder_kl_32x32x4.yaml +++ /dev/null @@ -1,53 +0,0 @@ -model: - base_learning_rate: 4.5e-6 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: "val/rec_loss" - embed_dim: 4 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 0.000001 - disc_weight: 0.5 - - ddconfig: - double_z: True - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 12 - wrap: True - train: - target: 
ldm.data.imagenet.ImageNetSRTrain - params: - size: 256 - degradation: pil_nearest - validation: - target: ldm.data.imagenet.ImageNetSRValidation - params: - size: 256 - degradation: pil_nearest - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 1000 - max_images: 8 - increase_log_steps: True - - trainer: - benchmark: True - accumulate_grad_batches: 2 diff --git a/configs/autoencoder/autoencoder_kl_64x64x3.yaml b/configs/autoencoder/autoencoder_kl_64x64x3.yaml deleted file mode 100644 index 5e3db5c4e2..0000000000 --- a/configs/autoencoder/autoencoder_kl_64x64x3.yaml +++ /dev/null @@ -1,54 +0,0 @@ -model: - base_learning_rate: 4.5e-6 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: "val/rec_loss" - embed_dim: 3 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 0.000001 - disc_weight: 0.5 - - ddconfig: - double_z: True - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - - -data: - target: main.DataModuleFromConfig - params: - batch_size: 12 - wrap: True - train: - target: ldm.data.imagenet.ImageNetSRTrain - params: - size: 256 - degradation: pil_nearest - validation: - target: ldm.data.imagenet.ImageNetSRValidation - params: - size: 256 - degradation: pil_nearest - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 1000 - max_images: 8 - increase_log_steps: True - - trainer: - benchmark: True - accumulate_grad_batches: 2 diff --git a/configs/autoencoder/autoencoder_kl_8x8x64.yaml b/configs/autoencoder/autoencoder_kl_8x8x64.yaml deleted file mode 100644 index 5ccd09d38e..0000000000 --- a/configs/autoencoder/autoencoder_kl_8x8x64.yaml +++ /dev/null @@ -1,53 +0,0 @@ -model: - base_learning_rate: 4.5e-6 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: "val/rec_loss" - embed_dim: 64 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 0.000001 - disc_weight: 0.5 - - ddconfig: - double_z: True - z_channels: 64 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: [ 1,1,2,2,4,4] # num_down = len(ch_mult)-1 - num_res_blocks: 2 - attn_resolutions: [16,8] - dropout: 0.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 12 - wrap: True - train: - target: ldm.data.imagenet.ImageNetSRTrain - params: - size: 256 - degradation: pil_nearest - validation: - target: ldm.data.imagenet.ImageNetSRValidation - params: - size: 256 - degradation: pil_nearest - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 1000 - max_images: 8 - increase_log_steps: True - - trainer: - benchmark: True - accumulate_grad_batches: 2 diff --git a/configs/latent-diffusion/celebahq-ldm-vq-4.yaml b/configs/latent-diffusion/celebahq-ldm-vq-4.yaml deleted file mode 100644 index 89b3df4fe1..0000000000 --- a/configs/latent-diffusion/celebahq-ldm-vq-4.yaml +++ /dev/null @@ -1,86 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - image_size: 64 - channels: 3 - monitor: val/loss_simple_ema - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 
- out_channels: 3 - model_channels: 224 - attention_resolutions: - # note: this isn\t actually the resolution but - # the downsampling factor, i.e. this corresnponds to - # attention on spatial resolution 8,16,32, as the - # spatial reolution of the latents is 64 for f4 - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ckpt_path: models/first_stage_models/vq-f4/model.ckpt - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 48 - num_workers: 5 - wrap: false - train: - target: taming.data.faceshq.CelebAHQTrain - params: - size: 256 - validation: - target: taming.data.faceshq.CelebAHQValidation - params: - size: 256 - - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 5000 - max_images: 8 - increase_log_steps: False - - trainer: - benchmark: True \ No newline at end of file diff --git a/configs/latent-diffusion/cin-ldm-vq-f8.yaml b/configs/latent-diffusion/cin-ldm-vq-f8.yaml deleted file mode 100644 index b8cd9e2ef5..0000000000 --- a/configs/latent-diffusion/cin-ldm-vq-f8.yaml +++ /dev/null @@ -1,98 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 32 - channels: 4 - cond_stage_trainable: true - conditioning_key: crossattn - monitor: val/loss_simple_ema - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 - in_channels: 4 - out_channels: 4 - model_channels: 256 - attention_resolutions: - #note: this isn\t actually the resolution but - # the downsampling factor, i.e. 
this corresnponds to - # attention on spatial resolution 8,16,32, as the - # spatial reolution of the latents is 32 for f8 - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - num_head_channels: 32 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 512 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 4 - n_embed: 16384 - ckpt_path: configs/first_stage_models/vq-f8/model.yaml - ddconfig: - double_z: false - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 32 - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.ClassEmbedder - params: - embed_dim: 512 - key: class_label -data: - target: main.DataModuleFromConfig - params: - batch_size: 64 - num_workers: 12 - wrap: false - train: - target: ldm.data.imagenet.ImageNetTrain - params: - config: - size: 256 - validation: - target: ldm.data.imagenet.ImageNetValidation - params: - config: - size: 256 - - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 5000 - max_images: 8 - increase_log_steps: False - - trainer: - benchmark: True \ No newline at end of file diff --git a/configs/latent-diffusion/cin256-v2.yaml b/configs/latent-diffusion/cin256-v2.yaml deleted file mode 100644 index b7c1aa240c..0000000000 --- a/configs/latent-diffusion/cin256-v2.yaml +++ /dev/null @@ -1,68 +0,0 @@ -model: - base_learning_rate: 0.0001 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 64 - channels: 3 - cond_stage_trainable: true - conditioning_key: crossattn - monitor: val/loss - use_ema: False - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 192 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 5 - num_heads: 1 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 512 - - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.ClassEmbedder - params: - n_classes: 1001 - embed_dim: 512 - key: class_label diff --git a/configs/latent-diffusion/ffhq-ldm-vq-4.yaml b/configs/latent-diffusion/ffhq-ldm-vq-4.yaml deleted file mode 100644 index 1899e30f77..0000000000 --- a/configs/latent-diffusion/ffhq-ldm-vq-4.yaml +++ /dev/null @@ -1,85 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - image_size: 64 - channels: 3 - monitor: val/loss_simple_ema - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 224 - attention_resolutions: - # note: this isn\t actually the resolution but - # the downsampling 
factor, i.e. this corresnponds to - # attention on spatial resolution 8,16,32, as the - # spatial reolution of the latents is 64 for f4 - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ckpt_path: configs/first_stage_models/vq-f4/model.yaml - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 42 - num_workers: 5 - wrap: false - train: - target: taming.data.faceshq.FFHQTrain - params: - size: 256 - validation: - target: taming.data.faceshq.FFHQValidation - params: - size: 256 - - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 5000 - max_images: 8 - increase_log_steps: False - - trainer: - benchmark: True \ No newline at end of file diff --git a/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml b/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml deleted file mode 100644 index c4ca66c16c..0000000000 --- a/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml +++ /dev/null @@ -1,85 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - image_size: 64 - channels: 3 - monitor: val/loss_simple_ema - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 224 - attention_resolutions: - # note: this isn\t actually the resolution but - # the downsampling factor, i.e. 
this corresnponds to - # attention on spatial resolution 8,16,32, as the - # spatial reolution of the latents is 64 for f4 - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - ckpt_path: configs/first_stage_models/vq-f4/model.yaml - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 48 - num_workers: 5 - wrap: false - train: - target: ldm.data.lsun.LSUNBedroomsTrain - params: - size: 256 - validation: - target: ldm.data.lsun.LSUNBedroomsValidation - params: - size: 256 - - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 5000 - max_images: 8 - increase_log_steps: False - - trainer: - benchmark: True \ No newline at end of file diff --git a/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml b/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml deleted file mode 100644 index 18dc8c2d9c..0000000000 --- a/configs/latent-diffusion/lsun_churches-ldm-kl-8.yaml +++ /dev/null @@ -1,91 +0,0 @@ -model: - base_learning_rate: 5.0e-5 # set to target_lr by starting main.py with '--scale_lr False' - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0155 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - loss_type: l1 - first_stage_key: "image" - cond_stage_key: "image" - image_size: 32 - channels: 4 - cond_stage_trainable: False - concat_mode: False - scale_by_std: True - monitor: 'val/loss_simple_ema' - - scheduler_config: # 10000 warmup steps - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: [10000] - cycle_lengths: [10000000000000] - f_start: [1.e-6] - f_max: [1.] - f_min: [ 1.] 
- - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 - in_channels: 4 - out_channels: 4 - model_channels: 192 - attention_resolutions: [ 1, 2, 4, 8 ] # 32, 16, 8, 4 - num_res_blocks: 2 - channel_mult: [ 1,2,2,4,4 ] # 32, 16, 8, 4, 2 - num_heads: 8 - use_scale_shift_norm: True - resblock_updown: True - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: "val/rec_loss" - ckpt_path: "models/first_stage_models/kl-f8/model.ckpt" - ddconfig: - double_z: True - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1 - num_res_blocks: 2 - attn_resolutions: [ ] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: "__is_unconditional__" - -data: - target: main.DataModuleFromConfig - params: - batch_size: 96 - num_workers: 5 - wrap: False - train: - target: ldm.data.lsun.LSUNChurchesTrain - params: - size: 256 - validation: - target: ldm.data.lsun.LSUNChurchesValidation - params: - size: 256 - -lightning: - callbacks: - image_logger: - target: main.ImageLogger - params: - batch_frequency: 5000 - max_images: 8 - increase_log_steps: False - - - trainer: - benchmark: True \ No newline at end of file diff --git a/configs/latent-diffusion/txt2img-1p4B-eval.yaml b/configs/latent-diffusion/txt2img-1p4B-eval.yaml deleted file mode 100644 index 8e331cbfdf..0000000000 --- a/configs/latent-diffusion/txt2img-1p4B-eval.yaml +++ /dev/null @@ -1,71 +0,0 @@ -model: - base_learning_rate: 5.0e-05 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.00085 - linear_end: 0.012 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: caption - image_size: 32 - channels: 4 - cond_stage_trainable: true - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_factor: 0.18215 - use_ema: False - - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 - in_channels: 4 - out_channels: 4 - model_channels: 320 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - - 4 - num_heads: 8 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 1280 - use_checkpoint: true - legacy: False - - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: - target: ldm.modules.encoders.modules.BERTEmbedder - params: - n_embed: 1280 - n_layer: 32 diff --git a/configs/models.yaml b/configs/models.yaml index 162da38da2..cb4191c503 100644 --- a/configs/models.yaml +++ b/configs/models.yaml @@ -1,5 +1,5 @@ # This file describes the alternative machine learning models -# available to the dream script. +# available to InvokeAI script. # # To add a new model, follow the examples below. 
Each # model requires a model config file, a weights file, @@ -8,22 +8,29 @@ stable-diffusion-1.4: config: configs/stable-diffusion/v1-inference.yaml weights: models/ldm/stable-diffusion-v1/model.ckpt -# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt + vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt description: Stable Diffusion inference model version 1.4 width: 512 height: 512 +stable-diffusion-1.5: + description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) + weights: ./models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt + config: ./configs/stable-diffusion/v1-inference.yaml + width: 512 + height: 512 + vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt default: true inpainting-1.5: - description: runwayML tuned inpainting model v1.5 - weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt - config: configs/stable-diffusion/v1-inpainting-inference.yaml -# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt + description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB) + weights: ./models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt + config: ./configs/stable-diffusion/v1-inpainting-inference.yaml width: 512 height: 512 -stable-diffusion-1.5: - config: configs/stable-diffusion/v1-inference.yaml - weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt -# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt - description: Stable Diffusion inference model version 1.5 + vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt +waifu-diffusion-1.3: + description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27) + weights: ./models/ldm/stable-diffusion-v1/model-epoch09-float32.ckpt + config: ./configs/stable-diffusion/v1-inference.yaml width: 512 height: 512 + vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt diff --git a/configs/retrieval-augmented-diffusion/768x768.yaml b/configs/retrieval-augmented-diffusion/768x768.yaml deleted file mode 100644 index b51b1d8373..0000000000 --- a/configs/retrieval-augmented-diffusion/768x768.yaml +++ /dev/null @@ -1,68 +0,0 @@ -model: - base_learning_rate: 0.0001 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.015 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: jpg - cond_stage_key: nix - image_size: 48 - channels: 16 - cond_stage_trainable: false - conditioning_key: crossattn - monitor: val/loss_simple_ema - scale_by_std: false - scale_factor: 0.22765929 - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 48 - in_channels: 16 - out_channels: 16 - model_channels: 448 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - use_scale_shift_norm: false - resblock_updown: false - num_head_channels: 32 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 768 - use_checkpoint: true - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: val/rec_loss - embed_dim: 16 - ddconfig: - double_z: true - z_channels: 16 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 16 - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: torch.nn.Identity \ No newline at end of file diff --git a/docs/features/INSTALLING_MODELS.md 
b/docs/features/INSTALLING_MODELS.md new file mode 100644 index 0000000000..aa04cc14b7 --- /dev/null +++ b/docs/features/INSTALLING_MODELS.md @@ -0,0 +1,9 @@ +--- +title: Installing Models +--- + +# :octicons-paintbrush-16: Installing Models + +## TO COME + + diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index f972a9eb16..a5aa343303 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -281,7 +281,7 @@ class ModelCache(object): Returns the preamble for the config file. ''' return '''# This file describes the alternative machine learning models -# available to the dream script. +# available to InvokeAI script. # # To add a new model, follow the examples below. Each # model requires a model config file, a weights file, diff --git a/models/first_stage_models/kl-f16/config.yaml b/models/first_stage_models/kl-f16/config.yaml deleted file mode 100644 index 661921cf75..0000000000 --- a/models/first_stage_models/kl-f16/config.yaml +++ /dev/null @@ -1,44 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: val/rec_loss - embed_dim: 16 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 1.0e-06 - disc_weight: 0.5 - ddconfig: - double_z: true - z_channels: 16 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 16 - dropout: 0.0 -data: - target: main.DataModuleFromConfig - params: - batch_size: 6 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/kl-f32/config.yaml b/models/first_stage_models/kl-f32/config.yaml deleted file mode 100644 index 7b642b136a..0000000000 --- a/models/first_stage_models/kl-f32/config.yaml +++ /dev/null @@ -1,46 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: val/rec_loss - embed_dim: 64 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 1.0e-06 - disc_weight: 0.5 - ddconfig: - double_z: true - z_channels: 64 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 1 - - 2 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 16 - - 8 - dropout: 0.0 -data: - target: main.DataModuleFromConfig - params: - batch_size: 6 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/kl-f4/config.yaml b/models/first_stage_models/kl-f4/config.yaml deleted file mode 100644 index 85cfb3e94e..0000000000 --- a/models/first_stage_models/kl-f4/config.yaml +++ /dev/null @@ -1,41 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: val/rec_loss - embed_dim: 3 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 1.0e-06 - disc_weight: 0.5 - ddconfig: - double_z: true - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 -data: - target: main.DataModuleFromConfig - params: - 
batch_size: 10 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/kl-f8/config.yaml b/models/first_stage_models/kl-f8/config.yaml deleted file mode 100644 index 921aa42533..0000000000 --- a/models/first_stage_models/kl-f8/config.yaml +++ /dev/null @@ -1,42 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.AutoencoderKL - params: - monitor: val/rec_loss - embed_dim: 4 - lossconfig: - target: ldm.modules.losses.LPIPSWithDiscriminator - params: - disc_start: 50001 - kl_weight: 1.0e-06 - disc_weight: 0.5 - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 -data: - target: main.DataModuleFromConfig - params: - batch_size: 4 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/vq-f16/config.yaml b/models/first_stage_models/vq-f16/config.yaml deleted file mode 100644 index 91c7454906..0000000000 --- a/models/first_stage_models/vq-f16/config.yaml +++ /dev/null @@ -1,49 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.VQModel - params: - embed_dim: 8 - n_embed: 16384 - ddconfig: - double_z: false - z_channels: 8 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 16 - dropout: 0.0 - lossconfig: - target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator - params: - disc_conditional: false - disc_in_channels: 3 - disc_start: 250001 - disc_weight: 0.75 - disc_num_layers: 2 - codebook_weight: 1.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 14 - num_workers: 20 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/vq-f4-noattn/config.yaml b/models/first_stage_models/vq-f4-noattn/config.yaml deleted file mode 100644 index f8e499fa2a..0000000000 --- a/models/first_stage_models/vq-f4-noattn/config.yaml +++ /dev/null @@ -1,46 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.VQModel - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - - ddconfig: - attn_type: none - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator - params: - disc_conditional: false - disc_in_channels: 3 - disc_start: 11 - disc_weight: 0.75 - codebook_weight: 1.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 8 - num_workers: 12 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - crop_size: 256 diff --git a/models/first_stage_models/vq-f4/config.yaml b/models/first_stage_models/vq-f4/config.yaml deleted file mode 
100644 index 7d8cef3252..0000000000 --- a/models/first_stage_models/vq-f4/config.yaml +++ /dev/null @@ -1,45 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.VQModel - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator - params: - disc_conditional: false - disc_in_channels: 3 - disc_start: 0 - disc_weight: 0.75 - codebook_weight: 1.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 8 - num_workers: 16 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - crop_size: 256 diff --git a/models/first_stage_models/vq-f8-n256/config.yaml b/models/first_stage_models/vq-f8-n256/config.yaml deleted file mode 100644 index 8519e13d61..0000000000 --- a/models/first_stage_models/vq-f8-n256/config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.VQModel - params: - embed_dim: 4 - n_embed: 256 - monitor: val/rec_loss - ddconfig: - double_z: false - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 32 - dropout: 0.0 - lossconfig: - target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator - params: - disc_conditional: false - disc_in_channels: 3 - disc_start: 250001 - disc_weight: 0.75 - codebook_weight: 1.0 - -data: - target: main.DataModuleFromConfig - params: - batch_size: 10 - num_workers: 20 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/first_stage_models/vq-f8/config.yaml b/models/first_stage_models/vq-f8/config.yaml deleted file mode 100644 index efd6801ca9..0000000000 --- a/models/first_stage_models/vq-f8/config.yaml +++ /dev/null @@ -1,48 +0,0 @@ -model: - base_learning_rate: 4.5e-06 - target: ldm.models.autoencoder.VQModel - params: - embed_dim: 4 - n_embed: 16384 - monitor: val/rec_loss - ddconfig: - double_z: false - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 32 - dropout: 0.0 - lossconfig: - target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator - params: - disc_conditional: false - disc_in_channels: 3 - disc_num_layers: 2 - disc_start: 1 - disc_weight: 0.6 - codebook_weight: 1.0 -data: - target: main.DataModuleFromConfig - params: - batch_size: 10 - num_workers: 20 - wrap: true - train: - target: ldm.data.openimages.FullOpenImagesTrain - params: - size: 384 - crop_size: 256 - validation: - target: ldm.data.openimages.FullOpenImagesValidation - params: - size: 384 - crop_size: 256 diff --git a/models/ldm/bsr_sr/config.yaml b/models/ldm/bsr_sr/config.yaml deleted file mode 100644 index 861692a8d1..0000000000 --- a/models/ldm/bsr_sr/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0155 - log_every_t: 100 - timesteps: 1000 - loss_type: l2 - 
first_stage_key: image - cond_stage_key: LR_image - image_size: 64 - channels: 3 - concat_mode: true - cond_stage_trainable: false - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 6 - out_channels: 3 - model_channels: 160 - attention_resolutions: - - 16 - - 8 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 2 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: torch.nn.Identity -data: - target: main.DataModuleFromConfig - params: - batch_size: 64 - wrap: false - num_workers: 12 - train: - target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain - params: - size: 256 - degradation: bsrgan_light - downscale_f: 4 - min_crop_f: 0.5 - max_crop_f: 1.0 - random_crop: true - validation: - target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation - params: - size: 256 - degradation: bsrgan_light - downscale_f: 4 - min_crop_f: 0.5 - max_crop_f: 1.0 - random_crop: true diff --git a/models/ldm/celeba256/config.yaml b/models/ldm/celeba256/config.yaml deleted file mode 100644 index a12f4e9d39..0000000000 --- a/models/ldm/celeba256/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 64 - channels: 3 - cond_stage_trainable: false - concat_mode: false - monitor: val/loss - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 224 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 48 - num_workers: 5 - wrap: false - train: - target: ldm.data.faceshq.CelebAHQTrain - params: - size: 256 - validation: - target: ldm.data.faceshq.CelebAHQValidation - params: - size: 256 diff --git a/models/ldm/cin256/config.yaml b/models/ldm/cin256/config.yaml deleted file mode 100644 index 9bc1b4566a..0000000000 --- a/models/ldm/cin256/config.yaml +++ /dev/null @@ -1,80 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 32 - channels: 4 - cond_stage_trainable: true - conditioning_key: crossattn - monitor: val/loss_simple_ema - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 - in_channels: 4 - out_channels: 4 - 
model_channels: 256 - attention_resolutions: - - 4 - - 2 - - 1 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 4 - num_head_channels: 32 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 512 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 4 - n_embed: 16384 - ddconfig: - double_z: false - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: - - 32 - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.ClassEmbedder - params: - embed_dim: 512 - key: class_label -data: - target: main.DataModuleFromConfig - params: - batch_size: 64 - num_workers: 12 - wrap: false - train: - target: ldm.data.imagenet.ImageNetTrain - params: - config: - size: 256 - validation: - target: ldm.data.imagenet.ImageNetValidation - params: - config: - size: 256 diff --git a/models/ldm/ffhq256/config.yaml b/models/ldm/ffhq256/config.yaml deleted file mode 100644 index 0ddfd1b93e..0000000000 --- a/models/ldm/ffhq256/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 64 - channels: 3 - cond_stage_trainable: false - concat_mode: false - monitor: val/loss - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 224 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 42 - num_workers: 5 - wrap: false - train: - target: ldm.data.faceshq.FFHQTrain - params: - size: 256 - validation: - target: ldm.data.faceshq.FFHQValidation - params: - size: 256 diff --git a/models/ldm/inpainting_big/config.yaml b/models/ldm/inpainting_big/config.yaml deleted file mode 100644 index da5fd5ea50..0000000000 --- a/models/ldm/inpainting_big/config.yaml +++ /dev/null @@ -1,67 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0205 - log_every_t: 100 - timesteps: 1000 - loss_type: l1 - first_stage_key: image - cond_stage_key: masked_image - image_size: 64 - channels: 3 - concat_mode: true - monitor: val/loss - scheduler_config: - target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler - params: - verbosity_interval: 0 - warm_up_steps: 1000 - max_decay_steps: 50000 - lr_start: 0.001 - lr_max: 0.1 - lr_min: 0.0001 - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 7 - out_channels: 3 - model_channels: 256 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_heads: 8 - resblock_updown: true - first_stage_config: - target: 
ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - ddconfig: - attn_type: none - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: ldm.modules.losses.contperceptual.DummyLoss - cond_stage_config: __is_first_stage__ diff --git a/models/ldm/layout2img-openimages256/config.yaml b/models/ldm/layout2img-openimages256/config.yaml deleted file mode 100644 index 9e1dc15fe2..0000000000 --- a/models/ldm/layout2img-openimages256/config.yaml +++ /dev/null @@ -1,81 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0205 - log_every_t: 100 - timesteps: 1000 - loss_type: l1 - first_stage_key: image - cond_stage_key: coordinates_bbox - image_size: 64 - channels: 3 - conditioning_key: crossattn - cond_stage_trainable: true - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 128 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - use_spatial_transformer: true - transformer_depth: 3 - context_dim: 512 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.BERTEmbedder - params: - n_embed: 512 - n_layer: 16 - vocab_size: 8192 - max_seq_len: 92 - use_tokenizer: false - monitor: val/loss_simple_ema -data: - target: main.DataModuleFromConfig - params: - batch_size: 24 - wrap: false - num_workers: 10 - train: - target: ldm.data.openimages.OpenImagesBBoxTrain - params: - size: 256 - validation: - target: ldm.data.openimages.OpenImagesBBoxValidation - params: - size: 256 diff --git a/models/ldm/lsun_beds256/config.yaml b/models/ldm/lsun_beds256/config.yaml deleted file mode 100644 index 1a50c766a5..0000000000 --- a/models/ldm/lsun_beds256/config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: class_label - image_size: 64 - channels: 3 - cond_stage_trainable: false - concat_mode: false - monitor: val/loss - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 224 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 4 - num_head_channels: 32 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: __is_unconditional__ -data: - target: main.DataModuleFromConfig - params: - batch_size: 48 - 
num_workers: 5 - wrap: false - train: - target: ldm.data.lsun.LSUNBedroomsTrain - params: - size: 256 - validation: - target: ldm.data.lsun.LSUNBedroomsValidation - params: - size: 256 diff --git a/models/ldm/lsun_churches256/config.yaml b/models/ldm/lsun_churches256/config.yaml deleted file mode 100644 index 424d0914c9..0000000000 --- a/models/ldm/lsun_churches256/config.yaml +++ /dev/null @@ -1,92 +0,0 @@ -model: - base_learning_rate: 5.0e-05 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0155 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - loss_type: l1 - first_stage_key: image - cond_stage_key: image - image_size: 32 - channels: 4 - cond_stage_trainable: false - concat_mode: false - scale_by_std: true - monitor: val/loss_simple_ema - scheduler_config: - target: ldm.lr_scheduler.LambdaLinearScheduler - params: - warm_up_steps: - - 10000 - cycle_lengths: - - 10000000000000 - f_start: - - 1.0e-06 - f_max: - - 1.0 - f_min: - - 1.0 - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 32 - in_channels: 4 - out_channels: 4 - model_channels: 192 - attention_resolutions: - - 1 - - 2 - - 4 - - 8 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 2 - - 4 - - 4 - num_heads: 8 - use_scale_shift_norm: true - resblock_updown: true - first_stage_config: - target: ldm.models.autoencoder.AutoencoderKL - params: - embed_dim: 4 - monitor: val/rec_loss - ddconfig: - double_z: true - z_channels: 4 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - - cond_stage_config: '__is_unconditional__' - -data: - target: main.DataModuleFromConfig - params: - batch_size: 96 - num_workers: 5 - wrap: false - train: - target: ldm.data.lsun.LSUNChurchesTrain - params: - size: 256 - validation: - target: ldm.data.lsun.LSUNChurchesValidation - params: - size: 256 diff --git a/models/ldm/semantic_synthesis256/config.yaml b/models/ldm/semantic_synthesis256/config.yaml deleted file mode 100644 index 1a721cfffa..0000000000 --- a/models/ldm/semantic_synthesis256/config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0205 - log_every_t: 100 - timesteps: 1000 - loss_type: l1 - first_stage_key: image - cond_stage_key: segmentation - image_size: 64 - channels: 3 - concat_mode: true - cond_stage_trainable: true - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 6 - out_channels: 3 - model_channels: 128 - attention_resolutions: - - 32 - - 16 - - 8 - num_res_blocks: 2 - channel_mult: - - 1 - - 4 - - 8 - num_heads: 8 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.SpatialRescaler - params: - n_stages: 2 - in_channels: 182 - out_channels: 3 diff --git a/models/ldm/semantic_synthesis512/config.yaml b/models/ldm/semantic_synthesis512/config.yaml deleted file mode 100644 index 8faded2eec..0000000000 --- 
a/models/ldm/semantic_synthesis512/config.yaml +++ /dev/null @@ -1,78 +0,0 @@ -model: - base_learning_rate: 1.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0205 - log_every_t: 100 - timesteps: 1000 - loss_type: l1 - first_stage_key: image - cond_stage_key: segmentation - image_size: 128 - channels: 3 - concat_mode: true - cond_stage_trainable: true - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 128 - in_channels: 6 - out_channels: 3 - model_channels: 128 - attention_resolutions: - - 32 - - 16 - - 8 - num_res_blocks: 2 - channel_mult: - - 1 - - 4 - - 8 - num_heads: 8 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - monitor: val/rec_loss - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.SpatialRescaler - params: - n_stages: 2 - in_channels: 182 - out_channels: 3 -data: - target: main.DataModuleFromConfig - params: - batch_size: 8 - wrap: false - num_workers: 10 - train: - target: ldm.data.landscapes.RFWTrain - params: - size: 768 - crop_size: 512 - segmentation_to_float32: true - validation: - target: ldm.data.landscapes.RFWValidation - params: - size: 768 - crop_size: 512 - segmentation_to_float32: true diff --git a/models/ldm/text2img256/config.yaml b/models/ldm/text2img256/config.yaml deleted file mode 100644 index 3f54a01515..0000000000 --- a/models/ldm/text2img256/config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -model: - base_learning_rate: 2.0e-06 - target: ldm.models.diffusion.ddpm.LatentDiffusion - params: - linear_start: 0.0015 - linear_end: 0.0195 - num_timesteps_cond: 1 - log_every_t: 200 - timesteps: 1000 - first_stage_key: image - cond_stage_key: caption - image_size: 64 - channels: 3 - cond_stage_trainable: true - conditioning_key: crossattn - monitor: val/loss_simple_ema - unet_config: - target: ldm.modules.diffusionmodules.openaimodel.UNetModel - params: - image_size: 64 - in_channels: 3 - out_channels: 3 - model_channels: 192 - attention_resolutions: - - 8 - - 4 - - 2 - num_res_blocks: 2 - channel_mult: - - 1 - - 2 - - 3 - - 5 - num_head_channels: 32 - use_spatial_transformer: true - transformer_depth: 1 - context_dim: 640 - first_stage_config: - target: ldm.models.autoencoder.VQModelInterface - params: - embed_dim: 3 - n_embed: 8192 - ddconfig: - double_z: false - z_channels: 3 - resolution: 256 - in_channels: 3 - out_ch: 3 - ch: 128 - ch_mult: - - 1 - - 2 - - 4 - num_res_blocks: 2 - attn_resolutions: [] - dropout: 0.0 - lossconfig: - target: torch.nn.Identity - cond_stage_config: - target: ldm.modules.encoders.modules.BERTEmbedder - params: - n_embed: 640 - n_layer: 32 -data: - target: main.DataModuleFromConfig - params: - batch_size: 28 - num_workers: 10 - wrap: false - train: - target: ldm.data.previews.pytorch_dataset.PreviewsTrain - params: - size: 256 - validation: - target: ldm.data.previews.pytorch_dataset.PreviewsValidation - params: - size: 256 diff --git a/scripts/preload_models.py b/scripts/preload_models.py index bf0a5ffb99..e5a4850a8b 100644 --- a/scripts/preload_models.py +++ b/scripts/preload_models.py @@ -3,9 +3,11 @@ # Before running stable-diffusion on an internet-isolated machine, # run this script from one with internet connectivity. 
The # two machines must share a common .cache directory. -from transformers import CLIPTokenizer, CLIPTextModel +# +# Coauthor: Kevin Turner http://github.com/keturn +# +print('Loading Python libraries...\n') import clip -from transformers import BertTokenizerFast, AutoFeatureExtractor import sys import transformers import os @@ -14,9 +16,247 @@ import torch import urllib.request import zipfile import traceback +import getpass +from omegaconf import OmegaConf +from pathlib import Path +from transformers import CLIPTokenizer, CLIPTextModel +from transformers import BertTokenizerFast, AutoFeatureExtractor +from huggingface_hub import hf_hub_download, HfFolder, hf_hub_url transformers.logging.set_verbosity_error() +#--------------------------globals-- +Model_dir = './models/ldm/stable-diffusion-v1/' +Config_file = './configs/models.yaml' +SD_Configs = './configs/stable-diffusion' +Datasets = { + 'stable-diffusion-1.5': { + 'description': 'The newest Stable Diffusion version 1.5 weight file (4.27 GB)', + 'repo_id': 'runwayml/stable-diffusion-v1-5', + 'config': 'v1-inference.yaml', + 'file': 'v1-5-pruned-emaonly.ckpt', + 'recommended': True, + 'width': 512, + 'height': 512, + }, + 'inpainting-1.5': { + 'description': 'RunwayML SD 1.5 model optimized for inpainting (4.27 GB)', + 'repo_id': 'runwayml/stable-diffusion-inpainting', + 'config': 'v1-inpainting-inference.yaml', + 'file': 'sd-v1-5-inpainting.ckpt', + 'recommended': True, + 'width': 512, + 'height': 512, + }, + 'stable-diffusion-1.4': { + 'description': 'The original Stable Diffusion version 1.4 weight file (4.27 GB)', + 'repo_id': 'CompVis/stable-diffusion-v-1-4-original', + 'config': 'v1-inference.yaml', + 'file': 'sd-v1-4.ckpt', + 'recommended': False, + 'width': 512, + 'height': 512, + }, + 'waifu-diffusion-1.3': { + 'description': 'Stable Diffusion 1.4 fine tuned on anime-styled images (4.27)', + 'repo_id': 'hakurei/waifu-diffusion-v1-3', + 'config': 'v1-inference.yaml', + 'file': 'model-epoch09-float32.ckpt', + 'recommended': False, + 'width': 512, + 'height': 512, + }, + 'ft-mse-improved-autoencoder-840000': { + 'description': 'StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB)', + 'repo_id': 'stabilityai/sd-vae-ft-mse-original', + 'config': 'VAE', + 'file': 'vae-ft-mse-840000-ema-pruned.ckpt', + 'recommended': True, + 'width': 512, + 'height': 512, + }, +} +Config_preamble = '''# This file describes the alternative machine learning models +# available to InvokeAI script. +# +# To add a new model, follow the examples below. Each +# model requires a model config file, a weights file, +# and the width and height of the images it +# was trained on. +''' + +#--------------------------------------------- +def introduction(): + print( + '''Welcome to InvokeAI. This script will help download the Stable Diffusion weight files +and other large models that are needed for text to image generation. 
At any point you may interrupt +this program and resume later.\n''' + ) + +#--------------------------------------------- +def yes_or_no(prompt:str, default_yes=True): + default = "y" if default_yes else 'n' + response = input(f'{prompt} [{default}] ') or default + if default_yes: + return response[0] not in ('n','N') + else: + return response[0] in ('y','Y') + +#--------------------------------------------- +def user_wants_to_download_weights(): + return yes_or_no('Would you like to download the Stable Diffusion model weights now?') + +#--------------------------------------------- +def select_datasets(): + done = False + while not done: + print(''' +Choose the weight file(s) you wish to download. Before downloading you +will be given the option to view and change your selections. +''' + ) + datasets = dict() + + counter = 1 + dflt = None # the first model selected will be the default; TODO let user change + for ds in Datasets.keys(): + recommended = '(recommended)' if Datasets[ds]['recommended'] else '' + print(f'[{counter}] {ds}:\n {Datasets[ds]["description"]} {recommended}') + if yes_or_no(' Download?',default_yes=Datasets[ds]['recommended']): + datasets[ds]=counter + counter += 1 + + print('The following weight files will be downloaded:') + for ds in datasets: + dflt = '*' if dflt is None else '' + print(f' [{datasets[ds]}] {ds}{dflt}') + print("*default") + ok_to_download = yes_or_no('Ok to download?') + if not ok_to_download: + if yes_or_no('Change your selection?'): + pass + else: + done = True + else: + done = True + return datasets if ok_to_download else None + +#-------------------------------Authenticate against Hugging Face +def authenticate(): + print(''' +To download the Stable Diffusion weight files you need to read and accept the +CreativeML Responsible AI license. If you have not already done so, please +create an account at https://huggingface.co. Then login under your account and +read and accept the license available at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original. +''' + ) + input('Press when you are ready to continue:') + access_token = HfFolder.get_token() + if access_token is None: + print(''' +Thank you! Now you need to authenticate with your HuggingFace access token. +Go to https://huggingface.co/settings/tokens and create a token. 
Copy it to the +clipboard and paste it here: ''' + ) + access_token = getpass.getpass() + HfFolder.save_token(access_token) + return access_token + +#--------------------------------------------- +# look for legacy model.ckpt in models directory and offer to +# normalize its name +def migrate_models_ckpt(): + if not os.path.exists(os.path.join(Model_dir,'model.ckpt')): + return + new_name = Datasets['stable-diffusion-1.4']['file'] + print('You seem to have the Stable Diffusion v4.1 "model.ckpt" already installed.') + rename = yes_or_no(f'Ok to rename it to "{new_name}" for future reference?') + if rename: + print(f'model.ckpt => {new_name}') + os.rename(os.path.join(Model_dir,'model.ckpt'),os.path.join(Model_dir,new_name)) + +#--------------------------------------------- +def download_weight_datasets(models:dict, access_token:str): + migrate_models_ckpt() + successful = dict() + for mod in models.keys(): + repo_id = Datasets[mod]['repo_id'] + filename = Datasets[mod]['file'] + success = conditional_download( + repo_id=repo_id, + model_name=filename, + access_token=access_token + ) + if success: + successful[mod] = True + keys = ', '.join(successful.keys()) + print(f'Successfully installed {keys}') + return successful + +#--------------------------------------------- +def conditional_download(repo_id:str, model_name:str, access_token:str): + model_dest = os.path.join(Model_dir, model_name) + if os.path.exists(model_dest): + print(f' * {model_name}: exists') + return True + os.makedirs(os.path.dirname(model_dest), exist_ok=True) + + try: + print(f' * {model_name}: downloading or retrieving from cache...') + path = Path(hf_hub_download(repo_id, model_name, use_auth_token=access_token)) + path.resolve(strict=True).link_to(model_dest) + except Exception as e: + print(f'** Error downloading {model_name}: {str(e)} **') + return False + return True + +#--------------------------------------------- +def update_config_file(successfully_downloaded:dict): + try: + yaml = new_config_file_contents(successfully_downloaded) + tmpfile = os.path.join(os.path.dirname(Config_file),'new_config.tmp') + with open(tmpfile, 'w') as outfile: + outfile.write(Config_preamble) + outfile.write(yaml) + os.rename(tmpfile,Config_file) + except Exception as e: + print(f'**Error creating config file {Config_file}: {str(e)} **') + return + print(f'Successfully created new configuration file {Config_file}') + + +#--------------------------------------------- +def new_config_file_contents(successfully_downloaded:dict)->str: + conf = OmegaConf.load(Config_file) + + # find the VAE file, if there is one + vae = None + default_selected = False + + for model in successfully_downloaded: + if Datasets[model]['config'] == 'VAE': + vae = Datasets[model]['file'] + + for model in successfully_downloaded: + if Datasets[model]['config'] == 'VAE': # skip VAE entries + continue + stanza = conf[model] if model in conf else { } + + stanza['description'] = Datasets[model]['description'] + stanza['weights'] = os.path.join(Model_dir,Datasets[model]['file']) + stanza['config'] =os.path.join(SD_Configs, Datasets[model]['config']) + stanza['width'] = Datasets[model]['width'] + stanza['height'] = Datasets[model]['height'] + stanza.pop('default',None) # this will be set later + if vae: + stanza['vae'] = os.path.join(Model_dir,vae) + # BUG - the first stanza is always the default. User should select. 
+ if not default_selected: + stanza['default'] = True + default_selected = True + conf[model] = stanza + return OmegaConf.to_yaml(conf) + #--------------------------------------------- # this will preload the Bert tokenizer fles def download_bert(): @@ -66,7 +306,6 @@ def download_gfpgan(): print(traceback.format_exc()) print('Loading models from GFPGAN') - import urllib.request for model in ( [ 'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth', @@ -152,6 +391,15 @@ def download_safety_checker(): #------------------------------------- if __name__ == '__main__': + introduction() + if user_wants_to_download_weights(): + models = select_datasets() + if models is None: + if yes_or_no('Quit?',default_yes=False): + sys.exit(0) + access_token = authenticate() + successfully_downloaded = download_weight_datasets(models, access_token) + update_config_file(successfully_downloaded) download_bert() download_kornia() download_clip() From 3caa95ced91972611a93febad9987d1af4066d06 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Oct 2022 09:18:48 -0400 Subject: [PATCH 2/9] add more step-by-step documentation and links --- configs/models.yaml | 8 ++-- scripts/preload_models.py | 95 +++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 27 deletions(-) diff --git a/configs/models.yaml b/configs/models.yaml index cb4191c503..aabe4b9ce2 100644 --- a/configs/models.yaml +++ b/configs/models.yaml @@ -6,10 +6,10 @@ # and the width and height of the images it # was trained on. stable-diffusion-1.4: - config: configs/stable-diffusion/v1-inference.yaml - weights: models/ldm/stable-diffusion-v1/model.ckpt - vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt - description: Stable Diffusion inference model version 1.4 + config: ./configs/stable-diffusion/v1-inference.yaml + weights: ./models/ldm/stable-diffusion-v1/sd-v1-4.ckpt + vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt + description: The original Stable Diffusion version 1.4 weight file (4.27 GB) width: 512 height: 512 stable-diffusion-1.5: diff --git a/scripts/preload_models.py b/scripts/preload_models.py index e5a4850a8b..02127f0af9 100644 --- a/scripts/preload_models.py +++ b/scripts/preload_models.py @@ -93,6 +93,22 @@ and other large models that are needed for text to image generation. At any poin this program and resume later.\n''' ) +#-------------------------------------------- +def postscript(): + print( + '''You're all set! You may now launch InvokeAI using one of these two commands: +Web version: + + python scripts/invoke.py --web (connect to http://localhost:9090) + +Command-line version: + + python scripts/invoke.py + +Have fun! +''' +) + #--------------------------------------------- def yes_or_no(prompt:str, default_yes=True): default = "y" if default_yes else 'n' @@ -144,19 +160,43 @@ will be given the option to view and change your selections. #-------------------------------Authenticate against Hugging Face def authenticate(): print(''' -To download the Stable Diffusion weight files you need to read and accept the -CreativeML Responsible AI license. If you have not already done so, please -create an account at https://huggingface.co. Then login under your account and -read and accept the license available at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original. +To download the Stable Diffusion weight files from the official Hugging Face +repository, you need to read and accept the CreativeML Responsible AI license. 
+ +This involves a few easy steps. + +1. If you have not already done so, create an account on Hugging Face's web site + using the "Sign Up" button: + + https://huggingface.co/join + + You will need to verify your email address as part of the HuggingFace + registration process. + +2. Log into your account Hugging Face: + + https://huggingface.co/login + +3. Accept the license terms located here: + + https://huggingface.co/CompVis/stable-diffusion-v-1-4-original ''' ) input('Press when you are ready to continue:') access_token = HfFolder.get_token() if access_token is None: print(''' -Thank you! Now you need to authenticate with your HuggingFace access token. -Go to https://huggingface.co/settings/tokens and create a token. Copy it to the -clipboard and paste it here: ''' +4. Thank you! The last step is to enter your HuggingFace access token so that + this script is authorized to initiate the download. Go to the access tokens + page of your Hugging Face account and create a token by clicking the + "New token" button: + + https://huggingface.co/settings/tokens + + (You can enter anything you like in the token creation field marked "Name". + "Role" should be "read"). + + Now copy the token to your clipboard and paste it here: ''' ) access_token = getpass.getpass() HfFolder.save_token(access_token) @@ -391,21 +431,30 @@ def download_safety_checker(): #------------------------------------- if __name__ == '__main__': - introduction() - if user_wants_to_download_weights(): - models = select_datasets() - if models is None: - if yes_or_no('Quit?',default_yes=False): - sys.exit(0) - access_token = authenticate() - successfully_downloaded = download_weight_datasets(models, access_token) - update_config_file(successfully_downloaded) - download_bert() - download_kornia() - download_clip() - download_gfpgan() - download_codeformer() - download_clipseg() - download_safety_checker() + try: + introduction() + print('** WEIGHT SELECTION **') + if user_wants_to_download_weights(): + models = select_datasets() + if models is None: + if yes_or_no('Quit?',default_yes=False): + sys.exit(0) + print('** LICENSE AGREEMENT FOR WEIGHT FILES **') + access_token = authenticate() + print('\n** DOWNLOADING WEIGHTS **') + successfully_downloaded = download_weight_datasets(models, access_token) + update_config_file(successfully_downloaded) + print('\n** DOWNLOADING SUPPORT MODELS **') + download_bert() + download_kornia() + download_clip() + download_gfpgan() + download_codeformer() + download_clipseg() + download_safety_checker() + postscript() + except KeyboardInterrupt: + print("\nGoodbye! Come back soon.") + From 13f26a99b83db10e8945a43ac1a719ad7ea30937 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 29 Oct 2022 10:37:38 -0400 Subject: [PATCH 3/9] documentation and usability fixes --- docs/features/CLI.md | 2 +- docs/features/INSTALLING_MODELS.md | 254 +++++++++++++++++- docs/installation/INSTALL_LINUX.md | 26 +- docs/installation/INSTALL_MAC.md | 12 +- docs/installation/INSTALL_WINDOWS.md | 25 +- ldm/invoke/model_cache.py | 13 +- .../place-ckpt-files-here.txt | 2 + 7 files changed, 290 insertions(+), 44 deletions(-) create mode 100644 models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt diff --git a/docs/features/CLI.md b/docs/features/CLI.md index 8e7b5780f0..7ce06d1fea 100644 --- a/docs/features/CLI.md +++ b/docs/features/CLI.md @@ -385,7 +385,7 @@ automatically. Example:
-invoke> !import_model models/ldm/stable-diffusion-v1/	model-epoch08-float16.ckpt
+invoke> !import_model models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
 >> Model import in process. Please enter the values needed to configure this model:
 
 Name for this model: waifu-diffusion
diff --git a/docs/features/INSTALLING_MODELS.md b/docs/features/INSTALLING_MODELS.md
index aa04cc14b7..078df329b9 100644
--- a/docs/features/INSTALLING_MODELS.md
+++ b/docs/features/INSTALLING_MODELS.md
@@ -4,6 +4,258 @@ title: Installing Models
 
 # :octicons-paintbrush-16: Installing Models
 
-## TO COME
+## Model Weight Files
+
+The model weight files ('*.ckpt') are the Stable Diffusion "secret
+sauce". They are the product of training the AI on millions of
+captioned images gathered from multiple sources.
+
+Originally there was only a single Stable Diffusion weights file,
+which many people named `model.ckpt`. Now there are dozens or more
+that have been "fine tuned" to provide particular styles, genres, or
+other features. InvokeAI allows you to install and run multiple model
+weight files and switch between them quickly in the command-line and
+web interfaces.
+
+This manual will guide you through installing and configuring model
+weight files.
+
+## Base Models
+
+InvokeAI comes with support for a good initial set of models listed in
+the model configuration file `configs/models.yaml`. They are:
+
+| Model                   | Weight File                   |   Description                    | DOWNLOAD FROM            |
+| ----------------------  | ----------------------------- |--------------------------------- | ----------------|
+| stable-diffusion-1.5    | v1-5-pruned-emaonly.ckpt      | Most recent version of base Stable Diffusion model| https://huggingface.co/runwayml/stable-diffusion-v1-5 |
+| stable-diffusion-1.4    | sd-v1-4.ckpt                  | Previous version of base Stable Diffusion model | https://huggingface.co/CompVis/stable-diffusion-v-1-4-original |
+| inpainting-1.5          | sd-v1-5-inpainting.ckpt       | Stable Diffusion 1.5 model specialized for inpainting | https://huggingface.co/runwayml/stable-diffusion-inpainting |
+| waifu-diffusion-1.3     | model-epoch09-float32.ckpt    | Stable Diffusion 1.4 trained to produce anime images | https://huggingface.co/hakurei/waifu-diffusion-v1-3 |
+| ft-mse-improved-autoencoder-840000 | vae-ft-mse-840000-ema-pruned.ckpt   | A fine-tune add-on file that improves face generation | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/ |
+
+
+Note that these files are covered by an "Ethical AI" license which
+forbids certain uses. You will need to create an account on the
+Hugging Face website and accept the license terms before you can
+access the files.
+
+The predefined configuration file for InvokeAI (located at
+`configs/models.yaml`) provides entries for each of these weights
+files. `stable-diffusion-1.5` is the default model used, and we
+strongly recommend that you install this weights file if nothing else.
+
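+If you are curious which models are currently configured and which one is the
+default, a short Python sketch along these lines will print them. (This is an
+illustration only, not an official InvokeAI command; it relies on the same
+OmegaConf library that the `preload_models.py` script uses.)
+
+```
+from omegaconf import OmegaConf
+
+conf = OmegaConf.load('configs/models.yaml')
+for name, stanza in conf.items():
+    marker = '*' if stanza.get('default', False) else ' '
+    print(f'{marker} {name}: {stanza["description"]}')
+```
+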
+## Community-Contributed Models
+
+There are too many to list here and more are being contributed every
+day. [This Page](https://rentry.org/sdmodels) hosts an updated list of
+Stable Diffusion models and where they can be obtained.
+
+## Installation
+
+There are three ways to install weights files:
+
+1. During InvokeAI installation, the `preload_models.py` script can
+download them for you.
+
+2. You can use the command-line interface (CLI) to import, configure
+and modify new model files.
+
+3. You can download the files manually and add the appropriate entries
+to `models.yaml`.
+
+### Installation via `preload_models.py`
+
+This is the most automatic way. Run `scripts/preload_models.py` from
+the console.  It will ask you to select which models to download and
+lead you through the steps of setting up a Hugging Face account if you
+haven't done so already.
+
+To start, from within the InvokeAI directory run the command `python
+scripts/preload_models.py` (Linux/MacOS) or `python
+scripts\preload_models.py` (Windows):
+
+```
+Loading Python libraries...
+
+** INTRODUCTION **
+Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
+and other large models that are needed for text to image generation. At any point you may interrupt
+this program and resume later.
+
+** WEIGHT SELECTION **
+Would you like to download the Stable Diffusion model weights now? [y] 
+
+Choose the weight file(s) you wish to download. Before downloading you 
+will be given the option to view and change your selections.
+
+[1] stable-diffusion-1.5:
+    The newest Stable Diffusion version 1.5 weight file (4.27 GB) (recommended)
+    Download? [y] 
+[2] inpainting-1.5:
+    RunwayML SD 1.5 model optimized for inpainting (4.27 GB) (recommended)
+    Download? [y] 
+[3] stable-diffusion-1.4:
+    The original Stable Diffusion version 1.4 weight file (4.27 GB) 
+    Download? [n] n
+[4] waifu-diffusion-1.3:
+    Stable Diffusion 1.4 fine tuned on anime-styled images (4.27) 
+    Download? [n] y
+[5] ft-mse-improved-autoencoder-840000:
+    StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) (recommended)
+    Download? [y] y
+The following weight files will be downloaded:
+   [1] stable-diffusion-1.5*
+   [2] inpainting-1.5
+   [4] waifu-diffusion-1.3
+   [5] ft-mse-improved-autoencoder-840000
+*default
+Ok to download? [y] 
+** LICENSE AGREEMENT FOR WEIGHT FILES **
+
+1. To download the Stable Diffusion weight files you need to read and accept the
+   CreativeML Responsible AI license. If you have not already done so, please 
+   create an account using the "Sign Up" button:
+
+   https://huggingface.co 
+
+   You will need to verify your email address as part of the HuggingFace
+   registration process.
+
+2. After creating the account, login under your account and accept
+   the license terms located here:
+
+   https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
+
+Press <enter> when you are ready to continue:
+...
+```
+
+When the script is complete, you will find the downloaded weights
+files in `models/ldm/stable-diffusion-v1` and a matching configuration
+file in `configs/models.yaml`.
+
+You can run the script again to add any models you didn't select the
+first time. Note that as a safety measure the script will _never_
+remove a previously-installed weights file. You will have to do this
+manually.
+
+### Installation via the CLI
+
+You can install a new model, including any of the community-supported
+ones, via the command-line client's `!import_model` command.
+
+1. First download the desired model weights file and place it under `models/ldm/stable-diffusion-v1/`.
+   You may rename the weights file to something more memorable if you wish. Record the path of the
+   weights file (e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`)
+
+2. Launch the `invoke.py` CLI with `python scripts/invoke.py`.
+
+3. At the `invoke>` command-line, enter the command `!import_model <path to model>`.
+   For example:
+
+   `invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
+
+   (Hint - the CLI supports file path autocompletion. Type a bit of the path
+   name and hit <tab> in order to get a choice of possible completions.
+
+4. Follow the wizard's instructions to complete installation as shown in the example
+   here:
+
+```
+invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
+>> Model import in process. Please enter the values needed to configure this model:
+
+Name for this model: arabian-nights
+Description of this model: Arabian Nights Fine Tune v1.0
+Configuration file for this model: configs/stable-diffusion/v1-inference.yaml
+Default image width: 512
+Default image height: 512
+>> New configuration:
+arabian-nights:
+  config: configs/stable-diffusion/v1-inference.yaml
+  description: Arabian Nights Fine Tune v1.0
+  height: 512
+  weights: models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
+  width: 512
+OK to import [n]? y
+>> Caching model stable-diffusion-1.4 in system RAM
+>> Loading arabian-nights from models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
+   | LatentDiffusion: Running in eps-prediction mode
+   | DiffusionWrapper has 859.52 M params.
+   | Making attention of type 'vanilla' with 512 in_channels
+   | Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
+   | Making attention of type 'vanilla' with 512 in_channels
+   | Using faster float16 precision
+
+```
+
+If you've previously installed the fine-tune VAE file `vae-ft-mse-840000-ema-pruned.ckpt`,
+the wizard will also ask you if you want to add this VAE to the model.
+
+The appropriate entry for this model will be added to `configs/models.yaml` and it will
+be available to use in the CLI immediately.
+
+The CLI has additional commands for switching among, viewing, editing,
+deleting the available models. These are described in [Command Line
+Client](CLI.md#model-selection-and-importation), but the two most
+frequently-used are `!models` and `!switch <model>`. The first
+prints a table of models that InvokeAI knows about and their load
+status. The second will load the requested model and lets you switch
+back and forth quickly among loaded models.
+
+### Manually editing `configs/models.yaml`
+
+If you are comfortable with a text editor then you may simply edit
+`models.yaml` directly.
+
+First you need to download the desired .ckpt file and place it in
+`models/ldm/stable-diffusion-v1` as described in step #1 in the
+previous section. Record the path to the weights file,
+e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
+
+Then using a **text** editor (e.g. the Windows Notepad application),
+open the file `configs/models.yaml`, and add a new stanza that follows
+this example:
+
+```
+arabian-nights-1.0:
+  description: A great fine-tune in Arabian Nights style
+  weights: ./models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
+  config: ./configs/stable-diffusion/v1-inference.yaml
+  width: 512
+  height: 512
+  vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+  default: false
+```
+
+* arabian-nights-1.0
+  - This is the name of the model that you will refer to from within the
+  CLI and the WebGUI when you need to load and use the model.
+
+* description
+  - Any description that you want to add to the model to remind you what
+    it is.
+
+* weights
+  - Relative path to the .ckpt weights file for this model.
+
+* config
+  - This is the confusingly-named configuration file for the model itself.
+  Use `./configs/stable-diffusion/v1-inference.yaml` unless the model happens
+  to need a custom configuration, in which case the place you downloaded it
+  from will tell you what to use instead. For example, the runwayML custom
+  inpainting model requires the file `configs/stable-diffusion/v1-inpainting-inference.yaml`.
+  (This is already included in the InvokeAI distribution and configured automatically
+  for you by the `preload_models.py` script.
+
+* vae
+  - If you want to add a VAE file to the model, then enter its path here.
+
+* width, height
+  - This is the width and height of the images used to train the model.
+  Currently they are always 512 and 512.
+
+Save the `models.yaml` and relaunch InvokeAI. The new model should be
+available for your use.
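+
+If you want to sanity-check your edits before relaunching, a minimal Python
+sketch along these lines (an illustration only; it assumes the hypothetical
+`arabian-nights-1.0` stanza shown above and uses the same OmegaConf library
+that `preload_models.py` relies on) will confirm that the paths you entered
+actually exist:
+
+```
+from pathlib import Path
+from omegaconf import OmegaConf
+
+conf = OmegaConf.load('configs/models.yaml')
+stanza = conf['arabian-nights-1.0']   # the hypothetical stanza from the example above
+for key in ('weights', 'config', 'vae'):
+    if key in stanza and not Path(stanza[key]).exists():
+        print(f'** {key} file {stanza[key]} not found')
+```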
 
 
diff --git a/docs/installation/INSTALL_LINUX.md b/docs/installation/INSTALL_LINUX.md
index 629175c3fa..174fdfde05 100644
--- a/docs/installation/INSTALL_LINUX.md
+++ b/docs/installation/INSTALL_LINUX.md
@@ -1,5 +1,5 @@
 ---
-title: Linux
+title: Manual Installation, Linux
 ---
 
 # :fontawesome-brands-linux: Linux
@@ -63,24 +63,16 @@ title: Linux
         model loading scheme to allow the script to work on GPU machines that are not
         internet connected. See [Preload Models](../features/OTHER.md#preload-models)
 
-7. Now you need to install the weights for the stable diffusion model.
+7. Install the weights for the stable diffusion model.
 
-      - For running with the released weights, you will first need to set up an acount
-        with [Hugging Face](https://huggingface.co).
-      - Use your credentials to log in, and then point your browser [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
-      - You may be asked to sign a license agreement at this point.
-      - Click on "Files and versions" near the top of the page, and then click on the
-        file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click
-        the "download" link. Save the file somewhere safe on your local machine.
+- Sign up at https://huggingface.co
+- Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
+- Accept the terms and click Access Repository
+- Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
 
-      Now run the following commands from within the stable-diffusion directory.
-      This will create a symbolic link from the stable-diffusion model.ckpt file, to
-      the true location of the `sd-v1-4.ckpt` file.
-
-    ```bash
-    (invokeai) ~/InvokeAI$ mkdir -p models/ldm/stable-diffusion-v1
-    (invokeai) ~/InvokeAI$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
-    ```
+There are many other models that you can use. Please see
+[INSTALLING_MODELS.md](../features/INSTALLING_MODELS.md) for details.
 
 8. Start generating images!
 
diff --git a/docs/installation/INSTALL_MAC.md b/docs/installation/INSTALL_MAC.md
index 06624eb5e8..a458eaf43c 100644
--- a/docs/installation/INSTALL_MAC.md
+++ b/docs/installation/INSTALL_MAC.md
@@ -1,5 +1,5 @@
 ---
-title: macOS
+title: Manual Installation, macOS
 ---
 
 # :fontawesome-brands-apple: macOS
@@ -24,9 +24,15 @@ First you need to download a large checkpoint file.
 1. Sign up at https://huggingface.co
 2. Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
 3. Accept the terms and click Access Repository
-4. Download [sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt) and note where you have saved it (probably the Downloads folder). You may want to move it somewhere else for longer term storage - SD needs this file to run.
+4. Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
 
-While that is downloading, open Terminal and run the following commands one at a time, reading the comments and taking care to run the appropriate command for your Mac's architecture (Intel or M1).
+There are many other models that you can try. Please see
+[INSTALLING_MODELS.md](../features/INSTALLING_MODELS.md) for details.
+
+While that is downloading, open Terminal and run the following
+commands one at a time, reading the comments and taking care to run
+the appropriate command for your Mac's architecture (Intel or M1).
 
 !!! todo "Homebrew"
 
diff --git a/docs/installation/INSTALL_WINDOWS.md b/docs/installation/INSTALL_WINDOWS.md
index c7dc9065ea..c3cd6465f4 100644
--- a/docs/installation/INSTALL_WINDOWS.md
+++ b/docs/installation/INSTALL_WINDOWS.md
@@ -1,5 +1,5 @@
 ---
-title: Windows
+title: Manual Installation, Windows
 ---
 
 # :fontawesome-brands-windows: Windows
@@ -83,23 +83,14 @@ in the wiki
 
 8. Now you need to install the weights for the big stable diffusion model.
 
-      1. For running with the released weights, you will first need to set up an acount with Hugging Face (https://huggingface.co).
-      2. Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
-      3. You may be asked to sign a license agreement at this point.
-      4. Click on "Files and versions" near the top of the page, and then click on the file named `sd-v1-4.ckpt`. You'll be taken to a page that
-        prompts you to click the "download" link. Now save the file somewhere safe on your local machine.
-      5. The weight file is >4 GB in size, so
-        downloading may take a while.
+   - Sign up at https://huggingface.co
+   - Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
+   - Accept the terms and click Access Repository
+   - Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+     and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
 
-    Now run the following commands from **within the InvokeAI directory** to copy the weights file to the right place:
-
-    ```batch
-    mkdir -p models\ldm\stable-diffusion-v1
-    copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
-    ```
-
-    Please replace `C:\path\to\sd-v1.4.ckpt` with the correct path to wherever you stashed this file. If you prefer not to copy or move the .ckpt file,
-    you may instead create a shortcut to it from within `models\ldm\stable-diffusion-v1\`.
+   There are many other models that you can use. Please see
+   [INSTALLING_MODELS.md](../features/INSTALLING_MODELS.md) for details.
 
 9. Start generating images!
 
diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py
index a5aa343303..ff72ce951f 100644
--- a/ldm/invoke/model_cache.py
+++ b/ldm/invoke/model_cache.py
@@ -227,11 +227,14 @@ class ModelCache(object):
             print('   | Using more accurate float32 precision')
 
         # look and load a matching vae file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
-        if vae and os.path.exists(vae):
-            print(f'   | Loading VAE weights from: {vae}')
-            vae_ckpt = torch.load(vae, map_location="cpu")
-            vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
-            model.first_stage_model.load_state_dict(vae_dict, strict=False)
+        if vae:
+            if os.path.exists(vae):
+                print(f'   | Loading VAE weights from: {vae}')
+                vae_ckpt = torch.load(vae, map_location="cpu")
+                vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
+                model.first_stage_model.load_state_dict(vae_dict, strict=False)
+            else:
+                print(f'   | VAE file {vae} not found. Skipping.')
 
         model.to(self.device)
         # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
diff --git a/models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt b/models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt
new file mode 100644
index 0000000000..a174e54540
--- /dev/null
+++ b/models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt
@@ -0,0 +1,2 @@
+See docs/features/INSTALLING_MODELS.md for how to populate this
+directory with one or more Stable Diffusion model weight files.

From b46921c22da2fc20a81e3fcec8aa64417c17d84b Mon Sep 17 00:00:00 2001
From: Lincoln Stein 
Date: Sat, 29 Oct 2022 11:15:57 -0400
Subject: [PATCH 4/9] move model installation docs into installation dir

---
 docs/{features => installation}/INSTALLING_MODELS.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename docs/{features => installation}/INSTALLING_MODELS.md (100%)

diff --git a/docs/features/INSTALLING_MODELS.md b/docs/installation/INSTALLING_MODELS.md
similarity index 100%
rename from docs/features/INSTALLING_MODELS.md
rename to docs/installation/INSTALLING_MODELS.md

From b532e6dd179144a100eb8b386482eedd67393511 Mon Sep 17 00:00:00 2001
From: Lincoln Stein 
Date: Sat, 29 Oct 2022 11:28:17 -0400
Subject: [PATCH 5/9] wording and formatting tweaks

---
 docs/installation/INSTALLING_MODELS.md | 10 +++++-----
 scripts/preload_models.py              | 11 ++++++++++-
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/docs/installation/INSTALLING_MODELS.md b/docs/installation/INSTALLING_MODELS.md
index 078df329b9..d44b379447 100644
--- a/docs/installation/INSTALLING_MODELS.md
+++ b/docs/installation/INSTALLING_MODELS.md
@@ -47,7 +47,7 @@ strongly recommend that you install this weights file if nothing else.
 ## Community-Contributed Models
 
 There are too many to list here and more are being contributed every
-day. [This Page](https://rentry.org/sdmodels) hosts an updated list of
+day. [This page](https://rentry.org/sdmodels) hosts an updated list of
 Stable Diffusion models and where they can be obtained.
 
 ## Installation
@@ -156,7 +156,7 @@ ones, via the command-line client's `!import_model` command.
    `invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
 
    (Hint - the CLI supports file path autocompletion. Type a bit of the path
-   name and hit <tab> in order to get a choice of possible completions.
+   name and hit <tab> in order to get a choice of possible completions.)
 
 4. Follow the wizard's instructions to complete installation as shown in the example
    here:
@@ -197,7 +197,7 @@ be available to use in the CLI immediately.
 
 The CLI has additional commands for switching among, viewing, editing,
 deleting the available models. These are described in [Command Line
-Client](CLI.md#model-selection-and-importation), but the two most
+Client](../features/CLI.md#model-selection-and-importation), but the two most
 frequently-used are `!models` and `!switch <model>`. The first
 prints a table of models that InvokeAI knows about and their load
 status. The second will load the requested model and lets you switch
@@ -245,7 +245,7 @@ arabian-nights-1.0:
   to need a custom configuration, in which case the place you downloaded it
   from will tell you what to use instead. For example, the runwayML custom
   inpainting model requires the file `configs/stable-diffusion/v1-inpainting-inference.yaml`.
-  (This is already included in the InvokeAI distribution and configured automatically
+  This is already included in the InvokeAI distribution and is configured automatically
   for you by the `preload_models.py` script.
 
 * vae
@@ -255,7 +255,7 @@ arabian-nights-1.0:
   - This is the width and height of the images used to train the model.
   Currently they are always 512 and 512.
 
-Save the `models.yaml` and relaunch InvokeAI. The new model should be
+Save the `models.yaml` and relaunch InvokeAI. The new model should now be
 available for your use.
 
 
diff --git a/scripts/preload_models.py b/scripts/preload_models.py
index 02127f0af9..bd737a32a3 100644
--- a/scripts/preload_models.py
+++ b/scripts/preload_models.py
@@ -120,6 +120,13 @@ def yes_or_no(prompt:str, default_yes=True):
 
 #---------------------------------------------
 def user_wants_to_download_weights():
+    print('''You can download and configure the weights files manually or let this
+script do it for you. Manual installation is described at:
+
+https://github.com/invoke-ai/InvokeAI/blob/main/docs/installation/INSTALLING_MODELS.md
+
+'''
+    )
     return yes_or_no('Would you like to download the Stable Diffusion model weights now?')
 
 #---------------------------------------------
@@ -454,7 +461,9 @@ if __name__ == '__main__':
         download_safety_checker()
         postscript()
     except KeyboardInterrupt:
-        print("\nGoodbye! Come back soon.")
+        print('\nGoodbye! Come back soon.')
+    except Exception as e:
+        print(f'\nA problem occurred during download.\nThe error was: "{str(e)}"')
 
 
     

From 19b6c671a683039479d32753af50d199d540edf3 Mon Sep 17 00:00:00 2001
From: Lincoln Stein 
Date: Sun, 30 Oct 2022 00:17:05 -0400
Subject: [PATCH 6/9] further improvements to preload_models script

- User can choose to download just recommended models, customize the list to download,
  or skip downloading altogether.
- Does direct download to models directory instead of to HuggingFace cache
- Able to resume interrupted downloads
---
 scripts/preload_models.py | 125 ++++++++++++++++++++++++++++----------
 1 file changed, 94 insertions(+), 31 deletions(-)

diff --git a/scripts/preload_models.py b/scripts/preload_models.py
index bd737a32a3..9485985230 100644
--- a/scripts/preload_models.py
+++ b/scripts/preload_models.py
@@ -13,10 +13,13 @@ import transformers
 import os
 import warnings
 import torch
-import urllib.request
 import zipfile
 import traceback
 import getpass
+import requests
+
+from urllib import request
+from tqdm import tqdm
 from omegaconf import OmegaConf
 from pathlib import Path
 from transformers import CLIPTokenizer, CLIPTextModel
@@ -119,36 +122,56 @@ def yes_or_no(prompt:str, default_yes=True):
         return response[0] in ('y','Y')
 
 #---------------------------------------------
-def user_wants_to_download_weights():
+def user_wants_to_download_weights()->str:
+    '''
+    Returns one of "skip", "recommended" or "customized"
+    '''
     print('''You can download and configure the weights files manually or let this
 script do it for you. Manual installation is described at:
 
 https://github.com/invoke-ai/InvokeAI/blob/main/docs/installation/INSTALLING_MODELS.md
 
+You may download the recommended models (about 10GB total), select a customized set, or
+completely skip this step.
 '''
     )
-    return yes_or_no('Would you like to download the Stable Diffusion model weights now?')
+    selection = None
+    while selection is None:
+        choice = input('Download <r>ecommended models, <c>ustomize the list, or <s>kip this step? [r]: ')
+        if choice.startswith(('r','R')) or len(choice)==0:
+            selection = 'recommended'
+        elif choice.startswith(('c','C')):
+            selection = 'customized'
+        elif choice.startswith(('s','S')):
+            selection = 'skip'
+    return selection
 
 #---------------------------------------------
-def select_datasets():
+def select_datasets(action:str):
     done = False
     while not done:
-        print('''
+        datasets = dict()
+        dflt = None   # the first model selected will be the default; TODO let user change
+        counter = 1
+
+        if action == 'customized':
+            print('''
 Choose the weight file(s) you wish to download. Before downloading you 
 will be given the option to view and change your selections.
 '''
         )
-        datasets = dict()
-
-        counter = 1
-        dflt = None   # the first model selected will be the default; TODO let user change
-        for ds in Datasets.keys():
-            recommended = '(recommended)' if Datasets[ds]['recommended'] else ''
-            print(f'[{counter}] {ds}:\n    {Datasets[ds]["description"]} {recommended}')
-            if yes_or_no('    Download?',default_yes=Datasets[ds]['recommended']):
-                datasets[ds]=counter
-            counter += 1
-
+            for ds in Datasets.keys():
+                recommended = '(recommended)' if Datasets[ds]['recommended'] else ''
+                print(f'[{counter}] {ds}:\n    {Datasets[ds]["description"]} {recommended}')
+                if yes_or_no('    Download?',default_yes=Datasets[ds]['recommended']):
+                    datasets[ds]=counter
+                    counter += 1
+        else:
+            for ds in Datasets.keys():
+                if Datasets[ds]['recommended']:
+                    datasets[ds]=counter
+                    counter += 1
+                
         print('The following weight files will be downloaded:')
         for ds in datasets:
             dflt = '*' if dflt is None else ''
@@ -157,13 +180,15 @@ will be given the option to view and change your selections.
         ok_to_download = yes_or_no('Ok to download?')
         if not ok_to_download:
             if yes_or_no('Change your selection?'):
+                action = 'customized'
                 pass
             else:
                 done = True
         else:
             done = True
     return datasets if ok_to_download else None
-    
+
+
 #-------------------------------Authenticate against Hugging Face
 def authenticate():
     print('''
@@ -180,13 +205,19 @@ This involves a few easy steps.
    You will need to verify your email address as part of the HuggingFace
    registration process.
 
-2. Log into your account Hugging Face:
+2. Log into your Hugging Face account:
 
     https://huggingface.co/login
 
 3. Accept the license terms located here:
 
-   https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
+   https://huggingface.co/runwayml/stable-diffusion-v1-5
+
+   and here:
+
+   https://huggingface.co/runwayml/stable-diffusion-inpainting
+
+    (Yes, you have to accept two slightly different license agreements)
 '''
     )
     input('Press <enter> when you are ready to continue:')
@@ -229,7 +260,7 @@ def download_weight_datasets(models:dict, access_token:str):
     for mod in models.keys():
         repo_id = Datasets[mod]['repo_id']
         filename = Datasets[mod]['file']
-        success = conditional_download(
+        success = download_with_resume(
             repo_id=repo_id,
             model_name=filename,
             access_token=access_token
@@ -241,19 +272,50 @@ def download_weight_datasets(models:dict, access_token:str):
     return successful
     
 #---------------------------------------------
-def conditional_download(repo_id:str, model_name:str, access_token:str):
+def download_with_resume(repo_id:str, model_name:str, access_token:str)->bool:
+
     model_dest = os.path.join(Model_dir, model_name)
-    if os.path.exists(model_dest):
-        print(f' * {model_name}: exists')
-        return True
     os.makedirs(os.path.dirname(model_dest), exist_ok=True)
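+    # build the direct download URL for this file in the Hugging Face repository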
+    url = hf_hub_url(repo_id, model_name)
+
+    header = {"Authorization": f'Bearer {access_token}'}
+    open_mode = 'wb'
+    exist_size = 0
+    
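+    # if a partial file is already present, request only the missing bytes via an
+    # HTTP Range header and append to the existing file instead of starting over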
+    if os.path.exists(model_dest):
+        exist_size = os.path.getsize(model_dest)
+        header['Range'] = f'bytes={exist_size}-'
+        open_mode = 'ab'
+
+    resp = requests.get(url, headers=header, stream=True)
+    total = int(resp.headers.get('content-length', 0))
+    
+    if resp.status_code==416:  # "range not satisfiable", which means nothing to return
+        print(f'* {model_name}: complete file found. Skipping.')
+        return True
+    elif exist_size > 0:
+        print(f'* {model_name}: partial file found. Resuming...')
+    else:
+        print(f'* {model_name}: Downloading...')
 
     try:
-        print(f' * {model_name}: downloading or retrieving from cache...')
-        path = Path(hf_hub_download(repo_id, model_name, use_auth_token=access_token))
-        path.resolve(strict=True).link_to(model_dest)
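+        # a body this small cannot be a weights file; it is almost certainly an
+        # error message from the server, so show it and give up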
+        if total < 2000:
+            print(f'* {model_name}: {resp.text}')
+            return False
+
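+        # stream the body in 1 KB chunks; the tqdm bar starts at the size of any
+        # existing partial file so that resumed downloads report correct totals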
+        with open(model_dest, open_mode) as file, tqdm(
+                desc=model_name,
+                initial=exist_size,
+                total=total+exist_size,
+                unit='iB',
+                unit_scale=True,
+                unit_divisor=1000,
+        ) as bar:
+            for data in resp.iter_content(chunk_size=1024):
+                size = file.write(data)
+                bar.update(size)
     except Exception as e:
-        print(f'** Error downloading {model_name}: {str(e)} **')
+        print(f'An error occurred while downloading {model_name}: {str(e)}')
         return False
     return True
                              
@@ -435,14 +497,15 @@ def download_safety_checker():
     safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
     safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
     print('...success')
-    
+
 #-------------------------------------
 if __name__ == '__main__':
     try:
         introduction()
         print('** WEIGHT SELECTION **')
-        if user_wants_to_download_weights():
-            models = select_datasets()
+        choice = user_wants_to_download_weights()
+        if choice != 'skip':
+            models = select_datasets(choice)
             if models is None:
                 if yes_or_no('Quit?',default_yes=False):
                     sys.exit(0)

From fbfffe028fd55eed3834909ea98a0fcf2f93c9e4 Mon Sep 17 00:00:00 2001
From: Lincoln Stein 
Date: Sun, 30 Oct 2022 00:33:48 -0400
Subject: [PATCH 7/9] add --no-interactive mode

---
 scripts/preload_models.py | 56 +++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/scripts/preload_models.py b/scripts/preload_models.py
index 9485985230..6a1d5741fd 100644
--- a/scripts/preload_models.py
+++ b/scripts/preload_models.py
@@ -7,6 +7,7 @@
 # Coauthor: Kevin Turner http://github.com/keturn
 #
 print('Loading Python libraries...\n')
+import argparse
 import clip
 import sys
 import transformers
@@ -17,7 +18,6 @@ import zipfile
 import traceback
 import getpass
 import requests
-
 from urllib import request
 from tqdm import tqdm
 from omegaconf import OmegaConf
@@ -500,29 +500,39 @@ def download_safety_checker():
 
 #-------------------------------------
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='InvokeAI model downloader')
+    parser.add_argument('--interactive',
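+                        # argparse.BooleanOptionalAction (Python 3.9+) creates both
+                        # --interactive and --no-interactive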
+                        dest='interactive',
+                        action=argparse.BooleanOptionalAction,
+                        default=True,
+                        help='run in interactive mode (default)')
+    opt = parser.parse_args()
+    
     try:
-        introduction()
-        print('** WEIGHT SELECTION **')
-        choice = user_wants_to_download_weights()
-        if choice != 'skip':
-            models = select_datasets(choice)
-            if models is None:
-                if yes_or_no('Quit?',default_yes=False):
-                    sys.exit(0)
-            print('** LICENSE AGREEMENT FOR WEIGHT FILES **')
-            access_token = authenticate()
-            print('\n** DOWNLOADING WEIGHTS **')
-            successfully_downloaded = download_weight_datasets(models, access_token)
-            update_config_file(successfully_downloaded)
-        print('\n** DOWNLOADING SUPPORT MODELS **')
-        download_bert()
-        download_kornia()
-        download_clip()
-        download_gfpgan()
-        download_codeformer()
-        download_clipseg()
-        download_safety_checker()
-        postscript()
+        if opt.interactive:
+            introduction()
+            print('** WEIGHT SELECTION **')
+            choice = user_wants_to_download_weights()
+            if choice != 'skip':
+                models = select_datasets(choice)
+                if models is None:
+                    if yes_or_no('Quit?',default_yes=False):
+                        sys.exit(0)
+                print('** LICENSE AGREEMENT FOR WEIGHT FILES **')
+                access_token = authenticate()
+                print('\n** DOWNLOADING WEIGHTS **')
+                successfully_downloaded = download_weight_datasets(models, access_token)
+                update_config_file(successfully_downloaded)
+        else:
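+            # non-interactive mode: skip all prompts and fetch only the support models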
+            print('\n** DOWNLOADING SUPPORT MODELS **')
+            download_bert()
+            download_kornia()
+            download_clip()
+            download_gfpgan()
+            download_codeformer()
+            download_clipseg()
+            download_safety_checker()
+            postscript()
     except KeyboardInterrupt:
         print('\nGoodbye! Come back soon.')
     except Exception as e:

From a7517ce0de6df31cb601a0aa3185215dcc391cd8 Mon Sep 17 00:00:00 2001
From: Lincoln Stein 
Date: Sun, 30 Oct 2022 00:54:00 -0400
Subject: [PATCH 8/9] add pointer to hugging face concepts library

---
 docs/installation/INSTALLING_MODELS.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/docs/installation/INSTALLING_MODELS.md b/docs/installation/INSTALLING_MODELS.md
index d44b379447..b5d659b0d1 100644
--- a/docs/installation/INSTALLING_MODELS.md
+++ b/docs/installation/INSTALLING_MODELS.md
@@ -47,8 +47,14 @@ strongly recommend that you install this weights file if nothing else.
 ## Community-Contributed Models
 
 There are too many to list here and more are being contributed every
-day. [This page](https://rentry.org/sdmodels) hosts an updated list of
-Stable Diffusion models and where they can be obtained.
+day. Hugging Face maintains a [fast-growing
+repository](https://huggingface.co/sd-concepts-library) of fine-tune
+(".bin") models that can be imported into InvokeAI by passing the
+`--embedding_path` option to the `invoke.py` command.
+
+[This page](https://rentry.org/sdmodels) hosts a large list of
+official and unofficial Stable Diffusion models and where they can be
+obtained.
 
 ## Installation
 

From 5319796e5860b43d1e18eb62cfec1f7feddfe1a7 Mon Sep 17 00:00:00 2001
From: mauwii 
Date: Sun, 30 Oct 2022 06:47:24 +0100
Subject: [PATCH 9/9] add --no-interactive to preload_models step

---
 .github/workflows/test-invoke-conda.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml
index 65e3e4e1e2..dfc5c04229 100644
--- a/.github/workflows/test-invoke-conda.yml
+++ b/.github/workflows/test-invoke-conda.yml
@@ -93,7 +93,9 @@ jobs:
             ${{ env.cache-name }}-${{ hashFiles('scripts/preload_models.py') }}
 
       - name: run preload_models.py
-        run: python scripts/preload_models.py
+        run: |
+          python scripts/preload_models.py \
+            --no-interactive
 
       - name: Run the tests
         run: |