preload_models interactively downloads sd model files

This commit is contained in:
Lincoln Stein 2022-10-30 12:19:05 -04:00
commit e7368d7231
43 changed files with 730 additions and 2174 deletions

View File

@ -84,7 +84,9 @@ jobs:
- name: run preload_models.py
id: run-preload-models
run: python scripts/preload_models.py
run: |
python scripts/preload_models.py \
--no-interactive
- name: Run the tests
id: run-tests

8
.gitignore vendored
View File

@ -199,7 +199,13 @@ checkpoints
.scratch/
.vscode/
gfpgan/
models/ldm/stable-diffusion-v1/model.sha256
models/ldm/stable-diffusion-v1/*.sha256
# GFPGAN model files
gfpgan/
# config file (will be created by installer)
configs/models.yaml
# weights (will be created by installer)
models/ldm/stable-diffusion-v1/*.ckpt

View File

@ -1,54 +0,0 @@
model:
base_learning_rate: 4.5e-6
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: "val/rec_loss"
embed_dim: 16
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 0.000001
disc_weight: 0.5
ddconfig:
double_z: True
z_channels: 16
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
num_res_blocks: 2
attn_resolutions: [16]
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 12
wrap: True
train:
target: ldm.data.imagenet.ImageNetSRTrain
params:
size: 256
degradation: pil_nearest
validation:
target: ldm.data.imagenet.ImageNetSRValidation
params:
size: 256
degradation: pil_nearest
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 1000
max_images: 8
increase_log_steps: True
trainer:
benchmark: True
accumulate_grad_batches: 2

View File

@ -1,53 +0,0 @@
model:
base_learning_rate: 4.5e-6
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: "val/rec_loss"
embed_dim: 4
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 0.000001
disc_weight: 0.5
ddconfig:
double_z: True
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 12
wrap: True
train:
target: ldm.data.imagenet.ImageNetSRTrain
params:
size: 256
degradation: pil_nearest
validation:
target: ldm.data.imagenet.ImageNetSRValidation
params:
size: 256
degradation: pil_nearest
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 1000
max_images: 8
increase_log_steps: True
trainer:
benchmark: True
accumulate_grad_batches: 2

View File

@ -1,54 +0,0 @@
model:
base_learning_rate: 4.5e-6
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: "val/rec_loss"
embed_dim: 3
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 0.000001
disc_weight: 0.5
ddconfig:
double_z: True
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 12
wrap: True
train:
target: ldm.data.imagenet.ImageNetSRTrain
params:
size: 256
degradation: pil_nearest
validation:
target: ldm.data.imagenet.ImageNetSRValidation
params:
size: 256
degradation: pil_nearest
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 1000
max_images: 8
increase_log_steps: True
trainer:
benchmark: True
accumulate_grad_batches: 2

View File

@ -1,53 +0,0 @@
model:
base_learning_rate: 4.5e-6
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: "val/rec_loss"
embed_dim: 64
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 0.000001
disc_weight: 0.5
ddconfig:
double_z: True
z_channels: 64
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [ 1,1,2,2,4,4] # num_down = len(ch_mult)-1
num_res_blocks: 2
attn_resolutions: [16,8]
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 12
wrap: True
train:
target: ldm.data.imagenet.ImageNetSRTrain
params:
size: 256
degradation: pil_nearest
validation:
target: ldm.data.imagenet.ImageNetSRValidation
params:
size: 256
degradation: pil_nearest
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 1000
max_images: 8
increase_log_steps: True
trainer:
benchmark: True
accumulate_grad_batches: 2

View File

@ -1,86 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
image_size: 64
channels: 3
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
# note: this isn't actually the resolution but
# the downsampling factor, i.e. this corresponds to
# attention on spatial resolution 8,16,32, as the
# spatial resolution of the latents is 64 for f4
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ckpt_path: models/first_stage_models/vq-f4/model.ckpt
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 48
num_workers: 5
wrap: false
train:
target: taming.data.faceshq.CelebAHQTrain
params:
size: 256
validation:
target: taming.data.faceshq.CelebAHQValidation
params:
size: 256
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@ -1,98 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 32
channels: 4
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32
in_channels: 4
out_channels: 4
model_channels: 256
attention_resolutions:
# note: this isn't actually the resolution but
# the downsampling factor, i.e. this corresponds to
# attention on spatial resolution 8,16,32, as the
# spatial resolution of the latents is 32 for f8
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 4
num_head_channels: 32
use_spatial_transformer: true
transformer_depth: 1
context_dim: 512
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 4
n_embed: 16384
ckpt_path: configs/first_stage_models/vq-f8/model.yaml
ddconfig:
double_z: false
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 32
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.ClassEmbedder
params:
embed_dim: 512
key: class_label
data:
target: main.DataModuleFromConfig
params:
batch_size: 64
num_workers: 12
wrap: false
train:
target: ldm.data.imagenet.ImageNetTrain
params:
config:
size: 256
validation:
target: ldm.data.imagenet.ImageNetValidation
params:
config:
size: 256
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@ -1,68 +0,0 @@
model:
base_learning_rate: 0.0001
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 64
channels: 3
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss
use_ema: False
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 192
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 5
num_heads: 1
use_spatial_transformer: true
transformer_depth: 1
context_dim: 512
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.ClassEmbedder
params:
n_classes: 1001
embed_dim: 512
key: class_label

View File

@ -1,85 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
image_size: 64
channels: 3
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
# note: this isn't actually the resolution but
# the downsampling factor, i.e. this corresponds to
# attention on spatial resolution 8,16,32, as the
# spatial resolution of the latents is 64 for f4
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ckpt_path: configs/first_stage_models/vq-f4/model.yaml
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 42
num_workers: 5
wrap: false
train:
target: taming.data.faceshq.FFHQTrain
params:
size: 256
validation:
target: taming.data.faceshq.FFHQValidation
params:
size: 256
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@ -1,85 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
image_size: 64
channels: 3
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
# note: this isn't actually the resolution but
# the downsampling factor, i.e. this corresponds to
# attention on spatial resolution 8,16,32, as the
# spatial resolution of the latents is 64 for f4
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
ckpt_path: configs/first_stage_models/vq-f4/model.yaml
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 48
num_workers: 5
wrap: false
train:
target: ldm.data.lsun.LSUNBedroomsTrain
params:
size: 256
validation:
target: ldm.data.lsun.LSUNBedroomsValidation
params:
size: 256
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@ -1,91 +0,0 @@
model:
base_learning_rate: 5.0e-5 # set to target_lr by starting main.py with '--scale_lr False'
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0155
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
loss_type: l1
first_stage_key: "image"
cond_stage_key: "image"
image_size: 32
channels: 4
cond_stage_trainable: False
concat_mode: False
scale_by_std: True
monitor: 'val/loss_simple_ema'
scheduler_config: # 10000 warmup steps
target: ldm.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps: [10000]
cycle_lengths: [10000000000000]
f_start: [1.e-6]
f_max: [1.]
f_min: [ 1.]
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32
in_channels: 4
out_channels: 4
model_channels: 192
attention_resolutions: [ 1, 2, 4, 8 ] # 32, 16, 8, 4
num_res_blocks: 2
channel_mult: [ 1,2,2,4,4 ] # 32, 16, 8, 4, 2
num_heads: 8
use_scale_shift_norm: True
resblock_updown: True
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: "val/rec_loss"
ckpt_path: "models/first_stage_models/kl-f8/model.ckpt"
ddconfig:
double_z: True
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: "__is_unconditional__"
data:
target: main.DataModuleFromConfig
params:
batch_size: 96
num_workers: 5
wrap: False
train:
target: ldm.data.lsun.LSUNChurchesTrain
params:
size: 256
validation:
target: ldm.data.lsun.LSUNChurchesValidation
params:
size: 256
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 5000
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@ -1,71 +0,0 @@
model:
base_learning_rate: 5.0e-05
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.012
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 32
channels: 4
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions:
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 4
- 4
num_heads: 8
use_spatial_transformer: true
transformer_depth: 1
context_dim: 1280
use_checkpoint: true
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.BERTEmbedder
params:
n_embed: 1280
n_layer: 32

View File

@ -1,29 +1,36 @@
# This file describes the alternative machine learning models
# available to the dream script.
# available to the InvokeAI script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
stable-diffusion-1.4:
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/model.ckpt
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
description: Stable Diffusion inference model version 1.4
width: 512
height: 512
default: true
inpainting-1.5:
description: runwayML tuned inpainting model v1.5
weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
config: configs/stable-diffusion/v1-inpainting-inference.yaml
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
config: ./configs/stable-diffusion/v1-inference.yaml
weights: ./models/ldm/stable-diffusion-v1/sd-v1-4.ckpt
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
description: The original Stable Diffusion version 1.4 weight file (4.27 GB)
width: 512
height: 512
stable-diffusion-1.5:
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
description: Stable Diffusion inference model version 1.5
description: The newest Stable Diffusion version 1.5 weight file (4.27 GB)
weights: ./models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
config: ./configs/stable-diffusion/v1-inference.yaml
width: 512
height: 512
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
default: true
inpainting-1.5:
description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB)
weights: ./models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
config: ./configs/stable-diffusion/v1-inpainting-inference.yaml
width: 512
height: 512
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
waifu-diffusion-1.3:
description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB)
weights: ./models/ldm/stable-diffusion-v1/model-epoch09-float32.ckpt
config: ./configs/stable-diffusion/v1-inference.yaml
width: 512
height: 512
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt

View File

@ -1,68 +0,0 @@
model:
base_learning_rate: 0.0001
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.015
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: jpg
cond_stage_key: nix
image_size: 48
channels: 16
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_by_std: false
scale_factor: 0.22765929
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 48
in_channels: 16
out_channels: 16
model_channels: 448
attention_resolutions:
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
use_scale_shift_norm: false
resblock_updown: false
num_head_channels: 32
use_spatial_transformer: true
transformer_depth: 1
context_dim: 768
use_checkpoint: true
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: val/rec_loss
embed_dim: 16
ddconfig:
double_z: true
z_channels: 16
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 16
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: torch.nn.Identity

View File

@ -385,7 +385,7 @@ automatically.
Example:
<pre>
invoke> <b>!import_model models/ldm/stable-diffusion-v1/ model-epoch08-float16.ckpt</b>
invoke> <b>!import_model models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt</b>
>> Model import in process. Please enter the values needed to configure this model:
Name for this model: <b>waifu-diffusion</b>

View File

@ -0,0 +1,267 @@
---
title: Installing Models
---
# :octicons-paintbrush-16: Installing Models
## Model Weight Files
The model weight files ('*.ckpt') are the Stable Diffusion "secret
sauce". They are the product of training the AI on millions of
captioned images gathered from multiple sources.
Originally there was only a single Stable Diffusion weights file,
which many people named `model.ckpt`. Now there are dozens or more
that have been "fine tuned" to provide particulary styles, genres, or
other features. InvokeAI allows you to install and run multiple model
weight files and switch between them quickly in the command-line and
web interfaces.
This manual will guide you through installing and configuring model
weight files.
## Base Models
InvokeAI comes with support for a good initial set of models listed in
the model configuration file `configs/models.yaml`. They are:
| Model | Weight File | Description | DOWNLOAD FROM |
| ---------------------- | ----------------------------- |--------------------------------- | ----------------|
| stable-diffusion-1.5 | v1-5-pruned-emaonly.ckpt | Most recent version of base Stable Diffusion model| https://huggingface.co/runwayml/stable-diffusion-v1-5 |
| stable-diffusion-1.4 | sd-v1-4.ckpt | Previous version of base Stable Diffusion model | https://huggingface.co/CompVis/stable-diffusion-v-1-4-original |
| inpainting-1.5 | sd-v1-5-inpainting.ckpt | Stable Diffusion 1.5 model specialized for inpainting | https://huggingface.co/runwayml/stable-diffusion-inpainting |
| waifu-diffusion-1.3 | model-epoch09-float32.ckpt | Stable Diffusion 1.4 trained to produce anime images | https://huggingface.co/hakurei/waifu-diffusion-v1-3 |
| `<all models>` | vae-ft-mse-840000-ema-pruned.ckpt | A fine-tuned VAE add-on file that improves face generation | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/ |
Note that these files are covered by an "Ethical AI" license which
forbids certain uses. You will need to create an account on the
Hugging Face website and accept the license terms before you can
access the files.
The predefined configuration file for InvokeAI (located at
`configs/models.yaml`) provides entries for each of these weights
files. `stable-diffusion-1.5` is the default model used, and we
strongly recommend that you install this weights file if nothing else.
## Community-Contributed Models
There are too many to list here and more are being contributed every
day. Hugging Face maintains a [fast-growing
repository](https://huggingface.co/sd-concepts-library) of fine-tune
(".bin") models that can be imported into InvokeAI by passing the
`--embedding_path` option to the `invoke.py` command.
[This page](https://rentry.org/sdmodels) hosts a large list of
official and unofficial Stable Diffusion models and where they can be
obtained.
## Installation
There are three ways to install weights files:
1. During InvokeAI installation, the `preload_models.py` script can
download them for you.
2. You can use the command-line interface (CLI) to import, configure
and modify new models files.
3. You can download the files manually and add the appropriate entries
to `models.yaml`.
### Installation via `preload_models.py`
This is the most automatic way. Run `scripts/preload_models.py` from
the console. It will ask you to select which models to download and
lead you through the steps of setting up a Hugging Face account if you
haven't done so already.
To start, from within the InvokeAI directory run the command `python
scripts/preload_models.py` (Linux/MacOS) or `python
scripts\preload_models.py` (Windows):
```
Loading Python libraries...
** INTRODUCTION **
Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
and other large models that are needed for text to image generation. At any point you may interrupt
this program and resume later.
** WEIGHT SELECTION **
Would you like to download the Stable Diffusion model weights now? [y]
Choose the weight file(s) you wish to download. Before downloading you
will be given the option to view and change your selections.
[1] stable-diffusion-1.5:
The newest Stable Diffusion version 1.5 weight file (4.27 GB) (recommended)
Download? [y]
[2] inpainting-1.5:
RunwayML SD 1.5 model optimized for inpainting (4.27 GB) (recommended)
Download? [y]
[3] stable-diffusion-1.4:
The original Stable Diffusion version 1.4 weight file (4.27 GB)
Download? [n] n
[4] waifu-diffusion-1.3:
Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB)
Download? [n] y
[5] ft-mse-improved-autoencoder-840000:
StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) (recommended)
Download? [y] y
The following weight files will be downloaded:
[1] stable-diffusion-1.5*
[2] inpainting-1.5
[4] waifu-diffusion-1.3
[5] ft-mse-improved-autoencoder-840000
*default
Ok to download? [y]
** LICENSE AGREEMENT FOR WEIGHT FILES **
1. To download the Stable Diffusion weight files you need to read and accept the
CreativeML Responsible AI license. If you have not already done so, please
create an account using the "Sign Up" button:
https://huggingface.co
You will need to verify your email address as part of the HuggingFace
registration process.
2. After creating the account, login under your account and accept
the license terms located here:
https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
Press <enter> when you are ready to continue:
...
```
When the script is complete, you will find the downloaded weights
files in `models/ldm/stable-diffusion-v1` and a matching configuration
file in `configs/models.yaml`.
You can run the script again to add any models you didn't select the
first time. Note that as a safety measure the script will _never_
remove a previously-installed weights file. You will have to do this
manually.
### Installation via the CLI
You can install a new model, including any of the community-supported
ones, via the command-line client's `!import_model` command.
1. First download the desired model weights file and place it under `models/ldm/stable-diffusion-v1/`.
You may rename the weights file to something more memorable if you wish. Record the path of the
weights file (e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`)
2. Launch the `invoke.py` CLI with `python scripts/invoke.py`.
3. At the `invoke>` command-line, enter the command `!import_model <path to model>`.
For example:
`invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
(Hint - the CLI supports file path autocompletion. Type a bit of the path
name and hit <tab> in order to get a choice of possible completions.)
4. Follow the wizard's instructions to complete installation as shown in the example
here:
```
invoke> <b>!import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt</b>
>> Model import in process. Please enter the values needed to configure this model:
Name for this model: <b>arabian-nights</b>
Description of this model: <b>Arabian Nights Fine Tune v1.0</b>
Configuration file for this model: <b>configs/stable-diffusion/v1-inference.yaml</b>
Default image width: <b>512</b>
Default image height: <b>512</b>
>> New configuration:
arabian-nights:
config: configs/stable-diffusion/v1-inference.yaml
description: Arabian Nights Fine Tune v1.0
height: 512
weights: models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
width: 512
OK to import [n]? <b>y</b>
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading arabian-nights from models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
| LatentDiffusion: Running in eps-prediction mode
| DiffusionWrapper has 859.52 M params.
| Making attention of type 'vanilla' with 512 in_channels
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
| Making attention of type 'vanilla' with 512 in_channels
| Using faster float16 precision
```
If you've previously installed the fine-tune VAE file `vae-ft-mse-840000-ema-pruned.ckpt`,
the wizard will also ask you if you want to add this VAE to the model.
The appropriate entry for this model will be added to `configs/models.yaml` and it will
be available to use in the CLI immediately.
The CLI has additional commands for switching among, viewing, editing,
and deleting the available models. These are described in [Command Line
Client](../features/CLI.md#model-selection-and-importation), but the two most
frequently-used are `!models` and `!switch <name of model>`. The first
prints a table of models that InvokeAI knows about and their load
status. The second will load the requested model and lets you switch
back and forth quickly among loaded models.
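For quick reference, a session that lists the installed models and then switches
to one of them might look like this (the table that `!models` prints is omitted here):
```
invoke> !models
invoke> !switch inpainting-1.5
```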
### Manually editing `configs/models.yaml`
If you are comfortable with a text editor then you may simply edit
`models.yaml` directly.
First you need to download the desired .ckpt file and place it in
`models/ldm/stable-diffusion-v1` as described in step #1 in the
previous section. Record the path to the weights file,
e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
Then using a **text** editor (e.g. the Windows Notepad application),
open the file `configs/models.yaml`, and add a new stanza that follows
this example:
```
arabian-nights-1.0:
  description: A great fine-tune in Arabian Nights style
  weights: ./models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
  config: ./configs/stable-diffusion/v1-inference.yaml
  width: 512
  height: 512
  vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
  default: false
```
* arabian-nights-1.0
- This is the name of the model that you will refer to from within the
CLI and the WebGUI when you need to load and use the model.
* description
- Any description that you want to add to the model to remind you what
it is.
* weights
- Relative path to the .ckpt weights file for this model.
* config
- This is the confusingly-named configuration file for the model itself.
Use `./configs/stable-diffusion/v1-inference.yaml` unless the model happens
to need a custom configuration, in which case the place you downloaded it
from will tell you what to use instead. For example, the runwayML custom
inpainting model requires the file `configs/stable-diffusion/v1-inpainting-inference.yaml`.
This is already included in the InvokeAI distribution and is configured automatically
for you by the `preload_models.py` script.
* vae
- If you want to add a VAE file to the model, then enter its path here.
* width, height
- These are the width and height of the images used to train the model.
Currently they are always 512 and 512.
Save the `models.yaml` and relaunch InvokeAI. The new model should now be
available for your use.
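If you would like to sanity-check a hand-edited stanza before relaunching, one
option is to load it with OmegaConf (the YAML configuration library that
InvokeAI's scripts already import). This is a minimal sketch using the
`arabian-nights-1.0` stanza above; substitute the name of your own entry:
```
from omegaconf import OmegaConf

models = OmegaConf.load('configs/models.yaml')
entry  = models['arabian-nights-1.0']   # the stanza name you added
print(entry.description)                # A great fine-tune in Arabian Nights style
print(entry.weights)                    # ./models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
print(entry.width, entry.height)        # 512 512
```
If the YAML indentation is off, `OmegaConf.load` will raise a parse error, which
is easier to spot here than during InvokeAI startup.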

View File

@ -1,5 +1,5 @@
---
title: Linux
title: Manual Installation, Linux
---
# :fontawesome-brands-linux: Linux
@ -63,24 +63,16 @@ title: Linux
model loading scheme to allow the script to work on GPU machines that are not
internet connected. See [Preload Models](../features/OTHER.md#preload-models)
7. Now you need to install the weights for the stable diffusion model.
7. Install the weights for the stable diffusion model.
- For running with the released weights, you will first need to set up an acount
with [Hugging Face](https://huggingface.co).
- Use your credentials to log in, and then point your browser [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
- You may be asked to sign a license agreement at this point.
- Click on "Files and versions" near the top of the page, and then click on the
file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click
the "download" link. Save the file somewhere safe on your local machine.
- Sign up at https://huggingface.co
- Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
- Accept the terms and click Access Repository
- Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
Now run the following commands from within the stable-diffusion directory.
This will create a symbolic link from the stable-diffusion model.ckpt file, to
the true location of the `sd-v1-4.ckpt` file.
```bash
(invokeai) ~/InvokeAI$ mkdir -p models/ldm/stable-diffusion-v1
(invokeai) ~/InvokeAI$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
```
There are many other models that you can use. Please see
[Installing Models](../features/INSTALLING_MODELS.md) for details.
8. Start generating images!

View File

@ -1,5 +1,5 @@
---
title: macOS
title: Manual Installation, macOS
---
# :fontawesome-brands-apple: macOS
@ -24,9 +24,15 @@ First you need to download a large checkpoint file.
1. Sign up at https://huggingface.co
2. Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
3. Accept the terms and click Access Repository
4. Download [sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt) and note where you have saved it (probably the Downloads folder). You may want to move it somewhere else for longer term storage - SD needs this file to run.
4. Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
While that is downloading, open Terminal and run the following commands one at a time, reading the comments and taking care to run the appropriate command for your Mac's architecture (Intel or M1).
There are many other models that you can try. Please see
[Installing Models](../features/INSTALLING_MODELS.md) for details.
While that is downloading, open Terminal and run the following
commands one at a time, reading the comments and taking care to run
the appropriate command for your Mac's architecture (Intel or M1).
!!! todo "Homebrew"

View File

@ -1,5 +1,5 @@
---
title: Windows
title: Manual Installation, Windows
---
# :fontawesome-brands-windows: Windows
@ -83,23 +83,14 @@ in the wiki
8. Now you need to install the weights for the big stable diffusion model.
1. For running with the released weights, you will first need to set up an acount with Hugging Face (https://huggingface.co).
2. Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
3. You may be asked to sign a license agreement at this point.
4. Click on "Files and versions" near the top of the page, and then click on the file named `sd-v1-4.ckpt`. You'll be taken to a page that
prompts you to click the "download" link. Now save the file somewhere safe on your local machine.
5. The weight file is >4 GB in size, so
downloading may take a while.
- Sign up at https://huggingface.co
- Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
- Accept the terms and click Access Repository
- Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
Now run the following commands from **within the InvokeAI directory** to copy the weights file to the right place:
```batch
mkdir -p models\ldm\stable-diffusion-v1
copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
```
Please replace `C:\path\to\sd-v1.4.ckpt` with the correct path to wherever you stashed this file. If you prefer not to copy or move the .ckpt file,
you may instead create a shortcut to it from within `models\ldm\stable-diffusion-v1\`.
There are many other models that you can use. Please see
[Installing Models](../features/INSTALLING_MODELS.md) for details.
9. Start generating images!

View File

@ -227,11 +227,14 @@ class ModelCache(object):
print(' | Using more accurate float32 precision')
# look and load a matching vae file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
if vae and os.path.exists(vae):
if vae:
if os.path.exists(vae):
print(f' | Loading VAE weights from: {vae}')
vae_ckpt = torch.load(vae, map_location="cpu")
vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
model.first_stage_model.load_state_dict(vae_dict, strict=False)
else:
print(f' | VAE file {vae} not found. Skipping.')
model.to(self.device)
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
@ -281,7 +284,7 @@ class ModelCache(object):
Returns the preamble for the config file.
'''
return '''# This file describes the alternative machine learning models
# available to the dream script.
# available to the InvokeAI script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,

View File

@ -1,44 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: val/rec_loss
embed_dim: 16
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 1.0e-06
disc_weight: 0.5
ddconfig:
double_z: true
z_channels: 16
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 16
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 6
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,46 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: val/rec_loss
embed_dim: 64
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 1.0e-06
disc_weight: 0.5
ddconfig:
double_z: true
z_channels: 64
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 1
- 2
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions:
- 16
- 8
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 6
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,41 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: val/rec_loss
embed_dim: 3
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 1.0e-06
disc_weight: 0.5
ddconfig:
double_z: true
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 10
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,42 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.AutoencoderKL
params:
monitor: val/rec_loss
embed_dim: 4
lossconfig:
target: ldm.modules.losses.LPIPSWithDiscriminator
params:
disc_start: 50001
kl_weight: 1.0e-06
disc_weight: 0.5
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 4
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,49 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.VQModel
params:
embed_dim: 8
n_embed: 16384
ddconfig:
double_z: false
z_channels: 8
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 16
dropout: 0.0
lossconfig:
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
params:
disc_conditional: false
disc_in_channels: 3
disc_start: 250001
disc_weight: 0.75
disc_num_layers: 2
codebook_weight: 1.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 14
num_workers: 20
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,46 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.VQModel
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
attn_type: none
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
params:
disc_conditional: false
disc_in_channels: 3
disc_start: 11
disc_weight: 0.75
codebook_weight: 1.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 8
num_workers: 12
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
crop_size: 256

View File

@ -1,45 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.VQModel
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
params:
disc_conditional: false
disc_in_channels: 3
disc_start: 0
disc_weight: 0.75
codebook_weight: 1.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 8
num_workers: 16
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
crop_size: 256

View File

@ -1,48 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.VQModel
params:
embed_dim: 4
n_embed: 256
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 32
dropout: 0.0
lossconfig:
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
params:
disc_conditional: false
disc_in_channels: 3
disc_start: 250001
disc_weight: 0.75
codebook_weight: 1.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 10
num_workers: 20
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,48 +0,0 @@
model:
base_learning_rate: 4.5e-06
target: ldm.models.autoencoder.VQModel
params:
embed_dim: 4
n_embed: 16384
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 32
dropout: 0.0
lossconfig:
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
params:
disc_conditional: false
disc_in_channels: 3
disc_num_layers: 2
disc_start: 1
disc_weight: 0.6
codebook_weight: 1.0
data:
target: main.DataModuleFromConfig
params:
batch_size: 10
num_workers: 20
wrap: true
train:
target: ldm.data.openimages.FullOpenImagesTrain
params:
size: 384
crop_size: 256
validation:
target: ldm.data.openimages.FullOpenImagesValidation
params:
size: 384
crop_size: 256

View File

@ -1,80 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0155
log_every_t: 100
timesteps: 1000
loss_type: l2
first_stage_key: image
cond_stage_key: LR_image
image_size: 64
channels: 3
concat_mode: true
cond_stage_trainable: false
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 6
out_channels: 3
model_channels: 160
attention_resolutions:
- 16
- 8
num_res_blocks: 2
channel_mult:
- 1
- 2
- 2
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: torch.nn.Identity
data:
target: main.DataModuleFromConfig
params:
batch_size: 64
wrap: false
num_workers: 12
train:
target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain
params:
size: 256
degradation: bsrgan_light
downscale_f: 4
min_crop_f: 0.5
max_crop_f: 1.0
random_crop: true
validation:
target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation
params:
size: 256
degradation: bsrgan_light
downscale_f: 4
min_crop_f: 0.5
max_crop_f: 1.0
random_crop: true

View File

@ -1,70 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 64
channels: 3
cond_stage_trainable: false
concat_mode: false
monitor: val/loss
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 48
num_workers: 5
wrap: false
train:
target: ldm.data.faceshq.CelebAHQTrain
params:
size: 256
validation:
target: ldm.data.faceshq.CelebAHQValidation
params:
size: 256

View File

@ -1,80 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 32
channels: 4
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32
in_channels: 4
out_channels: 4
model_channels: 256
attention_resolutions:
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 4
num_head_channels: 32
use_spatial_transformer: true
transformer_depth: 1
context_dim: 512
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 4
n_embed: 16384
ddconfig:
double_z: false
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 2
- 4
num_res_blocks: 2
attn_resolutions:
- 32
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.ClassEmbedder
params:
embed_dim: 512
key: class_label
data:
target: main.DataModuleFromConfig
params:
batch_size: 64
num_workers: 12
wrap: false
train:
target: ldm.data.imagenet.ImageNetTrain
params:
config:
size: 256
validation:
target: ldm.data.imagenet.ImageNetValidation
params:
config:
size: 256

View File

@ -1,70 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 64
channels: 3
cond_stage_trainable: false
concat_mode: false
monitor: val/loss
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 42
num_workers: 5
wrap: false
train:
target: ldm.data.faceshq.FFHQTrain
params:
size: 256
validation:
target: ldm.data.faceshq.FFHQValidation
params:
size: 256

View File

@ -1,67 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0205
log_every_t: 100
timesteps: 1000
loss_type: l1
first_stage_key: image
cond_stage_key: masked_image
image_size: 64
channels: 3
concat_mode: true
monitor: val/loss
scheduler_config:
target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler
params:
verbosity_interval: 0
warm_up_steps: 1000
max_decay_steps: 50000
lr_start: 0.001
lr_max: 0.1
lr_min: 0.0001
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 7
out_channels: 3
model_channels: 256
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_heads: 8
resblock_updown: true
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
attn_type: none
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: ldm.modules.losses.contperceptual.DummyLoss
cond_stage_config: __is_first_stage__

View File

@ -1,81 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0205
log_every_t: 100
timesteps: 1000
loss_type: l1
first_stage_key: image
cond_stage_key: coordinates_bbox
image_size: 64
channels: 3
conditioning_key: crossattn
cond_stage_trainable: true
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 128
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
use_spatial_transformer: true
transformer_depth: 3
context_dim: 512
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.BERTEmbedder
params:
n_embed: 512
n_layer: 16
vocab_size: 8192
max_seq_len: 92
use_tokenizer: false
monitor: val/loss_simple_ema
data:
target: main.DataModuleFromConfig
params:
batch_size: 24
wrap: false
num_workers: 10
train:
target: ldm.data.openimages.OpenImagesBBoxTrain
params:
size: 256
validation:
target: ldm.data.openimages.OpenImagesBBoxValidation
params:
size: 256

View File

@ -1,70 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: class_label
image_size: 64
channels: 3
cond_stage_trainable: false
concat_mode: false
monitor: val/loss
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 224
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 4
num_head_channels: 32
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: __is_unconditional__
data:
target: main.DataModuleFromConfig
params:
batch_size: 48
num_workers: 5
wrap: false
train:
target: ldm.data.lsun.LSUNBedroomsTrain
params:
size: 256
validation:
target: ldm.data.lsun.LSUNBedroomsValidation
params:
size: 256

View File

@ -1,92 +0,0 @@
model:
base_learning_rate: 5.0e-05
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0155
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
loss_type: l1
first_stage_key: image
cond_stage_key: image
image_size: 32
channels: 4
cond_stage_trainable: false
concat_mode: false
scale_by_std: true
monitor: val/loss_simple_ema
scheduler_config:
target: ldm.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps:
- 10000
cycle_lengths:
- 10000000000000
f_start:
- 1.0e-06
f_max:
- 1.0
f_min:
- 1.0
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32
in_channels: 4
out_channels: 4
model_channels: 192
attention_resolutions:
- 1
- 2
- 4
- 8
num_res_blocks: 2
channel_mult:
- 1
- 2
- 2
- 4
- 4
num_heads: 8
use_scale_shift_norm: true
resblock_updown: true
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config: '__is_unconditional__'
data:
target: main.DataModuleFromConfig
params:
batch_size: 96
num_workers: 5
wrap: false
train:
target: ldm.data.lsun.LSUNChurchesTrain
params:
size: 256
validation:
target: ldm.data.lsun.LSUNChurchesValidation
params:
size: 256

View File

@ -1,59 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0205
log_every_t: 100
timesteps: 1000
loss_type: l1
first_stage_key: image
cond_stage_key: segmentation
image_size: 64
channels: 3
concat_mode: true
cond_stage_trainable: true
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 6
out_channels: 3
model_channels: 128
attention_resolutions:
- 32
- 16
- 8
num_res_blocks: 2
channel_mult:
- 1
- 4
- 8
num_heads: 8
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.SpatialRescaler
params:
n_stages: 2
in_channels: 182
out_channels: 3

View File

@ -1,78 +0,0 @@
model:
base_learning_rate: 1.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0205
log_every_t: 100
timesteps: 1000
loss_type: l1
first_stage_key: image
cond_stage_key: segmentation
image_size: 128
channels: 3
concat_mode: true
cond_stage_trainable: true
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 128
in_channels: 6
out_channels: 3
model_channels: 128
attention_resolutions:
- 32
- 16
- 8
num_res_blocks: 2
channel_mult:
- 1
- 4
- 8
num_heads: 8
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
monitor: val/rec_loss
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.SpatialRescaler
params:
n_stages: 2
in_channels: 182
out_channels: 3
data:
target: main.DataModuleFromConfig
params:
batch_size: 8
wrap: false
num_workers: 10
train:
target: ldm.data.landscapes.RFWTrain
params:
size: 768
crop_size: 512
segmentation_to_float32: true
validation:
target: ldm.data.landscapes.RFWValidation
params:
size: 768
crop_size: 512
segmentation_to_float32: true

View File

@ -0,0 +1,2 @@
See docs/features/INSTALLING_MODELS.md for how to populate this
directory with one or more Stable Diffusion model weight files.

View File

@ -1,77 +0,0 @@
model:
base_learning_rate: 2.0e-06
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.0015
linear_end: 0.0195
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 64
channels: 3
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss_simple_ema
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 64
in_channels: 3
out_channels: 3
model_channels: 192
attention_resolutions:
- 8
- 4
- 2
num_res_blocks: 2
channel_mult:
- 1
- 2
- 3
- 5
num_head_channels: 32
use_spatial_transformer: true
transformer_depth: 1
context_dim: 640
first_stage_config:
target: ldm.models.autoencoder.VQModelInterface
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.BERTEmbedder
params:
n_embed: 640
n_layer: 32
data:
target: main.DataModuleFromConfig
params:
batch_size: 28
num_workers: 10
wrap: false
train:
target: ldm.data.previews.pytorch_dataset.PreviewsTrain
params:
size: 256
validation:
target: ldm.data.previews.pytorch_dataset.PreviewsValidation
params:
size: 256

View File

@ -3,20 +3,369 @@
# Before running stable-diffusion on an internet-isolated machine,
# run this script from one with internet connectivity. The
# two machines must share a common .cache directory.
from transformers import CLIPTokenizer, CLIPTextModel
#
# Coauthor: Kevin Turner http://github.com/keturn
#
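# A typical air-gapped workflow (paths are illustrative, not required): run this
# script on the connected machine, then copy or network-mount its ~/.cache
# directory (the Hugging Face / torch hub caches) onto the isolated machine
# before launching invoke.py there.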
print('Loading Python libraries...\n')
import argparse
import clip
import sys
import transformers
import os
import warnings
import torch
import urllib.request
import zipfile
import traceback
import getpass
import requests
from urllib import request
from tqdm import tqdm
from omegaconf import OmegaConf
from pathlib import Path
from transformers import CLIPTokenizer, CLIPTextModel
from transformers import BertTokenizerFast, AutoFeatureExtractor
from huggingface_hub import hf_hub_download, HfFolder, hf_hub_url
transformers.logging.set_verbosity_error()
#--------------------------globals--
Model_dir = './models/ldm/stable-diffusion-v1/'
Config_file = './configs/models.yaml'
SD_Configs = './configs/stable-diffusion'
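# Catalog of downloadable weights: each entry records the Hugging Face repo id,
# the checkpoint filename, the matching inference config under SD_Configs
# ('VAE' marks autoencoder-only files), and the native training resolution.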
Datasets = {
'stable-diffusion-1.5': {
'description': 'The newest Stable Diffusion version 1.5 weight file (4.27 GB)',
'repo_id': 'runwayml/stable-diffusion-v1-5',
'config': 'v1-inference.yaml',
'file': 'v1-5-pruned-emaonly.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
'inpainting-1.5': {
'description': 'RunwayML SD 1.5 model optimized for inpainting (4.27 GB)',
'repo_id': 'runwayml/stable-diffusion-inpainting',
'config': 'v1-inpainting-inference.yaml',
'file': 'sd-v1-5-inpainting.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
'stable-diffusion-1.4': {
'description': 'The original Stable Diffusion version 1.4 weight file (4.27 GB)',
'repo_id': 'CompVis/stable-diffusion-v-1-4-original',
'config': 'v1-inference.yaml',
'file': 'sd-v1-4.ckpt',
'recommended': False,
'width': 512,
'height': 512,
},
'waifu-diffusion-1.3': {
'description': 'Stable Diffusion 1.4 fine-tuned on anime-styled images (4.27 GB)',
'repo_id': 'hakurei/waifu-diffusion-v1-3',
'config': 'v1-inference.yaml',
'file': 'model-epoch09-float32.ckpt',
'recommended': False,
'width': 512,
'height': 512,
},
'ft-mse-improved-autoencoder-840000': {
'description': 'StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB)',
'repo_id': 'stabilityai/sd-vae-ft-mse-original',
'config': 'VAE',
'file': 'vae-ft-mse-840000-ema-pruned.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
}
Config_preamble = '''# This file describes the alternative machine learning models
# available to the InvokeAI script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
'''
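# Illustrative example of a stanza this script writes into models.yaml
# (the values below are examples only, derived from the defaults above):
#
#   stable-diffusion-1.5:
#     description: The newest Stable Diffusion version 1.5 weight file (4.27 GB)
#     weights: ./models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
#     config: ./configs/stable-diffusion/v1-inference.yaml
#     width: 512
#     height: 512
#     vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
#     default: true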
#---------------------------------------------
def introduction():
print(
'''Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
and other large models that are needed for text to image generation. At any point you may interrupt
this program and resume later.\n'''
)
#--------------------------------------------
def postscript():
print(
'''You're all set! You may now launch InvokeAI using one of these two commands:
Web version:
python scripts/invoke.py --web (connect to http://localhost:9090)
Command-line version:
python scripts/invoke.py
Have fun!
'''
)
#---------------------------------------------
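# Ask a yes/no question on the console; an empty response selects the default.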
def yes_or_no(prompt:str, default_yes=True):
default = "y" if default_yes else 'n'
response = input(f'{prompt} [{default}] ') or default
if default_yes:
return response[0] not in ('n','N')
else:
return response[0] in ('y','Y')
#---------------------------------------------
def user_wants_to_download_weights()->str:
'''
Returns one of "skip", "recommended" or "customized"
'''
print('''You can download and configure the weights files manually or let this
script do it for you. Manual installation is described at:
https://github.com/invoke-ai/InvokeAI/blob/main/docs/installation/INSTALLING_MODELS.md
You may download the recommended models (about 10 GB total), select a customized set, or
completely skip this step.
'''
)
selection = None
while selection is None:
choice = input('Download <r>ecommended models, <c>ustomize the list, or <s>kip this step? [r]: ')
if choice.startswith(('r','R')) or len(choice)==0:
selection = 'recommended'
elif choice.startswith(('c','C')):
selection = 'customized'
elif choice.startswith(('s','S')):
selection = 'skip'
return selection
#---------------------------------------------
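# Let the user pick which weight files to fetch (the recommended set or a
# custom selection) and confirm the list; returns None if downloading is declined.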
def select_datasets(action:str):
done = False
while not done:
datasets = dict()
dflt = None # the first model selected will be the default; TODO let user change
counter = 1
if action == 'customized':
print('''
Choose the weight file(s) you wish to download. Before downloading you
will be given the option to view and change your selections.
'''
)
for ds in Datasets.keys():
recommended = '(recommended)' if Datasets[ds]['recommended'] else ''
print(f'[{counter}] {ds}:\n {Datasets[ds]["description"]} {recommended}')
if yes_or_no(' Download?',default_yes=Datasets[ds]['recommended']):
datasets[ds]=counter
counter += 1
else:
for ds in Datasets.keys():
if Datasets[ds]['recommended']:
datasets[ds]=counter
counter += 1
print('The following weight files will be downloaded:')
for ds in datasets:
dflt = '*' if dflt is None else ''
print(f' [{datasets[ds]}] {ds}{dflt}')
print("*default")
ok_to_download = yes_or_no('Ok to download?')
if not ok_to_download:
if yes_or_no('Change your selection?'):
action = 'customized'
else:
done = True
else:
done = True
return datasets if ok_to_download else None
#-------------------------------Authenticate against Hugging Face
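# Walk the user through accepting the CreativeML license, then reuse a saved
# Hugging Face token if one exists, or prompt for a new one and save it.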
def authenticate():
print('''
To download the Stable Diffusion weight files from the official Hugging Face
repository, you need to read and accept the CreativeML Responsible AI license.
This involves a few easy steps.
1. If you have not already done so, create an account on Hugging Face's web site
using the "Sign Up" button:
https://huggingface.co/join
You will need to verify your email address as part of the HuggingFace
registration process.
2. Log into your Hugging Face account:
https://huggingface.co/login
3. Accept the license terms located here:
https://huggingface.co/runwayml/stable-diffusion-v1-5
and here:
https://huggingface.co/runwayml/stable-diffusion-inpainting
(Yes, you have to accept two slightly different license agreements)
'''
)
input('Press <enter> when you are ready to continue:')
access_token = HfFolder.get_token()
if access_token is None:
print('''
4. Thank you! The last step is to enter your HuggingFace access token so that
this script is authorized to initiate the download. Go to the access tokens
page of your Hugging Face account and create a token by clicking the
"New token" button:
https://huggingface.co/settings/tokens
(You can enter anything you like in the token creation field marked "Name".
"Role" should be "read").
Now copy the token to your clipboard and paste it here: '''
)
access_token = getpass.getpass()
HfFolder.save_token(access_token)
return access_token
#---------------------------------------------
# look for legacy model.ckpt in models directory and offer to
# normalize its name
def migrate_models_ckpt():
if not os.path.exists(os.path.join(Model_dir,'model.ckpt')):
return
new_name = Datasets['stable-diffusion-1.4']['file']
print('You seem to have the Stable Diffusion v1.4 "model.ckpt" already installed.')
rename = yes_or_no(f'Ok to rename it to "{new_name}" for future reference?')
if rename:
print(f'model.ckpt => {new_name}')
os.rename(os.path.join(Model_dir,'model.ckpt'),os.path.join(Model_dir,new_name))
#---------------------------------------------
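# Download each selected checkpoint (resuming partial files) and return the
# subset of models that completed successfully.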
def download_weight_datasets(models:dict, access_token:str):
migrate_models_ckpt()
successful = dict()
for mod in models.keys():
repo_id = Datasets[mod]['repo_id']
filename = Datasets[mod]['file']
success = download_with_resume(
repo_id=repo_id,
model_name=filename,
access_token=access_token
)
if success:
successful[mod] = True
keys = ', '.join(successful.keys())
print(f'Successfully installed {keys}')
return successful
#---------------------------------------------
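# Stream a file from the Hugging Face Hub into Model_dir. If a partial file
# already exists, an HTTP Range header resumes it; a 416 response means the
# file is already complete and the download is skipped.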
def download_with_resume(repo_id:str, model_name:str, access_token:str)->bool:
model_dest = os.path.join(Model_dir, model_name)
os.makedirs(os.path.dirname(model_dest), exist_ok=True)
url = hf_hub_url(repo_id, model_name)
header = {"Authorization": f'Bearer {access_token}'}
open_mode = 'wb'
exist_size = 0
if os.path.exists(model_dest):
exist_size = os.path.getsize(model_dest)
header['Range'] = f'bytes={exist_size}-'
open_mode = 'ab'
resp = requests.get(url, headers=header, stream=True)
total = int(resp.headers.get('content-length', 0))
if resp.status_code==416: # "range not satisfiable", which means nothing to return
print(f'* {model_name}: complete file found. Skipping.')
return True
elif exist_size > 0:
print(f'* {model_name}: partial file found. Resuming...')
else:
print(f'* {model_name}: Downloading...')
try:
if total < 2000:
print(f'* {model_name}: {resp.text}')
return False
with open(model_dest, open_mode) as file, tqdm(
desc=model_name,
initial=exist_size,
total=total+exist_size,
unit='iB',
unit_scale=True,
unit_divisor=1000,
) as bar:
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)
except Exception as e:
print(f'An error occurred while downloading {model_name}: {str(e)}')
return False
return True
#---------------------------------------------
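# Regenerate configs/models.yaml: write the preamble plus the merged stanzas
# to a temporary file, then rename it into place so a failure never leaves a
# truncated config behind.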
def update_config_file(successfully_downloaded:dict):
try:
yaml = new_config_file_contents(successfully_downloaded)
tmpfile = os.path.join(os.path.dirname(Config_file),'new_config.tmp')
with open(tmpfile, 'w') as outfile:
outfile.write(Config_preamble)
outfile.write(yaml)
os.rename(tmpfile,Config_file)
except Exception as e:
print(f'**Error creating config file {Config_file}: {str(e)} **')
return
print(f'Successfully created new configuration file {Config_file}')
#---------------------------------------------
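# Merge the newly downloaded models into the existing OmegaConf configuration;
# any downloaded VAE is attached to every non-VAE stanza, and the first model
# written becomes the default (see the BUG note below).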
def new_config_file_contents(successfully_downloaded:dict)->str:
conf = OmegaConf.load(Config_file)
# find the VAE file, if there is one
vae = None
default_selected = False
for model in successfully_downloaded:
if Datasets[model]['config'] == 'VAE':
vae = Datasets[model]['file']
for model in successfully_downloaded:
if Datasets[model]['config'] == 'VAE': # skip VAE entries
continue
stanza = conf[model] if model in conf else { }
stanza['description'] = Datasets[model]['description']
stanza['weights'] = os.path.join(Model_dir,Datasets[model]['file'])
stanza['config'] = os.path.join(SD_Configs, Datasets[model]['config'])
stanza['width'] = Datasets[model]['width']
stanza['height'] = Datasets[model]['height']
stanza.pop('default',None) # this will be set later
if vae:
stanza['vae'] = os.path.join(Model_dir,vae)
# BUG - the first stanza is always the default. User should select.
if not default_selected:
stanza['default'] = True
default_selected = True
conf[model] = stanza
return OmegaConf.to_yaml(conf)
#---------------------------------------------
# this will preload the Bert tokenizer files
def download_bert():
@@ -66,7 +415,6 @@ def download_gfpgan():
print(traceback.format_exc())
print('Loading models from GFPGAN')
for model in (
[
'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth',
@@ -152,6 +500,31 @@ def download_safety_checker():
#-------------------------------------
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='InvokeAI model downloader')
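# BooleanOptionalAction also generates a matching --no-interactive flag,
# which skips the prompts (useful for CI and scripted installs).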
parser.add_argument('--interactive',
dest='interactive',
action=argparse.BooleanOptionalAction,
default=True,
help='run in interactive mode (default)')
opt = parser.parse_args()
try:
if opt.interactive:
introduction()
print('** WEIGHT SELECTION **')
choice = user_wants_to_download_weights()
if choice != 'skip':
models = select_datasets(choice)
if models is None:
if yes_or_no('Quit?',default_yes=False):
sys.exit(0)
print('** LICENSE AGREEMENT FOR WEIGHT FILES **')
access_token = authenticate()
print('\n** DOWNLOADING WEIGHTS **')
successfully_downloaded = download_weight_datasets(models, access_token)
update_config_file(successfully_downloaded)
else:
print('\n** DOWNLOADING SUPPORT MODELS **')
download_bert()
download_kornia()
download_clip()
@@ -159,5 +532,11 @@ if __name__ == '__main__':
download_codeformer()
download_clipseg()
download_safety_checker()
postscript()
except KeyboardInterrupt:
print('\nGoodbye! Come back soon.')
except Exception as e:
print(f'\nA problem occurred during download.\nThe error was: "{str(e)}"')