mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
preload_models interactively downloads sd model files
This commit is contained in:
commit
e7368d7231
4
.github/workflows/test-invoke-conda.yml
vendored
4
.github/workflows/test-invoke-conda.yml
vendored
@ -84,7 +84,9 @@ jobs:
|
||||
|
||||
- name: run preload_models.py
|
||||
id: run-preload-models
|
||||
run: python scripts/preload_models.py
|
||||
run: |
|
||||
python scripts/preload_models.py \
|
||||
--no-interactive
|
||||
|
||||
- name: Run the tests
|
||||
id: run-tests
|
||||
|
8
.gitignore
vendored
8
.gitignore
vendored
@ -199,7 +199,13 @@ checkpoints
|
||||
.scratch/
|
||||
.vscode/
|
||||
gfpgan/
|
||||
models/ldm/stable-diffusion-v1/model.sha256
|
||||
models/ldm/stable-diffusion-v1/*.sha256
|
||||
|
||||
# GFPGAN model files
|
||||
gfpgan/
|
||||
|
||||
# config file (will be created by installer)
|
||||
configs/models.yaml
|
||||
|
||||
# weights (will be created by installer)
|
||||
models/ldm/stable-diffusion-v1/*.ckpt
|
@ -1,54 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-6
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: "val/rec_loss"
|
||||
embed_dim: 16
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 0.000001
|
||||
disc_weight: 0.5
|
||||
|
||||
ddconfig:
|
||||
double_z: True
|
||||
z_channels: 16
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult: [ 1,1,2,2,4] # num_down = len(ch_mult)-1
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: [16]
|
||||
dropout: 0.0
|
||||
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 12
|
||||
wrap: True
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetSRTrain
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetSRValidation
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 1000
|
||||
max_images: 8
|
||||
increase_log_steps: True
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
||||
accumulate_grad_batches: 2
|
@ -1,53 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-6
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: "val/rec_loss"
|
||||
embed_dim: 4
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 0.000001
|
||||
disc_weight: 0.5
|
||||
|
||||
ddconfig:
|
||||
double_z: True
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: [ ]
|
||||
dropout: 0.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 12
|
||||
wrap: True
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetSRTrain
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetSRValidation
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 1000
|
||||
max_images: 8
|
||||
increase_log_steps: True
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
||||
accumulate_grad_batches: 2
|
@ -1,54 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-6
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: "val/rec_loss"
|
||||
embed_dim: 3
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 0.000001
|
||||
disc_weight: 0.5
|
||||
|
||||
ddconfig:
|
||||
double_z: True
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult: [ 1,2,4 ] # num_down = len(ch_mult)-1
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: [ ]
|
||||
dropout: 0.0
|
||||
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 12
|
||||
wrap: True
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetSRTrain
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetSRValidation
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 1000
|
||||
max_images: 8
|
||||
increase_log_steps: True
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
||||
accumulate_grad_batches: 2
|
@ -1,53 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-6
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: "val/rec_loss"
|
||||
embed_dim: 64
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 0.000001
|
||||
disc_weight: 0.5
|
||||
|
||||
ddconfig:
|
||||
double_z: True
|
||||
z_channels: 64
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult: [ 1,1,2,2,4,4] # num_down = len(ch_mult)-1
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: [16,8]
|
||||
dropout: 0.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 12
|
||||
wrap: True
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetSRTrain
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetSRValidation
|
||||
params:
|
||||
size: 256
|
||||
degradation: pil_nearest
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 1000
|
||||
max_images: 8
|
||||
increase_log_steps: True
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
||||
accumulate_grad_batches: 2
|
@ -1,86 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
image_size: 64
|
||||
channels: 3
|
||||
monitor: val/loss_simple_ema
|
||||
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
# note: this isn't actually the resolution but
|
||||
# the downsampling factor, i.e. this corresponds to
|
||||
# attention on spatial resolution 8,16,32, as the
|
||||
# spatial resolution of the latents is 64 for f4
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ckpt_path: models/first_stage_models/vq-f4/model.ckpt
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 48
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: taming.data.faceshq.CelebAHQTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: taming.data.faceshq.CelebAHQValidation
|
||||
params:
|
||||
size: 256
|
||||
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 5000
|
||||
max_images: 8
|
||||
increase_log_steps: False
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
@ -1,98 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 32
|
||||
channels: 4
|
||||
cond_stage_trainable: true
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 32
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 256
|
||||
attention_resolutions:
|
||||
#note: this isn't actually the resolution but
|
||||
# the downsampling factor, i.e. this corresponds to
|
||||
# attention on spatial resolution 8,16,32, as the
|
||||
# spatial resolution of the latents is 32 for f8
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 512
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 4
|
||||
n_embed: 16384
|
||||
ckpt_path: configs/first_stage_models/vq-f8/model.yaml
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 32
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.ClassEmbedder
|
||||
params:
|
||||
embed_dim: 512
|
||||
key: class_label
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 64
|
||||
num_workers: 12
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetTrain
|
||||
params:
|
||||
config:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetValidation
|
||||
params:
|
||||
config:
|
||||
size: 256
|
||||
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 5000
|
||||
max_images: 8
|
||||
increase_log_steps: False
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
@ -1,68 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 0.0001
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 64
|
||||
channels: 3
|
||||
cond_stage_trainable: true
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss
|
||||
use_ema: False
|
||||
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 192
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 5
|
||||
num_heads: 1
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 512
|
||||
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.ClassEmbedder
|
||||
params:
|
||||
n_classes: 1001
|
||||
embed_dim: 512
|
||||
key: class_label
|
@ -1,85 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
image_size: 64
|
||||
channels: 3
|
||||
monitor: val/loss_simple_ema
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
# note: this isn't actually the resolution but
|
||||
# the downsampling factor, i.e. this corresponds to
|
||||
# attention on spatial resolution 8,16,32, as the
|
||||
# spatial resolution of the latents is 64 for f4
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ckpt_path: configs/first_stage_models/vq-f4/model.yaml
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 42
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: taming.data.faceshq.FFHQTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: taming.data.faceshq.FFHQValidation
|
||||
params:
|
||||
size: 256
|
||||
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 5000
|
||||
max_images: 8
|
||||
increase_log_steps: False
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
@ -1,85 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
image_size: 64
|
||||
channels: 3
|
||||
monitor: val/loss_simple_ema
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
# note: this isn't actually the resolution but
|
||||
# the downsampling factor, i.e. this corresponds to
|
||||
# attention on spatial resolution 8,16,32, as the
|
||||
# spatial resolution of the latents is 64 for f4
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
ckpt_path: configs/first_stage_models/vq-f4/model.yaml
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 48
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.lsun.LSUNBedroomsTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.lsun.LSUNBedroomsValidation
|
||||
params:
|
||||
size: 256
|
||||
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 5000
|
||||
max_images: 8
|
||||
increase_log_steps: False
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
@ -1,91 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 5.0e-5 # set to target_lr by starting main.py with '--scale_lr False'
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0155
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: "image"
|
||||
cond_stage_key: "image"
|
||||
image_size: 32
|
||||
channels: 4
|
||||
cond_stage_trainable: False
|
||||
concat_mode: False
|
||||
scale_by_std: True
|
||||
monitor: 'val/loss_simple_ema'
|
||||
|
||||
scheduler_config: # 10000 warmup steps
|
||||
target: ldm.lr_scheduler.LambdaLinearScheduler
|
||||
params:
|
||||
warm_up_steps: [10000]
|
||||
cycle_lengths: [10000000000000]
|
||||
f_start: [1.e-6]
|
||||
f_max: [1.]
|
||||
f_min: [ 1.]
|
||||
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 32
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 192
|
||||
attention_resolutions: [ 1, 2, 4, 8 ] # 32, 16, 8, 4
|
||||
num_res_blocks: 2
|
||||
channel_mult: [ 1,2,2,4,4 ] # 32, 16, 8, 4, 2
|
||||
num_heads: 8
|
||||
use_scale_shift_norm: True
|
||||
resblock_updown: True
|
||||
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
embed_dim: 4
|
||||
monitor: "val/rec_loss"
|
||||
ckpt_path: "models/first_stage_models/kl-f8/model.ckpt"
|
||||
ddconfig:
|
||||
double_z: True
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult: [ 1,2,4,4 ] # num_down = len(ch_mult)-1
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: [ ]
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
|
||||
cond_stage_config: "__is_unconditional__"
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 96
|
||||
num_workers: 5
|
||||
wrap: False
|
||||
train:
|
||||
target: ldm.data.lsun.LSUNChurchesTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.lsun.LSUNChurchesValidation
|
||||
params:
|
||||
size: 256
|
||||
|
||||
lightning:
|
||||
callbacks:
|
||||
image_logger:
|
||||
target: main.ImageLogger
|
||||
params:
|
||||
batch_frequency: 5000
|
||||
max_images: 8
|
||||
increase_log_steps: False
|
||||
|
||||
|
||||
trainer:
|
||||
benchmark: True
|
@ -1,71 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 5.0e-05
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.00085
|
||||
linear_end: 0.012
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: caption
|
||||
image_size: 32
|
||||
channels: 4
|
||||
cond_stage_trainable: true
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
scale_factor: 0.18215
|
||||
use_ema: False
|
||||
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 32
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 320
|
||||
attention_resolutions:
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_heads: 8
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 1280
|
||||
use_checkpoint: true
|
||||
legacy: False
|
||||
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
embed_dim: 4
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.BERTEmbedder
|
||||
params:
|
||||
n_embed: 1280
|
||||
n_layer: 32
|
@ -1,29 +1,36 @@
|
||||
# This file describes the alternative machine learning models
|
||||
# available to the dream script.
|
||||
# available to the InvokeAI script.
|
||||
#
|
||||
# To add a new model, follow the examples below. Each
|
||||
# model requires a model config file, a weights file,
|
||||
# and the width and height of the images it
|
||||
# was trained on.
|
||||
stable-diffusion-1.4:
|
||||
config: configs/stable-diffusion/v1-inference.yaml
|
||||
weights: models/ldm/stable-diffusion-v1/model.ckpt
|
||||
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
description: Stable Diffusion inference model version 1.4
|
||||
width: 512
|
||||
height: 512
|
||||
default: true
|
||||
inpainting-1.5:
|
||||
description: runwayML tuned inpainting model v1.5
|
||||
weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
|
||||
config: configs/stable-diffusion/v1-inpainting-inference.yaml
|
||||
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
config: ./configs/stable-diffusion/v1-inference.yaml
|
||||
weights: ./models/ldm/stable-diffusion-v1/sd-v1-4.ckpt
|
||||
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
description: The original Stable Diffusion version 1.4 weight file (4.27 GB)
|
||||
width: 512
|
||||
height: 512
|
||||
stable-diffusion-1.5:
|
||||
config: configs/stable-diffusion/v1-inference.yaml
|
||||
weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
|
||||
# vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
description: Stable Diffusion inference model version 1.5
|
||||
description: The newest Stable Diffusion version 1.5 weight file (4.27 GB)
|
||||
weights: ./models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
|
||||
config: ./configs/stable-diffusion/v1-inference.yaml
|
||||
width: 512
|
||||
height: 512
|
||||
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
default: true
|
||||
inpainting-1.5:
|
||||
description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB)
|
||||
weights: ./models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
|
||||
config: ./configs/stable-diffusion/v1-inpainting-inference.yaml
|
||||
width: 512
|
||||
height: 512
|
||||
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
waifu-diffusion-1.3:
|
||||
description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27)
|
||||
weights: ./models/ldm/stable-diffusion-v1/model-epoch09-float32.ckpt
|
||||
config: ./configs/stable-diffusion/v1-inference.yaml
|
||||
width: 512
|
||||
height: 512
|
||||
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
|
@ -1,68 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 0.0001
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.015
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: jpg
|
||||
cond_stage_key: nix
|
||||
image_size: 48
|
||||
channels: 16
|
||||
cond_stage_trainable: false
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
scale_by_std: false
|
||||
scale_factor: 0.22765929
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 48
|
||||
in_channels: 16
|
||||
out_channels: 16
|
||||
model_channels: 448
|
||||
attention_resolutions:
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
use_scale_shift_norm: false
|
||||
resblock_updown: false
|
||||
num_head_channels: 32
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 768
|
||||
use_checkpoint: true
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: val/rec_loss
|
||||
embed_dim: 16
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 16
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 16
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: torch.nn.Identity
|
@ -385,7 +385,7 @@ automatically.
|
||||
Example:
|
||||
|
||||
<pre>
|
||||
invoke> <b>!import_model models/ldm/stable-diffusion-v1/ model-epoch08-float16.ckpt</b>
|
||||
invoke> <b>!import_model models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt</b>
|
||||
>> Model import in process. Please enter the values needed to configure this model:
|
||||
|
||||
Name for this model: <b>waifu-diffusion</b>
|
||||
|
267
docs/installation/INSTALLING_MODELS.md
Normal file
267
docs/installation/INSTALLING_MODELS.md
Normal file
@ -0,0 +1,267 @@
|
||||
---
|
||||
title: Installing Models
|
||||
---
|
||||
|
||||
# :octicons-paintbrush-16: Installing Models
|
||||
|
||||
## Model Weight Files
|
||||
|
||||
The model weight files ('*.ckpt') are the Stable Diffusion "secret
|
||||
sauce". They are the product of training the AI on millions of
|
||||
captioned images gathered from multiple sources.
|
||||
|
||||
Originally there was only a single Stable Diffusion weights file,
|
||||
which many people named `model.ckpt`. Now there are dozens or more
|
||||
that have been "fine tuned" to provide particular styles, genres, or
|
||||
other features. InvokeAI allows you to install and run multiple model
|
||||
weight files and switch between them quickly in the command-line and
|
||||
web interfaces.
|
||||
|
||||
This manual will guide you through installing and configuring model
|
||||
weight files.
|
||||
|
||||
## Base Models
|
||||
|
||||
InvokeAI comes with support for a good initial set of models listed in
|
||||
the model configuration file `configs/models.yaml`. They are:
|
||||
|
||||
| Model | Weight File | Description | DOWNLOAD FROM |
|
||||
| ---------------------- | ----------------------------- |--------------------------------- | ----------------|
|
||||
| stable-diffusion-1.5 | v1-5-pruned-emaonly.ckpt | Most recent version of base Stable Diffusion model| https://huggingface.co/runwayml/stable-diffusion-v1-5 |
|
||||
| stable-diffusion-1.4 | sd-v1-4.ckpt | Previous version of base Stable Diffusion model | https://huggingface.co/CompVis/stable-diffusion-v-1-4-original |
|
||||
| inpainting-1.5 | sd-v1-5-inpainting.ckpt | Stable Diffusion 1.5 model specialized for inpainting | https://huggingface.co/runwayml/stable-diffusion-inpainting |
|
||||
| waifu-diffusion-1.3 | model-epoch09-float32.ckpt | Stable Diffusion 1.4 trained to produce anime images | https://huggingface.co/hakurei/waifu-diffusion-v1-3 |
|
||||
| &lt;all models&gt; | vae-ft-mse-840000-ema-pruned.ckpt | A fine-tuned add-on file that improves face generation | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/ |
|
||||
|
||||
|
||||
Note that these files are covered by an "Ethical AI" license which
|
||||
forbids certain uses. You will need to create an account on the
|
||||
Hugging Face website and accept the license terms before you can
|
||||
access the files.
|
||||
|
||||
The predefined configuration file for InvokeAI (located at
|
||||
`configs/models.yaml`) provides entries for each of these weights
|
||||
files. `stable-diffusion-1.5` is the default model used, and we
|
||||
strongly recommend that you install this weights file if nothing else.
|
||||
|
||||
## Community-Contributed Models
|
||||
|
||||
There are too many to list here and more are being contributed every
|
||||
day. Hugging Face maintains a [fast-growing
|
||||
repository](https://huggingface.co/sd-concepts-library) of fine-tune
|
||||
(".bin") models that can be imported into InvokeAI by passing the
|
||||
`--embedding_path` option to the `invoke.py` command.
|
||||
|
||||
[This page](https://rentry.org/sdmodels) hosts a large list of
|
||||
official and unofficial Stable Diffusion models and where they can be
|
||||
obtained.
|
||||
|
||||
## Installation
|
||||
|
||||
There are three ways to install weights files:
|
||||
|
||||
1. During InvokeAI installation, the `preload_models.py` script can
|
||||
download them for you.
|
||||
|
||||
2. You can use the command-line interface (CLI) to import, configure
|
||||
and modify new model files.
|
||||
|
||||
3. You can download the files manually and add the appropriate entries
|
||||
to `models.yaml`.
|
||||
|
||||
### Installation via `preload_models.py`
|
||||
|
||||
This is the most automatic way. Run `scripts/preload_models.py` from
|
||||
the console. It will ask you to select which models to download and
|
||||
lead you through the steps of setting up a Hugging Face account if you
|
||||
haven't done so already.
|
||||
|
||||
To start, from within the InvokeAI directory run the command `python
|
||||
scripts/preload_models.py` (Linux/MacOS) or `python
|
||||
scripts\preload_models.py` (Windows):
|
||||
|
||||
```
|
||||
Loading Python libraries...
|
||||
|
||||
** INTRODUCTION **
|
||||
Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
|
||||
and other large models that are needed for text to image generation. At any point you may interrupt
|
||||
this program and resume later.
|
||||
|
||||
** WEIGHT SELECTION **
|
||||
Would you like to download the Stable Diffusion model weights now? [y]
|
||||
|
||||
Choose the weight file(s) you wish to download. Before downloading you
|
||||
will be given the option to view and change your selections.
|
||||
|
||||
[1] stable-diffusion-1.5:
|
||||
The newest Stable Diffusion version 1.5 weight file (4.27 GB) (recommended)
|
||||
Download? [y]
|
||||
[2] inpainting-1.5:
|
||||
RunwayML SD 1.5 model optimized for inpainting (4.27 GB) (recommended)
|
||||
Download? [y]
|
||||
[3] stable-diffusion-1.4:
|
||||
The original Stable Diffusion version 1.4 weight file (4.27 GB)
|
||||
Download? [n] n
|
||||
[4] waifu-diffusion-1.3:
|
||||
Stable Diffusion 1.4 fine tuned on anime-styled images (4.27)
|
||||
Download? [n] y
|
||||
[5] ft-mse-improved-autoencoder-840000:
|
||||
StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) (recommended)
|
||||
Download? [y] y
|
||||
The following weight files will be downloaded:
|
||||
[1] stable-diffusion-1.5*
|
||||
[2] inpainting-1.5
|
||||
[4] waifu-diffusion-1.3
|
||||
[5] ft-mse-improved-autoencoder-840000
|
||||
*default
|
||||
Ok to download? [y]
|
||||
** LICENSE AGREEMENT FOR WEIGHT FILES **
|
||||
|
||||
1. To download the Stable Diffusion weight files you need to read and accept the
|
||||
CreativeML Responsible AI license. If you have not already done so, please
|
||||
create an account using the "Sign Up" button:
|
||||
|
||||
https://huggingface.co
|
||||
|
||||
You will need to verify your email address as part of the HuggingFace
|
||||
registration process.
|
||||
|
||||
2. After creating the account, login under your account and accept
|
||||
the license terms located here:
|
||||
|
||||
https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
|
||||
|
||||
Press <enter> when you are ready to continue:
|
||||
...
|
||||
```
|
||||
|
||||
When the script is complete, you will find the downloaded weights
|
||||
files in `models/ldm/stable-diffusion-v1` and a matching configuration
|
||||
file in `configs/models.yaml`.
|
||||
|
||||
You can run the script again to add any models you didn't select the
|
||||
first time. Note that as a safety measure the script will _never_
|
||||
remove a previously-installed weights file. You will have to do this
|
||||
manually.
|
||||
|
||||
### Installation via the CLI
|
||||
|
||||
You can install a new model, including any of the community-supported
|
||||
ones, via the command-line client's `!import_model` command.
|
||||
|
||||
1. First download the desired model weights file and place it under `models/ldm/stable-diffusion-v1/`.
|
||||
You may rename the weights file to something more memorable if you wish. Record the path of the
|
||||
weights file (e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`)
|
||||
|
||||
2. Launch the `invoke.py` CLI with `python scripts/invoke.py`.
|
||||
|
||||
3. At the `invoke>` command-line, enter the command `!import_model <path to model>`.
|
||||
For example:
|
||||
|
||||
`invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
|
||||
|
||||
(Hint - the CLI supports file path autocompletion. Type a bit of the path
|
||||
name and hit <tab> in order to get a choice of possible completions.)
|
||||
|
||||
4. Follow the wizard's instructions to complete installation as shown in the example
|
||||
here:
|
||||
|
||||
```
|
||||
invoke> <b>!import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt</b>
|
||||
>> Model import in process. Please enter the values needed to configure this model:
|
||||
|
||||
Name for this model: <b>arabian-nights</b>
|
||||
Description of this model: <b>Arabian Nights Fine Tune v1.0</b>
|
||||
Configuration file for this model: <b>configs/stable-diffusion/v1-inference.yaml</b>
|
||||
Default image width: <b>512</b>
|
||||
Default image height: <b>512</b>
|
||||
>> New configuration:
|
||||
arabian-nights:
|
||||
config: configs/stable-diffusion/v1-inference.yaml
|
||||
description: Arabian Nights Fine Tune v1.0
|
||||
height: 512
|
||||
weights: models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
|
||||
width: 512
|
||||
OK to import [n]? <b>y</b>
|
||||
>> Caching model stable-diffusion-1.4 in system RAM
|
||||
>> Loading arabian-nights from models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
|
||||
| LatentDiffusion: Running in eps-prediction mode
|
||||
| DiffusionWrapper has 859.52 M params.
|
||||
| Making attention of type 'vanilla' with 512 in_channels
|
||||
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
| Making attention of type 'vanilla' with 512 in_channels
|
||||
| Using faster float16 precision
|
||||
|
||||
```
|
||||
|
||||
If you've previously installed the fine-tune VAE file `vae-ft-mse-840000-ema-pruned.ckpt`,
|
||||
the wizard will also ask you if you want to add this VAE to the model.
|
||||
|
||||
The appropriate entry for this model will be added to `configs/models.yaml` and it will
|
||||
be available to use in the CLI immediately.
|
||||
|
||||
The CLI has additional commands for switching among, viewing, editing,
|
||||
and deleting the available models. These are described in [Command Line
|
||||
Client](../features/CLI.md#model-selection-and-importation), but the two most
|
||||
frequently-used are `!models` and `!switch <name of model>`. The first
|
||||
prints a table of models that InvokeAI knows about and their load
|
||||
status. The second will load the requested model and lets you switch
|
||||
back and forth quickly among loaded models.
|
||||
|
||||
### Manually editing of `configs/models.yaml`
|
||||
|
||||
If you are comfortable with a text editor then you may simply edit
|
||||
`models.yaml` directly.
|
||||
|
||||
First you need to download the desired .ckpt file and place it in
|
||||
`models/ldm/stable-diffusion-v1` as described in step #1 in the
|
||||
previous section. Record the path to the weights file,
|
||||
e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`
|
||||
|
||||
Then using a **text** editor (e.g. the Windows Notepad application),
|
||||
open the file `configs/models.yaml`, and add a new stanza that follows
|
||||
this model:
|
||||
|
||||
```
|
||||
arabian-nights-1.0:
|
||||
description: A great fine-tune in Arabian Nights style
|
||||
weights: ./models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
|
||||
config: ./configs/stable-diffusion/v1-inference.yaml
|
||||
width: 512
|
||||
height: 512
|
||||
vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
|
||||
default: false
|
||||
```
|
||||
|
||||
* arabian-nights-1.0
|
||||
- This is the name of the model that you will refer to from within the
|
||||
CLI and the WebGUI when you need to load and use the model.
|
||||
|
||||
* description
|
||||
- Any description that you want to add to the model to remind you what
|
||||
it is.
|
||||
|
||||
* weights
|
||||
- Relative path to the .ckpt weights file for this model.
|
||||
|
||||
* config
|
||||
- This is the confusingly-named configuration file for the model itself.
|
||||
Use `./configs/stable-diffusion/v1-inference.yaml` unless the model happens
|
||||
to need a custom configuration, in which case the place you downloaded it
|
||||
from will tell you what to use instead. For example, the runwayML custom
|
||||
inpainting model requires the file `configs/stable-diffusion/v1-inpainting-inference.yaml`.
|
||||
This is already included in the InvokeAI distribution and is configured automatically
|
||||
for you by the `preload_models.py` script.
|
||||
|
||||
* vae
|
||||
- If you want to add a VAE file to the model, then enter its path here.
|
||||
|
||||
* width, height
|
||||
- This is the width and height of the images used to train the model.
|
||||
Currently they are always 512 and 512.
|
||||
|
||||
Save the `models.yaml` and relaunch InvokeAI. The new model should now be
|
||||
available for your use.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Linux
|
||||
title: Manual Installation, Linux
|
||||
---
|
||||
|
||||
# :fontawesome-brands-linux: Linux
|
||||
@ -63,24 +63,16 @@ title: Linux
|
||||
model loading scheme to allow the script to work on GPU machines that are not
|
||||
internet connected. See [Preload Models](../features/OTHER.md#preload-models)
|
||||
|
||||
7. Now you need to install the weights for the stable diffusion model.
|
||||
7. Install the weights for the stable diffusion model.
|
||||
|
||||
- For running with the released weights, you will first need to set up an acount
|
||||
with [Hugging Face](https://huggingface.co).
|
||||
- Use your credentials to log in, and then point your browser [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
|
||||
- You may be asked to sign a license agreement at this point.
|
||||
- Click on "Files and versions" near the top of the page, and then click on the
|
||||
file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click
|
||||
the "download" link. Save the file somewhere safe on your local machine.
|
||||
- Sign up at https://huggingface.co
|
||||
- Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
|
||||
- Accept the terms and click Access Repository
|
||||
- Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
|
||||
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
|
||||
|
||||
Now run the following commands from within the stable-diffusion directory.
|
||||
This will create a symbolic link from the stable-diffusion model.ckpt file, to
|
||||
the true location of the `sd-v1-4.ckpt` file.
|
||||
|
||||
```bash
|
||||
(invokeai) ~/InvokeAI$ mkdir -p models/ldm/stable-diffusion-v1
|
||||
(invokeai) ~/InvokeAI$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
|
||||
```
|
||||
There are many other models that you can use. Please see [Installing Models](../features/INSTALLING_MODELS.md)
|
||||
for details.
|
||||
|
||||
8. Start generating images!
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: macOS
|
||||
title: Manual Installation, macOS
|
||||
---
|
||||
|
||||
# :fontawesome-brands-apple: macOS
|
||||
@ -24,9 +24,15 @@ First you need to download a large checkpoint file.
|
||||
1. Sign up at https://huggingface.co
|
||||
2. Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
|
||||
3. Accept the terms and click Access Repository
|
||||
4. Download [sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt) and note where you have saved it (probably the Downloads folder). You may want to move it somewhere else for longer term storage - SD needs this file to run.
|
||||
4. Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
|
||||
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
|
||||
|
||||
While that is downloading, open Terminal and run the following commands one at a time, reading the comments and taking care to run the appropriate command for your Mac's architecture (Intel or M1).
|
||||
There are many other models that you can try. Please see [Installing Models](../features/INSTALLING_MODELS.md)
|
||||
for details.
|
||||
|
||||
While that is downloading, open Terminal and run the following
|
||||
commands one at a time, reading the comments and taking care to run
|
||||
the appropriate command for your Mac's architecture (Intel or M1).
|
||||
|
||||
!!! todo "Homebrew"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
---
|
||||
title: Windows
|
||||
title: Manual Installation, Windows
|
||||
---
|
||||
|
||||
# :fontawesome-brands-windows: Windows
|
||||
@ -83,23 +83,14 @@ in the wiki
|
||||
|
||||
8. Now you need to install the weights for the big stable diffusion model.
|
||||
|
||||
1. For running with the released weights, you will first need to set up an acount with Hugging Face (https://huggingface.co).
|
||||
2. Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
|
||||
3. You may be asked to sign a license agreement at this point.
|
||||
4. Click on "Files and versions" near the top of the page, and then click on the file named `sd-v1-4.ckpt`. You'll be taken to a page that
|
||||
prompts you to click the "download" link. Now save the file somewhere safe on your local machine.
|
||||
5. The weight file is >4 GB in size, so
|
||||
downloading may take a while.
|
||||
- Sign up at https://huggingface.co
|
||||
- Go to the [Stable Diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
|
||||
- Accept the terms and click Access Repository
|
||||
- Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
|
||||
and move it into this directory under `models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt`
|
||||
|
||||
Now run the following commands from **within the InvokeAI directory** to copy the weights file to the right place:
|
||||
|
||||
```batch
|
||||
mkdir -p models\ldm\stable-diffusion-v1
|
||||
copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
|
||||
```
|
||||
|
||||
Please replace `C:\path\to\sd-v1.4.ckpt` with the correct path to wherever you stashed this file. If you prefer not to copy or move the .ckpt file,
|
||||
you may instead create a shortcut to it from within `models\ldm\stable-diffusion-v1\`.
|
||||
There are many other models that you can use. Please see [Installing Models](../features/INSTALLING_MODELS.md)
|
||||
for details.
|
||||
|
||||
9. Start generating images!
|
||||
|
||||
|
@ -227,11 +227,14 @@ class ModelCache(object):
|
||||
print(' | Using more accurate float32 precision')
|
||||
|
||||
# look and load a matching vae file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
|
||||
if vae and os.path.exists(vae):
|
||||
print(f' | Loading VAE weights from: {vae}')
|
||||
vae_ckpt = torch.load(vae, map_location="cpu")
|
||||
vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
|
||||
model.first_stage_model.load_state_dict(vae_dict, strict=False)
|
||||
if vae:
|
||||
if os.path.exists(vae):
|
||||
print(f' | Loading VAE weights from: {vae}')
|
||||
vae_ckpt = torch.load(vae, map_location="cpu")
|
||||
vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
|
||||
model.first_stage_model.load_state_dict(vae_dict, strict=False)
|
||||
else:
|
||||
print(f' | VAE file {vae} not found. Skipping.')
|
||||
|
||||
model.to(self.device)
|
||||
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
|
||||
@ -281,7 +284,7 @@ class ModelCache(object):
|
||||
Returns the preamble for the config file.
|
||||
'''
|
||||
return '''# This file describes the alternative machine learning models
|
||||
# available to the dream script.
|
||||
# available to InvokeAI script.
|
||||
#
|
||||
# To add a new model, follow the examples below. Each
|
||||
# model requires a model config file, a weights file,
|
||||
|
@ -1,44 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: val/rec_loss
|
||||
embed_dim: 16
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 1.0e-06
|
||||
disc_weight: 0.5
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 16
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 16
|
||||
dropout: 0.0
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 6
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,46 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: val/rec_loss
|
||||
embed_dim: 64
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 1.0e-06
|
||||
disc_weight: 0.5
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 64
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 16
|
||||
- 8
|
||||
dropout: 0.0
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 6
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,41 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: val/rec_loss
|
||||
embed_dim: 3
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 1.0e-06
|
||||
disc_weight: 0.5
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 10
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,42 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
monitor: val/rec_loss
|
||||
embed_dim: 4
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.LPIPSWithDiscriminator
|
||||
params:
|
||||
disc_start: 50001
|
||||
kl_weight: 1.0e-06
|
||||
disc_weight: 0.5
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 4
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,49 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.VQModel
|
||||
params:
|
||||
embed_dim: 8
|
||||
n_embed: 16384
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 8
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 16
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
||||
params:
|
||||
disc_conditional: false
|
||||
disc_in_channels: 3
|
||||
disc_start: 250001
|
||||
disc_weight: 0.75
|
||||
disc_num_layers: 2
|
||||
codebook_weight: 1.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 14
|
||||
num_workers: 20
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,46 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.VQModel
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
|
||||
ddconfig:
|
||||
attn_type: none
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
||||
params:
|
||||
disc_conditional: false
|
||||
disc_in_channels: 3
|
||||
disc_start: 11
|
||||
disc_weight: 0.75
|
||||
codebook_weight: 1.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 8
|
||||
num_workers: 12
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
crop_size: 256
|
@ -1,45 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.VQModel
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
||||
params:
|
||||
disc_conditional: false
|
||||
disc_in_channels: 3
|
||||
disc_start: 0
|
||||
disc_weight: 0.75
|
||||
codebook_weight: 1.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 8
|
||||
num_workers: 16
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
crop_size: 256
|
@ -1,48 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.VQModel
|
||||
params:
|
||||
embed_dim: 4
|
||||
n_embed: 256
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 32
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
||||
params:
|
||||
disc_conditional: false
|
||||
disc_in_channels: 3
|
||||
disc_start: 250001
|
||||
disc_weight: 0.75
|
||||
codebook_weight: 1.0
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 10
|
||||
num_workers: 20
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,48 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 4.5e-06
|
||||
target: ldm.models.autoencoder.VQModel
|
||||
params:
|
||||
embed_dim: 4
|
||||
n_embed: 16384
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 32
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
||||
params:
|
||||
disc_conditional: false
|
||||
disc_in_channels: 3
|
||||
disc_num_layers: 2
|
||||
disc_start: 1
|
||||
disc_weight: 0.6
|
||||
codebook_weight: 1.0
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 10
|
||||
num_workers: 20
|
||||
wrap: true
|
||||
train:
|
||||
target: ldm.data.openimages.FullOpenImagesTrain
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.FullOpenImagesValidation
|
||||
params:
|
||||
size: 384
|
||||
crop_size: 256
|
@ -1,80 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0155
|
||||
log_every_t: 100
|
||||
timesteps: 1000
|
||||
loss_type: l2
|
||||
first_stage_key: image
|
||||
cond_stage_key: LR_image
|
||||
image_size: 64
|
||||
channels: 3
|
||||
concat_mode: true
|
||||
cond_stage_trainable: false
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 6
|
||||
out_channels: 3
|
||||
model_channels: 160
|
||||
attention_resolutions:
|
||||
- 16
|
||||
- 8
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: torch.nn.Identity
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 64
|
||||
wrap: false
|
||||
num_workers: 12
|
||||
train:
|
||||
target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain
|
||||
params:
|
||||
size: 256
|
||||
degradation: bsrgan_light
|
||||
downscale_f: 4
|
||||
min_crop_f: 0.5
|
||||
max_crop_f: 1.0
|
||||
random_crop: true
|
||||
validation:
|
||||
target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation
|
||||
params:
|
||||
size: 256
|
||||
degradation: bsrgan_light
|
||||
downscale_f: 4
|
||||
min_crop_f: 0.5
|
||||
max_crop_f: 1.0
|
||||
random_crop: true
|
@ -1,70 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 64
|
||||
channels: 3
|
||||
cond_stage_trainable: false
|
||||
concat_mode: false
|
||||
monitor: val/loss
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 48
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.faceshq.CelebAHQTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.faceshq.CelebAHQValidation
|
||||
params:
|
||||
size: 256
|
@ -1,80 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 32
|
||||
channels: 4
|
||||
cond_stage_trainable: true
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 32
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 256
|
||||
attention_resolutions:
|
||||
- 4
|
||||
- 2
|
||||
- 1
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 512
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 4
|
||||
n_embed: 16384
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions:
|
||||
- 32
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.ClassEmbedder
|
||||
params:
|
||||
embed_dim: 512
|
||||
key: class_label
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 64
|
||||
num_workers: 12
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.imagenet.ImageNetTrain
|
||||
params:
|
||||
config:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.imagenet.ImageNetValidation
|
||||
params:
|
||||
config:
|
||||
size: 256
|
@ -1,70 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 64
|
||||
channels: 3
|
||||
cond_stage_trainable: false
|
||||
concat_mode: false
|
||||
monitor: val/loss
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 42
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.faceshq.FFHQTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.faceshq.FFHQValidation
|
||||
params:
|
||||
size: 256
|
@ -1,67 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0205
|
||||
log_every_t: 100
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: image
|
||||
cond_stage_key: masked_image
|
||||
image_size: 64
|
||||
channels: 3
|
||||
concat_mode: true
|
||||
monitor: val/loss
|
||||
scheduler_config:
|
||||
target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler
|
||||
params:
|
||||
verbosity_interval: 0
|
||||
warm_up_steps: 1000
|
||||
max_decay_steps: 50000
|
||||
lr_start: 0.001
|
||||
lr_max: 0.1
|
||||
lr_min: 0.0001
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 7
|
||||
out_channels: 3
|
||||
model_channels: 256
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_heads: 8
|
||||
resblock_updown: true
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
attn_type: none
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: ldm.modules.losses.contperceptual.DummyLoss
|
||||
cond_stage_config: __is_first_stage__
|
@ -1,81 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0205
|
||||
log_every_t: 100
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: image
|
||||
cond_stage_key: coordinates_bbox
|
||||
image_size: 64
|
||||
channels: 3
|
||||
conditioning_key: crossattn
|
||||
cond_stage_trainable: true
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 128
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 3
|
||||
context_dim: 512
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.BERTEmbedder
|
||||
params:
|
||||
n_embed: 512
|
||||
n_layer: 16
|
||||
vocab_size: 8192
|
||||
max_seq_len: 92
|
||||
use_tokenizer: false
|
||||
monitor: val/loss_simple_ema
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 24
|
||||
wrap: false
|
||||
num_workers: 10
|
||||
train:
|
||||
target: ldm.data.openimages.OpenImagesBBoxTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.openimages.OpenImagesBBoxValidation
|
||||
params:
|
||||
size: 256
|
@ -1,70 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: class_label
|
||||
image_size: 64
|
||||
channels: 3
|
||||
cond_stage_trainable: false
|
||||
concat_mode: false
|
||||
monitor: val/loss
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 224
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
num_head_channels: 32
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config: __is_unconditional__
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 48
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.lsun.LSUNBedroomsTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.lsun.LSUNBedroomsValidation
|
||||
params:
|
||||
size: 256
|
@ -1,92 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 5.0e-05
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0155
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: image
|
||||
cond_stage_key: image
|
||||
image_size: 32
|
||||
channels: 4
|
||||
cond_stage_trainable: false
|
||||
concat_mode: false
|
||||
scale_by_std: true
|
||||
monitor: val/loss_simple_ema
|
||||
scheduler_config:
|
||||
target: ldm.lr_scheduler.LambdaLinearScheduler
|
||||
params:
|
||||
warm_up_steps:
|
||||
- 10000
|
||||
cycle_lengths:
|
||||
- 10000000000000
|
||||
f_start:
|
||||
- 1.0e-06
|
||||
f_max:
|
||||
- 1.0
|
||||
f_min:
|
||||
- 1.0
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 32
|
||||
in_channels: 4
|
||||
out_channels: 4
|
||||
model_channels: 192
|
||||
attention_resolutions:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 8
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_heads: 8
|
||||
use_scale_shift_norm: true
|
||||
resblock_updown: true
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.AutoencoderKL
|
||||
params:
|
||||
embed_dim: 4
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: true
|
||||
z_channels: 4
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
|
||||
cond_stage_config: '__is_unconditional__'
|
||||
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 96
|
||||
num_workers: 5
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.lsun.LSUNChurchesTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.lsun.LSUNChurchesValidation
|
||||
params:
|
||||
size: 256
|
@ -1,59 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0205
|
||||
log_every_t: 100
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: image
|
||||
cond_stage_key: segmentation
|
||||
image_size: 64
|
||||
channels: 3
|
||||
concat_mode: true
|
||||
cond_stage_trainable: true
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 6
|
||||
out_channels: 3
|
||||
model_channels: 128
|
||||
attention_resolutions:
|
||||
- 32
|
||||
- 16
|
||||
- 8
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 4
|
||||
- 8
|
||||
num_heads: 8
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.SpatialRescaler
|
||||
params:
|
||||
n_stages: 2
|
||||
in_channels: 182
|
||||
out_channels: 3
|
@ -1,78 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 1.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0205
|
||||
log_every_t: 100
|
||||
timesteps: 1000
|
||||
loss_type: l1
|
||||
first_stage_key: image
|
||||
cond_stage_key: segmentation
|
||||
image_size: 128
|
||||
channels: 3
|
||||
concat_mode: true
|
||||
cond_stage_trainable: true
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 128
|
||||
in_channels: 6
|
||||
out_channels: 3
|
||||
model_channels: 128
|
||||
attention_resolutions:
|
||||
- 32
|
||||
- 16
|
||||
- 8
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 4
|
||||
- 8
|
||||
num_heads: 8
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
monitor: val/rec_loss
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.SpatialRescaler
|
||||
params:
|
||||
n_stages: 2
|
||||
in_channels: 182
|
||||
out_channels: 3
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 8
|
||||
wrap: false
|
||||
num_workers: 10
|
||||
train:
|
||||
target: ldm.data.landscapes.RFWTrain
|
||||
params:
|
||||
size: 768
|
||||
crop_size: 512
|
||||
segmentation_to_float32: true
|
||||
validation:
|
||||
target: ldm.data.landscapes.RFWValidation
|
||||
params:
|
||||
size: 768
|
||||
crop_size: 512
|
||||
segmentation_to_float32: true
|
2
models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt
Normal file
2
models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt
Normal file
@ -0,0 +1,2 @@
|
||||
See docs/features/INSTALLING_MODELS.md for how to populate this
|
||||
directory with one or more Stable Diffusion model weight files.
|
@ -1,77 +0,0 @@
|
||||
model:
|
||||
base_learning_rate: 2.0e-06
|
||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
||||
params:
|
||||
linear_start: 0.0015
|
||||
linear_end: 0.0195
|
||||
num_timesteps_cond: 1
|
||||
log_every_t: 200
|
||||
timesteps: 1000
|
||||
first_stage_key: image
|
||||
cond_stage_key: caption
|
||||
image_size: 64
|
||||
channels: 3
|
||||
cond_stage_trainable: true
|
||||
conditioning_key: crossattn
|
||||
monitor: val/loss_simple_ema
|
||||
unet_config:
|
||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
||||
params:
|
||||
image_size: 64
|
||||
in_channels: 3
|
||||
out_channels: 3
|
||||
model_channels: 192
|
||||
attention_resolutions:
|
||||
- 8
|
||||
- 4
|
||||
- 2
|
||||
num_res_blocks: 2
|
||||
channel_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 5
|
||||
num_head_channels: 32
|
||||
use_spatial_transformer: true
|
||||
transformer_depth: 1
|
||||
context_dim: 640
|
||||
first_stage_config:
|
||||
target: ldm.models.autoencoder.VQModelInterface
|
||||
params:
|
||||
embed_dim: 3
|
||||
n_embed: 8192
|
||||
ddconfig:
|
||||
double_z: false
|
||||
z_channels: 3
|
||||
resolution: 256
|
||||
in_channels: 3
|
||||
out_ch: 3
|
||||
ch: 128
|
||||
ch_mult:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
num_res_blocks: 2
|
||||
attn_resolutions: []
|
||||
dropout: 0.0
|
||||
lossconfig:
|
||||
target: torch.nn.Identity
|
||||
cond_stage_config:
|
||||
target: ldm.modules.encoders.modules.BERTEmbedder
|
||||
params:
|
||||
n_embed: 640
|
||||
n_layer: 32
|
||||
data:
|
||||
target: main.DataModuleFromConfig
|
||||
params:
|
||||
batch_size: 28
|
||||
num_workers: 10
|
||||
wrap: false
|
||||
train:
|
||||
target: ldm.data.previews.pytorch_dataset.PreviewsTrain
|
||||
params:
|
||||
size: 256
|
||||
validation:
|
||||
target: ldm.data.previews.pytorch_dataset.PreviewsValidation
|
||||
params:
|
||||
size: 256
|
@ -3,20 +3,369 @@
|
||||
# Before running stable-diffusion on an internet-isolated machine,
|
||||
# run this script from one with internet connectivity. The
|
||||
# two machines must share a common .cache directory.
|
||||
from transformers import CLIPTokenizer, CLIPTextModel
|
||||
#
|
||||
# Coauthor: Kevin Turner http://github.com/keturn
|
||||
#
|
||||
print('Loading Python libraries...\n')
|
||||
import argparse
|
||||
import clip
|
||||
from transformers import BertTokenizerFast, AutoFeatureExtractor
|
||||
import sys
|
||||
import transformers
|
||||
import os
|
||||
import warnings
|
||||
import torch
|
||||
import urllib.request
|
||||
import zipfile
|
||||
import traceback
|
||||
import getpass
|
||||
import requests
|
||||
from urllib import request
|
||||
from tqdm import tqdm
|
||||
from omegaconf import OmegaConf
|
||||
from pathlib import Path
|
||||
from transformers import CLIPTokenizer, CLIPTextModel
|
||||
from transformers import BertTokenizerFast, AutoFeatureExtractor
|
||||
from huggingface_hub import hf_hub_download, HfFolder, hf_hub_url
|
||||
|
||||
transformers.logging.set_verbosity_error()
|
||||
|
||||
#--------------------------globals--
|
||||
# Directory that receives the downloaded weight (.ckpt) files.
Model_dir = './models/ldm/stable-diffusion-v1/'
# Model registry file that this script creates or updates.
Config_file = './configs/models.yaml'
# Directory containing the inference config files named in Datasets[...]['config'].
SD_Configs = './configs/stable-diffusion'
# Catalogue of downloadable weight files, keyed by the model name used in
# Config_file. Each entry records the Hugging Face repo and file name, the
# inference config to pair it with (the special value 'VAE' marks an
# autoencoder rather than a full model), whether it belongs to the
# recommended set, and the image size to record for it in Config_file.
Datasets = {
    'stable-diffusion-1.5': {
        'description': 'The newest Stable Diffusion version 1.5 weight file (4.27 GB)',
        'repo_id': 'runwayml/stable-diffusion-v1-5',
        'config': 'v1-inference.yaml',
        'file': 'v1-5-pruned-emaonly.ckpt',
        'recommended': True,
        'width': 512,
        'height': 512,
    },
    'inpainting-1.5': {
        'description': 'RunwayML SD 1.5 model optimized for inpainting (4.27 GB)',
        'repo_id': 'runwayml/stable-diffusion-inpainting',
        'config': 'v1-inpainting-inference.yaml',
        'file': 'sd-v1-5-inpainting.ckpt',
        'recommended': True,
        'width': 512,
        'height': 512,
    },
    'stable-diffusion-1.4': {
        'description': 'The original Stable Diffusion version 1.4 weight file (4.27 GB)',
        'repo_id': 'CompVis/stable-diffusion-v-1-4-original',
        'config': 'v1-inference.yaml',
        'file': 'sd-v1-4.ckpt',
        'recommended': False,
        'width': 512,
        'height': 512,
    },
    'waifu-diffusion-1.3': {
        # The size previously read "(4.27)" with no unit.
        'description': 'Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB)',
        'repo_id': 'hakurei/waifu-diffusion-v1-3',
        'config': 'v1-inference.yaml',
        'file': 'model-epoch09-float32.ckpt',
        'recommended': False,
        'width': 512,
        'height': 512,
    },
    'ft-mse-improved-autoencoder-840000': {
        'description': 'StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB)',
        'repo_id': 'stabilityai/sd-vae-ft-mse-original',
        'config': 'VAE',
        'file': 'vae-ft-mse-840000-ema-pruned.ckpt',
        'recommended': True,
        'width': 512,
        'height': 512,
    },
}
# Comment header written at the top of Config_file ahead of the YAML body.
Config_preamble = '''# This file describes the alternative machine learning models
# available to InvokeAI script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
'''
|
||||
|
||||
#---------------------------------------------
|
||||
def introduction():
    '''Print the welcome banner that explains what this script is about to do.'''
    welcome = '''Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
and other large models that are needed for text to image generation. At any point you may interrupt
this program and resume later.\n'''
    print(welcome)
|
||||
|
||||
#--------------------------------------------
|
||||
def postscript():
    '''Print the closing message that lists the commands for launching InvokeAI.'''
    farewell = '''You're all set! You may now launch InvokeAI using one of these two commands:
Web version:

python scripts/invoke.py --web (connect to http://localhost:9090)

Command-line version:

python scripts/invoke.py

Have fun!
'''
    print(farewell)
|
||||
|
||||
#---------------------------------------------
|
||||
def yes_or_no(prompt:str, default_yes=True)->bool:
    '''
    Ask a yes/no question on the console and return the answer as a bool.

    prompt      -- question text; the default answer is appended as " [y] " or " [n] "
    default_yes -- answer assumed when the user just presses <enter>

    When the default is yes, anything that does not start with "n"/"N" counts
    as yes; when the default is no, only answers starting with "y"/"Y" count
    as yes.
    '''
    default = 'y' if default_yes else 'n'
    # Strip before inspecting the first character: otherwise " n" is read as
    # "not n" (yes) and " y" as "not y" (no). A whitespace-only reply
    # falls back to the default, exactly like an empty one.
    response = input(f'{prompt} [{default}] ').strip() or default
    first = response[0].lower()
    if default_yes:
        return first != 'n'
    return first == 'y'
|
||||
|
||||
#---------------------------------------------
|
||||
def user_wants_to_download_weights()->str:
    '''
    Explain the weight-download options, then keep prompting until the user
    picks one. Returns one of "skip", "recommended" or "customized"; an empty
    reply means "recommended".
    '''
    print('''You can download and configure the weights files manually or let this
script do it for you. Manual installation is described at:

https://github.com/invoke-ai/InvokeAI/blob/main/docs/installation/INSTALLING_MODELS.md

You may download the recommended models (about 10GB total), select a customized set, or
completely skip this step.
'''
    )
    # First character of the reply -> selection, in either case.
    by_initial = {
        'r': 'recommended', 'R': 'recommended',
        'c': 'customized', 'C': 'customized',
        's': 'skip', 'S': 'skip',
    }
    while True:
        reply = input('Download <r>ecommended models, <c>ustomize the list, or <s>kip this step? [r]: ')
        if not reply:
            return 'recommended'
        picked = by_initial.get(reply[0])
        if picked is not None:
            return picked
|
||||
|
||||
#---------------------------------------------
|
||||
def select_datasets(action:str):
    '''
    Build the set of weight files to download and ask the user to confirm it.

    action -- 'customized' asks about every entry of Datasets individually;
              any other value selects the entries flagged as recommended.

    Returns a dict mapping each selected model name to its 1-based selection
    number, or None when the user declines the download and also declines to
    change the selection. Declining the download but agreeing to change the
    selection restarts the loop in 'customized' mode.
    '''
    while True:
        chosen = {}
        next_index = 1

        if action == 'customized':
            print('''
Choose the weight file(s) you wish to download. Before downloading you
will be given the option to view and change your selections.
'''
            )
            for name, info in Datasets.items():
                tag = '(recommended)' if info['recommended'] else ''
                print(f'[{next_index}] {name}:\n {info["description"]} {tag}')
                # NOTE(review): the index is assumed to advance only when an
                # entry is selected -- confirm against the upstream file.
                if yes_or_no(' Download?',default_yes=info['recommended']):
                    chosen[name] = next_index
                    next_index += 1
        else:
            for name, info in Datasets.items():
                if info['recommended']:
                    chosen[name] = next_index
                    next_index += 1

        print('The following weight files will be downloaded:')
        for position, name in enumerate(chosen):
            # The first model selected is flagged as the default.
            marker = '*' if position == 0 else ''
            print(f' [{chosen[name]}] {name}{marker}')
        print("*default")

        if yes_or_no('Ok to download?'):
            return chosen
        if not yes_or_no('Change your selection?'):
            return None
        action = 'customized'
|
||||
|
||||
|
||||
#-------------------------------Authenticate against Hugging Face
|
||||
def authenticate():
    '''
    Walk the user through accepting the model licenses on Hugging Face and
    return an access token.

    The token stored by HfFolder is returned when there is one; otherwise the
    user is asked to paste a token (read without echo), which is saved through
    HfFolder and then returned.
    '''
    license_steps = '''
To download the Stable Diffusion weight files from the official Hugging Face
repository, you need to read and accept the CreativeML Responsible AI license.

This involves a few easy steps.

1. If you have not already done so, create an account on Hugging Face's web site
using the "Sign Up" button:

https://huggingface.co/join

You will need to verify your email address as part of the HuggingFace
registration process.

2. Log into your Hugging Face account:

https://huggingface.co/login

3. Accept the license terms located here:

https://huggingface.co/runwayml/stable-diffusion-v1-5

and here:

https://huggingface.co/runwayml/stable-diffusion-inpainting

(Yes, you have to accept two slightly different license agreements)
'''
    print(license_steps)
    input('Press <enter> when you are ready to continue:')

    token = HfFolder.get_token()
    if token is not None:
        return token

    token_steps = '''
4. Thank you! The last step is to enter your HuggingFace access token so that
this script is authorized to initiate the download. Go to the access tokens
page of your Hugging Face account and create a token by clicking the
"New token" button:

https://huggingface.co/settings/tokens

(You can enter anything you like in the token creation field marked "Name".
"Role" should be "read").

Now copy the token to your clipboard and paste it here: '''
    print(token_steps)
    token = getpass.getpass()
    HfFolder.save_token(token)
    return token
|
||||
|
||||
#---------------------------------------------
|
||||
# look for legacy model.ckpt in models directory and offer to
|
||||
# normalize its name
|
||||
def migrate_models_ckpt():
    '''
    Offer to rename a legacy "model.ckpt" in Model_dir to the canonical
    Stable Diffusion 1.4 file name recorded in Datasets.

    Does nothing when no "model.ckpt" is present, when the user declines, or
    when a file with the canonical name already exists.
    '''
    legacy_path = os.path.join(Model_dir,'model.ckpt')
    if not os.path.exists(legacy_path):
        return
    new_name = Datasets['stable-diffusion-1.4']['file']
    new_path = os.path.join(Model_dir,new_name)
    # The message used to say "v4.1"; the file is renamed to the 1.4 entry.
    print('You seem to have the Stable Diffusion v1.4 "model.ckpt" already installed.')
    if os.path.exists(new_path):
        # os.rename would fail here on Windows and silently replace the
        # existing file elsewhere, so leave both files alone.
        print(f'"{new_name}" already exists; leaving model.ckpt in place.')
        return
    rename = yes_or_no(f'Ok to rename it to "{new_name}" for future reference?')
    if rename:
        print(f'model.ckpt => {new_name}')
        os.rename(legacy_path,new_path)
|
||||
|
||||
#---------------------------------------------
|
||||
def download_weight_datasets(models:dict, access_token:str):
    '''
    Download the weight file of every selected model.

    models       -- mapping whose keys are model names from Datasets; None or
                    an empty mapping means there is nothing to download
    access_token -- Hugging Face token passed on to download_with_resume()

    Returns a dict with one `name: True` entry per model whose download
    succeeded or whose file was already complete.
    '''
    migrate_models_ckpt()
    successful = dict()
    # `models` may be None when the user declined the proposed selection;
    # treat that as "nothing selected" instead of failing on None.keys().
    for mod in models or {}:
        success = download_with_resume(
            repo_id=Datasets[mod]['repo_id'],
            model_name=Datasets[mod]['file'],
            access_token=access_token
        )
        if success:
            successful[mod] = True
    # Only report success when something actually was installed.
    if successful:
        keys = ', '.join(successful.keys())
        print(f'Successfully installed {keys}')
    return successful
|
||||
|
||||
#---------------------------------------------
|
||||
def download_with_resume(repo_id:str, model_name:str, access_token:str)->bool:
    '''
    Download one file from a Hugging Face repo into Model_dir, resuming a
    partial download when one is found.

    repo_id      -- Hugging Face repository id
    model_name   -- file name inside the repository; also the local file name
    access_token -- Hugging Face token sent as a Bearer authorization header

    Returns True when the file is complete afterwards (including the case
    where it already was), False on any HTTP or I/O error. Errors are printed
    rather than raised so the caller can carry on with the next file.
    '''
    model_dest = os.path.join(Model_dir, model_name)
    os.makedirs(os.path.dirname(model_dest), exist_ok=True)
    url = hf_hub_url(repo_id, model_name)

    header = {"Authorization": f'Bearer {access_token}'}
    open_mode = 'wb'
    exist_size = 0

    if os.path.exists(model_dest):
        # Ask only for the bytes we do not have yet.
        exist_size = os.path.getsize(model_dest)
        header['Range'] = f'bytes={exist_size}-'
        open_mode = 'ab'

    try:
        # The request is inside the try block so that connection errors are
        # reported per file; the `with` releases the connection on every path.
        with requests.get(url, headers=header, stream=True) as resp:
            if resp.status_code==416: # "range not satisfiable", which means nothing to return
                print(f'* {model_name}: complete file found. Skipping.')
                return True

            total = int(resp.headers.get('content-length', 0))

            if resp.status_code not in (200, 206):
                # Authorization or availability problem: show the server's message.
                print(f'* {model_name}: {resp.text}')
                return False
            if resp.status_code == 200 and 0 < total < 2000:
                # A complete response this small cannot be a weight file;
                # treat it as an error page. A short 206 tail is legitimate.
                print(f'* {model_name}: {resp.text}')
                return False

            if exist_size > 0 and resp.status_code != 206:
                # The server ignored the Range header and is sending the whole
                # file; appending it to the partial file would corrupt it.
                print(f'* {model_name}: partial file found, but the server cannot resume. Restarting...')
                exist_size = 0
                open_mode = 'wb'
            elif exist_size > 0:
                print(f'* {model_name}: partial file found. Resuming...')
            else:
                print(f'* {model_name}: Downloading...')

            with open(model_dest, open_mode) as file, tqdm(
                    desc=model_name,
                    initial=exist_size,
                    # Without a content-length the total size is unknown.
                    total=(total+exist_size) if total else None,
                    unit='iB',
                    unit_scale=True,
                    unit_divisor=1000,
            ) as bar:
                for data in resp.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)
    except Exception as e:
        print(f'An error occurred while downloading {model_name}: {str(e)}')
        return False
    return True
|
||||
|
||||
#---------------------------------------------
|
||||
def update_config_file(successfully_downloaded:dict):
    '''
    Regenerate Config_file so that it lists the freshly downloaded models.

    successfully_downloaded -- mapping whose keys are model names from Datasets

    The new contents are written to a temporary file next to Config_file and
    then moved into place. Failures are printed, not raised.
    '''
    tmpfile = os.path.join(os.path.dirname(Config_file),'new_config.tmp')
    try:
        yaml = new_config_file_contents(successfully_downloaded)
        with open(tmpfile, 'w') as outfile:
            outfile.write(Config_preamble)
            outfile.write(yaml)
        # os.replace overwrites an existing Config_file on every platform;
        # os.rename raises on Windows when the destination already exists.
        os.replace(tmpfile,Config_file)
    except Exception as e:
        print(f'**Error creating config file {Config_file}: {str(e)} **')
        # Best-effort cleanup so a failed run does not leave the temp file behind.
        try:
            os.remove(tmpfile)
        except OSError:
            pass
        return
    print(f'Successfully created new configuration file {Config_file}')
|
||||
|
||||
|
||||
#---------------------------------------------
|
||||
def new_config_file_contents(successfully_downloaded:dict)->str:
    '''
    Return the YAML text for Config_file after merging in the downloaded models.

    successfully_downloaded -- mapping whose keys are model names from Datasets

    Existing stanzas in Config_file are kept and updated in place. Entries
    whose 'config' is 'VAE' get no stanza of their own; instead their file is
    attached as 'vae' to every model stanza written here.
    '''
    # On a fresh install the config file does not exist yet; start from an
    # empty configuration instead of failing in OmegaConf.load().
    if os.path.isfile(Config_file):
        conf = OmegaConf.load(Config_file)
    else:
        conf = OmegaConf.create()

    # find the VAE file, if there is one
    vae = None
    default_selected = False

    for model in successfully_downloaded:
        if Datasets[model]['config'] == 'VAE':
            vae = Datasets[model]['file']

    for model in successfully_downloaded:
        if Datasets[model]['config'] == 'VAE': # skip VAE entries
            continue
        stanza = conf[model] if model in conf else { }

        stanza['description'] = Datasets[model]['description']
        stanza['weights'] = os.path.join(Model_dir,Datasets[model]['file'])
        stanza['config'] = os.path.join(SD_Configs, Datasets[model]['config'])
        stanza['width'] = Datasets[model]['width']
        stanza['height'] = Datasets[model]['height']
        stanza.pop('default',None) # this will be set later
        if vae:
            stanza['vae'] = os.path.join(Model_dir,vae)
        # BUG - the first stanza is always the default. User should select.
        if not default_selected:
            stanza['default'] = True
            default_selected = True
        conf[model] = stanza
    return OmegaConf.to_yaml(conf)
|
||||
|
||||
#---------------------------------------------
|
||||
# this will preload the Bert tokenizer files
|
||||
def download_bert():
|
||||
@ -66,7 +415,6 @@ def download_gfpgan():
|
||||
print(traceback.format_exc())
|
||||
|
||||
print('Loading models from GFPGAN')
|
||||
import urllib.request
|
||||
for model in (
|
||||
[
|
||||
'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth',
|
||||
@ -149,15 +497,46 @@ def download_safety_checker():
|
||||
safety_feature_extractor = AutoFeatureExtractor.from_pretrained(safety_model_id)
|
||||
safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id)
|
||||
print('...success')
|
||||
|
||||
|
||||
#-------------------------------------
|
||||
if __name__ == '__main__':
|
||||
download_bert()
|
||||
download_kornia()
|
||||
download_clip()
|
||||
download_gfpgan()
|
||||
download_codeformer()
|
||||
download_clipseg()
|
||||
download_safety_checker()
|
||||
parser = argparse.ArgumentParser(description='InvokeAI model downloader')
|
||||
parser.add_argument('--interactive',
|
||||
dest='interactive',
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=True,
|
||||
help='run in interactive mode (default)')
|
||||
opt = parser.parse_args()
|
||||
|
||||
try:
|
||||
if opt.interactive:
|
||||
introduction()
|
||||
print('** WEIGHT SELECTION **')
|
||||
choice = user_wants_to_download_weights()
|
||||
if choice != 'skip':
|
||||
models = select_datasets(choice)
|
||||
if models is None:
|
||||
if yes_or_no('Quit?',default_yes=False):
|
||||
sys.exit(0)
|
||||
print('** LICENSE AGREEMENT FOR WEIGHT FILES **')
|
||||
access_token = authenticate()
|
||||
print('\n** DOWNLOADING WEIGHTS **')
|
||||
successfully_downloaded = download_weight_datasets(models, access_token)
|
||||
update_config_file(successfully_downloaded)
|
||||
else:
|
||||
print('\n** DOWNLOADING SUPPORT MODELS **')
|
||||
download_bert()
|
||||
download_kornia()
|
||||
download_clip()
|
||||
download_gfpgan()
|
||||
download_codeformer()
|
||||
download_clipseg()
|
||||
download_safety_checker()
|
||||
postscript()
|
||||
except KeyboardInterrupt:
|
||||
print('\nGoodbye! Come back soon.')
|
||||
except Exception as e:
|
||||
print(f'\nA problem occurred during download.\nThe error was: "{str(e)}"')
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user