mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

commit e7368d7231: preload_models interactively downloads sd model files

.github/workflows/test-invoke-conda.yml (vendored, 4 lines changed)
@@ -84,7 +84,9 @@ jobs:
       - name: run preload_models.py
         id: run-preload-models
-        run: python scripts/preload_models.py
+        run: |
+          python scripts/preload_models.py \
+            --no-interactive
       - name: Run the tests
         id: run-tests
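The step above runs the model preloader with prompts disabled so the workflow cannot hang waiting for input. A minimal sketch of reproducing the same step outside CI, assuming a local InvokeAI checkout and the `--no-interactive` flag added by this commit:

```python
import subprocess
import sys

# Run the preloader exactly as the CI step does: no prompts, current
# Python interpreter, invoked from the root of the InvokeAI checkout.
result = subprocess.run(
    [sys.executable, "scripts/preload_models.py", "--no-interactive"],
    check=False,
)
print(f"preload_models.py exited with code {result.returncode}")
```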
.gitignore (vendored, 8 lines changed)
@@ -199,7 +199,13 @@ checkpoints
 .scratch/
 .vscode/
 gfpgan/
-models/ldm/stable-diffusion-v1/model.sha256
+models/ldm/stable-diffusion-v1/*.sha256
 
 # GFPGAN model files
 gfpgan/
+
+# config file (will be created by installer)
+configs/models.yaml
+
+# weights (will be created by installer)
+models/ldm/stable-diffusion-v1/*.ckpt
@@ -1,54 +0,0 @@
model:
  base_learning_rate: 4.5e-6
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: "val/rec_loss"
    embed_dim: 16
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 0.000001
        disc_weight: 0.5

    ddconfig:
      double_z: True
      z_channels: 16
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1,1,2,2,4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0


data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    wrap: True
    train:
      target: ldm.data.imagenet.ImageNetSRTrain
      params:
        size: 256
        degradation: pil_nearest
    validation:
      target: ldm.data.imagenet.ImageNetSRValidation
      params:
        size: 256
        degradation: pil_nearest

lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1000
        max_images: 8
        increase_log_steps: True

  trainer:
    benchmark: True
    accumulate_grad_batches: 2
@@ -1,53 +0,0 @@
model:
  base_learning_rate: 4.5e-6
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: "val/rec_loss"
    embed_dim: 4
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 0.000001
        disc_weight: 0.5

    ddconfig:
      double_z: True
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1,2,4,4 ]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [ ]
      dropout: 0.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    wrap: True
    train:
      target: ldm.data.imagenet.ImageNetSRTrain
      params:
        size: 256
        degradation: pil_nearest
    validation:
      target: ldm.data.imagenet.ImageNetSRValidation
      params:
        size: 256
        degradation: pil_nearest

lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1000
        max_images: 8
        increase_log_steps: True

  trainer:
    benchmark: True
    accumulate_grad_batches: 2
@@ -1,54 +0,0 @@
model:
  base_learning_rate: 4.5e-6
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: "val/rec_loss"
    embed_dim: 3
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 0.000001
        disc_weight: 0.5

    ddconfig:
      double_z: True
      z_channels: 3
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1,2,4 ]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [ ]
      dropout: 0.0


data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    wrap: True
    train:
      target: ldm.data.imagenet.ImageNetSRTrain
      params:
        size: 256
        degradation: pil_nearest
    validation:
      target: ldm.data.imagenet.ImageNetSRValidation
      params:
        size: 256
        degradation: pil_nearest

lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1000
        max_images: 8
        increase_log_steps: True

  trainer:
    benchmark: True
    accumulate_grad_batches: 2
@@ -1,53 +0,0 @@
model:
  base_learning_rate: 4.5e-6
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: "val/rec_loss"
    embed_dim: 64
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 0.000001
        disc_weight: 0.5

    ddconfig:
      double_z: True
      z_channels: 64
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [ 1,1,2,2,4,4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16,8]
      dropout: 0.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    wrap: True
    train:
      target: ldm.data.imagenet.ImageNetSRTrain
      params:
        size: 256
        degradation: pil_nearest
    validation:
      target: ldm.data.imagenet.ImageNetSRValidation
      params:
        size: 256
        degradation: pil_nearest

lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 1000
        max_images: 8
        increase_log_steps: True

  trainer:
    benchmark: True
    accumulate_grad_batches: 2
@@ -1,86 +0,0 @@
model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    image_size: 64
    channels: 3
    monitor: val/loss_simple_ema

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions:
        # note: this isn\t actually the resolution but
        # the downsampling factor, i.e. this corresnponds to
        # attention on spatial resolution 8,16,32, as the
        # spatial reolution of the latents is 64 for f4
        - 8
        - 4
        - 2
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 3
        - 4
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ckpt_path: models/first_stage_models/vq-f4/model.ckpt
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 48
    num_workers: 5
    wrap: false
    train:
      target: taming.data.faceshq.CelebAHQTrain
      params:
        size: 256
    validation:
      target: taming.data.faceshq.CelebAHQValidation
      params:
        size: 256


lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False

  trainer:
    benchmark: True
@@ -1,98 +0,0 @@
model:
  base_learning_rate: 1.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 32
    channels: 4
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 4
        out_channels: 4
        model_channels: 256
        attention_resolutions:
        #note: this isn\t actually the resolution but
        # the downsampling factor, i.e. this corresnponds to
        # attention on spatial resolution 8,16,32, as the
        # spatial reolution of the latents is 32 for f8
        - 4
        - 2
        - 1
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 4
        num_head_channels: 32
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 512
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 4
        n_embed: 16384
        ckpt_path: configs/first_stage_models/vq-f8/model.yaml
        ddconfig:
          double_z: false
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions:
          - 32
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: ldm.modules.encoders.modules.ClassEmbedder
      params:
        embed_dim: 512
        key: class_label
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 64
    num_workers: 12
    wrap: false
    train:
      target: ldm.data.imagenet.ImageNetTrain
      params:
        config:
          size: 256
    validation:
      target: ldm.data.imagenet.ImageNetValidation
      params:
        config:
          size: 256


lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False

  trainer:
    benchmark: True
@@ -1,68 +0,0 @@
model:
  base_learning_rate: 0.0001
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: class_label
    image_size: 64
    channels: 3
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss
    use_ema: False

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 192
        attention_resolutions:
        - 8
        - 4
        - 2
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 3
        - 5
        num_heads: 1
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 512

    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.ClassEmbedder
      params:
        n_classes: 1001
        embed_dim: 512
        key: class_label
@@ -1,85 +0,0 @@
model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    image_size: 64
    channels: 3
    monitor: val/loss_simple_ema
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions:
        # note: this isn\t actually the resolution but
        # the downsampling factor, i.e. this corresnponds to
        # attention on spatial resolution 8,16,32, as the
        # spatial reolution of the latents is 64 for f4
        - 8
        - 4
        - 2
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 3
        - 4
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        embed_dim: 3
        n_embed: 8192
        ckpt_path: configs/first_stage_models/vq-f4/model.yaml
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 42
    num_workers: 5
    wrap: false
    train:
      target: taming.data.faceshq.FFHQTrain
      params:
        size: 256
    validation:
      target: taming.data.faceshq.FFHQValidation
      params:
        size: 256


lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False

  trainer:
    benchmark: True
@@ -1,85 +0,0 @@
model:
  base_learning_rate: 2.0e-06
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0195
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    image_size: 64
    channels: 3
    monitor: val/loss_simple_ema
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 64
        in_channels: 3
        out_channels: 3
        model_channels: 224
        attention_resolutions:
        # note: this isn\t actually the resolution but
        # the downsampling factor, i.e. this corresnponds to
        # attention on spatial resolution 8,16,32, as the
        # spatial reolution of the latents is 64 for f4
        - 8
        - 4
        - 2
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 3
        - 4
        num_head_channels: 32
    first_stage_config:
      target: ldm.models.autoencoder.VQModelInterface
      params:
        ckpt_path: configs/first_stage_models/vq-f4/model.yaml
        embed_dim: 3
        n_embed: 8192
        ddconfig:
          double_z: false
          z_channels: 3
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config: __is_unconditional__
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 48
    num_workers: 5
    wrap: false
    train:
      target: ldm.data.lsun.LSUNBedroomsTrain
      params:
        size: 256
    validation:
      target: ldm.data.lsun.LSUNBedroomsValidation
      params:
        size: 256


lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False

  trainer:
    benchmark: True
@@ -1,91 +0,0 @@
model:
  base_learning_rate: 5.0e-5   # set to target_lr by starting main.py with '--scale_lr False'
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.0155
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    loss_type: l1
    first_stage_key: "image"
    cond_stage_key: "image"
    image_size: 32
    channels: 4
    cond_stage_trainable: False
    concat_mode: False
    scale_by_std: True
    monitor: 'val/loss_simple_ema'

    scheduler_config: # 10000 warmup steps
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps: [10000]
        cycle_lengths: [10000000000000]
        f_start: [1.e-6]
        f_max: [1.]
        f_min: [ 1.]

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 4
        out_channels: 4
        model_channels: 192
        attention_resolutions: [ 1, 2, 4, 8 ]   # 32, 16, 8, 4
        num_res_blocks: 2
        channel_mult: [ 1,2,2,4,4 ]  # 32, 16, 8, 4, 2
        num_heads: 8
        use_scale_shift_norm: True
        resblock_updown: True

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: "val/rec_loss"
        ckpt_path: "models/first_stage_models/kl-f8/model.ckpt"
        ddconfig:
          double_z: True
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [ 1,2,4,4 ]  # num_down = len(ch_mult)-1
          num_res_blocks: 2
          attn_resolutions: [ ]
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config: "__is_unconditional__"

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 96
    num_workers: 5
    wrap: False
    train:
      target: ldm.data.lsun.LSUNChurchesTrain
      params:
        size: 256
    validation:
      target: ldm.data.lsun.LSUNChurchesValidation
      params:
        size: 256

lightning:
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 5000
        max_images: 8
        increase_log_steps: False


  trainer:
    benchmark: True
@@ -1,71 +0,0 @@
model:
  base_learning_rate: 5.0e-05
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.012
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 32
    channels: 4
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions:
        - 4
        - 2
        - 1
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 4
        - 4
        num_heads: 8
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 1280
        use_checkpoint: true
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.BERTEmbedder
      params:
        n_embed: 1280
        n_layer: 32
@@ -1,29 +1,36 @@
 # This file describes the alternative machine learning models
-# available to the dream script.
+# available to InvokeAI script.
 #
 # To add a new model, follow the examples below. Each
 # model requires a model config file, a weights file,
 # and the width and height of the images it
 # was trained on.
 stable-diffusion-1.4:
-    config: configs/stable-diffusion/v1-inference.yaml
-    weights: models/ldm/stable-diffusion-v1/model.ckpt
-    # vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
-    description: Stable Diffusion inference model version 1.4
-    width: 512
-    height: 512
-    default: true
-inpainting-1.5:
-    description: runwayML tuned inpainting model v1.5
-    weights: models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
-    config: configs/stable-diffusion/v1-inpainting-inference.yaml
-    # vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+    config: ./configs/stable-diffusion/v1-inference.yaml
+    weights: ./models/ldm/stable-diffusion-v1/sd-v1-4.ckpt
+    vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+    description: The original Stable Diffusion version 1.4 weight file (4.27 GB)
     width: 512
     height: 512
 stable-diffusion-1.5:
-    config: configs/stable-diffusion/v1-inference.yaml
-    weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
-    # vae: models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
-    description: Stable Diffusion inference model version 1.5
+    description: The newest Stable Diffusion version 1.5 weight file (4.27 GB)
+    weights: ./models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt
+    config: ./configs/stable-diffusion/v1-inference.yaml
     width: 512
     height: 512
+    vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+    default: true
+inpainting-1.5:
+    description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB)
+    weights: ./models/ldm/stable-diffusion-v1/sd-v1-5-inpainting.ckpt
+    config: ./configs/stable-diffusion/v1-inpainting-inference.yaml
+    width: 512
+    height: 512
+    vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
+waifu-diffusion-1.3:
+    description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27)
+    weights: ./models/ldm/stable-diffusion-v1/model-epoch09-float32.ckpt
+    config: ./configs/stable-diffusion/v1-inference.yaml
+    width: 512
+    height: 512
+    vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
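The reworked `configs/models.yaml` above is plain YAML keyed by model name, with exactly one stanza carrying `default: true`. A minimal sketch of picking out that default entry, assuming PyYAML is installed and the file lives at the path shown (the helper name is hypothetical, not part of InvokeAI):

```python
from pathlib import Path

import yaml  # PyYAML

def default_model(path="configs/models.yaml"):
    """Return (name, stanza) for the entry marked `default: true`,
    falling back to the first stanza if none is marked."""
    entries = yaml.safe_load(Path(path).read_text()) or {}
    for name, stanza in entries.items():
        if stanza.get("default"):
            return name, stanza
    return next(iter(entries.items()), (None, None))

name, stanza = default_model()
print(name, stanza.get("weights") if stanza else None)
```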
@@ -1,68 +0,0 @@
model:
  base_learning_rate: 0.0001
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.0015
    linear_end: 0.015
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: jpg
    cond_stage_key: nix
    image_size: 48
    channels: 16
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_by_std: false
    scale_factor: 0.22765929
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 48
        in_channels: 16
        out_channels: 16
        model_channels: 448
        attention_resolutions:
        - 4
        - 2
        - 1
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 3
        - 4
        use_scale_shift_norm: false
        resblock_updown: false
        num_head_channels: 32
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        monitor: val/rec_loss
        embed_dim: 16
        ddconfig:
          double_z: true
          z_channels: 16
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 1
          - 2
          - 2
          - 4
          num_res_blocks: 2
          attn_resolutions:
          - 16
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
    cond_stage_config:
      target: torch.nn.Identity
@@ -385,7 +385,7 @@ automatically.
 Example:
 
 <pre>
-invoke> <b>!import_model models/ldm/stable-diffusion-v1/ model-epoch08-float16.ckpt</b>
+invoke> <b>!import_model models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt</b>
 >> Model import in process. Please enter the values needed to configure this model:
 
 Name for this model: <b>waifu-diffusion</b>
docs/installation/INSTALLING_MODELS.md (new file, 267 lines added)
@@ -0,0 +1,267 @@
---
title: Installing Models
---

# :octicons-paintbrush-16: Installing Models

## Model Weight Files

The model weight files ('*.ckpt') are the Stable Diffusion "secret
sauce". They are the product of training the AI on millions of
captioned images gathered from multiple sources.

Originally there was only a single Stable Diffusion weights file,
which many people named `model.ckpt`. Now there are dozens or more
that have been "fine tuned" to provide particular styles, genres, or
other features. InvokeAI allows you to install and run multiple model
weight files and switch between them quickly in the command-line and
web interfaces.

This manual will guide you through installing and configuring model
weight files.

## Base Models

InvokeAI comes with support for a good initial set of models listed in
the model configuration file `configs/models.yaml`. They are:

| Model                | Weight File                       | Description                                                | DOWNLOAD FROM |
| -------------------- | --------------------------------- | ---------------------------------------------------------- | ------------- |
| stable-diffusion-1.5 | v1-5-pruned-emaonly.ckpt          | Most recent version of base Stable Diffusion model         | https://huggingface.co/runwayml/stable-diffusion-v1-5 |
| stable-diffusion-1.4 | sd-v1-4.ckpt                      | Previous version of base Stable Diffusion model            | https://huggingface.co/CompVis/stable-diffusion-v-1-4-original |
| inpainting-1.5       | sd-v1-5-inpainting.ckpt           | Stable Diffusion 1.5 model specialized for inpainting      | https://huggingface.co/runwayml/stable-diffusion-inpainting |
| waifu-diffusion-1.3  | model-epoch09-float32.ckpt        | Stable Diffusion 1.4 trained to produce anime images       | https://huggingface.co/hakurei/waifu-diffusion-v1-3 |
| <all models>         | vae-ft-mse-840000-ema-pruned.ckpt | A fine-tune add-on file that improves face generation      | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/ |

Note that these files are covered by an "Ethical AI" license which
forbids certain uses. You will need to create an account on the
Hugging Face website and accept the license terms before you can
access the files.

The predefined configuration file for InvokeAI (located at
`configs/models.yaml`) provides entries for each of these weights
files. `stable-diffusion-1.5` is the default model used, and we
strongly recommend that you install this weights file if nothing else.

## Community-Contributed Models

There are too many to list here and more are being contributed every
day. Hugging Face maintains a [fast-growing
repository](https://huggingface.co/sd-concepts-library) of fine-tune
(".bin") models that can be imported into InvokeAI by passing the
`--embedding_path` option to the `invoke.py` command.

[This page](https://rentry.org/sdmodels) hosts a large list of
official and unofficial Stable Diffusion models and where they can be
obtained.

## Installation

There are three ways to install weights files:

1. During InvokeAI installation, the `preload_models.py` script can
   download them for you.

2. You can use the command-line interface (CLI) to import, configure
   and modify new model files.

3. You can download the files manually and add the appropriate entries
   to `models.yaml`.

### Installation via `preload_models.py`

This is the most automatic way. Run `scripts/preload_models.py` from
the console. It will ask you to select which models to download and
lead you through the steps of setting up a Hugging Face account if you
haven't done so already.

To start, from within the InvokeAI directory run the command `python
scripts/preload_models.py` (Linux/MacOS) or `python
scripts\preload_models.py` (Windows):

```
Loading Python libraries...

** INTRODUCTION **
Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
and other large models that are needed for text to image generation. At any point you may interrupt
this program and resume later.

** WEIGHT SELECTION **
Would you like to download the Stable Diffusion model weights now? [y]

Choose the weight file(s) you wish to download. Before downloading you
will be given the option to view and change your selections.

[1] stable-diffusion-1.5:
    The newest Stable Diffusion version 1.5 weight file (4.27 GB) (recommended)
    Download? [y]
[2] inpainting-1.5:
    RunwayML SD 1.5 model optimized for inpainting (4.27 GB) (recommended)
    Download? [y]
[3] stable-diffusion-1.4:
    The original Stable Diffusion version 1.4 weight file (4.27 GB)
    Download? [n] n
[4] waifu-diffusion-1.3:
    Stable Diffusion 1.4 fine tuned on anime-styled images (4.27)
    Download? [n] y
[5] ft-mse-improved-autoencoder-840000:
    StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) (recommended)
    Download? [y] y
The following weight files will be downloaded:
   [1] stable-diffusion-1.5*
   [2] inpainting-1.5
   [4] waifu-diffusion-1.3
   [5] ft-mse-improved-autoencoder-840000
*default
Ok to download? [y]
** LICENSE AGREEMENT FOR WEIGHT FILES **

1. To download the Stable Diffusion weight files you need to read and accept the
   CreativeML Responsible AI license. If you have not already done so, please
   create an account using the "Sign Up" button:

   https://huggingface.co

   You will need to verify your email address as part of the HuggingFace
   registration process.

2. After creating the account, login under your account and accept
   the license terms located here:

   https://huggingface.co/CompVis/stable-diffusion-v-1-4-original

Press <enter> when you are ready to continue:
...
```

When the script is complete, you will find the downloaded weights
files in `models/ldm/stable-diffusion-v1` and a matching configuration
file in `configs/models.yaml`.

You can run the script again to add any models you didn't select the
first time. Note that as a safety measure the script will _never_
remove a previously-installed weights file. You will have to do this
manually.

### Installation via the CLI

You can install a new model, including any of the community-supported
ones, via the command-line client's `!import_model` command.

1. First download the desired model weights file and place it under
   `models/ldm/stable-diffusion-v1/`. You may rename the weights file to
   something more memorable if you wish. Record the path of the weights
   file (e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`)

2. Launch the `invoke.py` CLI with `python scripts/invoke.py`.

3. At the `invoke>` command-line, enter the command
   `!import_model <path to model>`. For example:

   `invoke> !import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`

   (Hint - the CLI supports file path autocompletion. Type a bit of the path
   name and hit <tab> in order to get a choice of possible completions.)

4. Follow the wizard's instructions to complete installation as shown in the example
   here:

```
invoke> <b>!import_model models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt</b>
>> Model import in process. Please enter the values needed to configure this model:

Name for this model: <b>arabian-nights</b>
Description of this model: <b>Arabian Nights Fine Tune v1.0</b>
Configuration file for this model: <b>configs/stable-diffusion/v1-inference.yaml</b>
Default image width: <b>512</b>
Default image height: <b>512</b>
>> New configuration:
arabian-nights:
    config: configs/stable-diffusion/v1-inference.yaml
    description: Arabian Nights Fine Tune v1.0
    height: 512
    weights: models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
    width: 512
OK to import [n]? <b>y</b>
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
   | LatentDiffusion: Running in eps-prediction mode
   | DiffusionWrapper has 859.52 M params.
   | Making attention of type 'vanilla' with 512 in_channels
   | Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
   | Making attention of type 'vanilla' with 512 in_channels
   | Using faster float16 precision
```

If you've previously installed the fine-tune VAE file `vae-ft-mse-840000-ema-pruned.ckpt`,
the wizard will also ask you if you want to add this VAE to the model.

The appropriate entry for this model will be added to `configs/models.yaml` and it will
be available to use in the CLI immediately.

The CLI has additional commands for switching among, viewing, editing, and
deleting the available models. These are described in [Command Line
Client](../features/CLI.md#model-selection-and-importation), but the two most
frequently-used are `!models` and `!switch <name of model>`. The first
prints a table of models that InvokeAI knows about and their load
status. The second will load the requested model and lets you switch
back and forth quickly among loaded models.

### Manually editing `configs/models.yaml`

If you are comfortable with a text editor then you may simply edit
`models.yaml` directly.

First you need to download the desired .ckpt file and place it in
`models/ldm/stable-diffusion-v1` as described in step #1 in the
previous section. Record the path to the weights file,
e.g. `models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt`

Then using a **text** editor (e.g. the Windows Notepad application),
open the file `configs/models.yaml`, and add a new stanza that follows
this example:

```
arabian-nights-1.0:
    description: A great fine-tune in Arabian Nights style
    weights: ./models/ldm/stable-diffusion-v1/arabian-nights-1.0.ckpt
    config: ./configs/stable-diffusion/v1-inference.yaml
    width: 512
    height: 512
    vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt
    default: false
```

* arabian-nights-1.0
  - This is the name of the model that you will refer to from within the
    CLI and the WebGUI when you need to load and use the model.

* description
  - Any description that you want to add to the model to remind you what
    it is.

* weights
  - Relative path to the .ckpt weights file for this model.

* config
  - This is the confusingly-named configuration file for the model itself.
    Use `./configs/stable-diffusion/v1-inference.yaml` unless the model happens
    to need a custom configuration, in which case the place you downloaded it
    from will tell you what to use instead. For example, the runwayML custom
    inpainting model requires the file `configs/stable-diffusion/v1-inpainting-inference.yaml`.
    This is already included in the InvokeAI distribution and is configured automatically
    for you by the `preload_models.py` script.

* vae
  - If you want to add a VAE file to the model, then enter its path here.

* width, height
  - This is the width and height of the images used to train the model.
    Currently they are always 512 and 512.

Save the `models.yaml` and relaunch InvokeAI. The new model should now be
available for your use.
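The field-by-field description above also translates into a quick sanity check before relaunching. A minimal sketch (a hypothetical helper, not part of InvokeAI; assumes PyYAML and paths relative to the InvokeAI root):

```python
from pathlib import Path

import yaml  # PyYAML

def check_models_yaml(path="configs/models.yaml", root="."):
    """Report obviously broken stanzas: missing weights/config/vae files
    and absent width/height fields."""
    entries = yaml.safe_load(Path(path).read_text()) or {}
    for name, stanza in entries.items():
        for key in ("weights", "config", "vae"):
            value = stanza.get(key)
            if value and not (Path(root) / value).exists():
                print(f"{name}: {key} file not found: {value}")
        if "width" not in stanza or "height" not in stanza:
            print(f"{name}: missing width/height (normally 512 and 512)")

if __name__ == "__main__":
    check_models_yaml()
```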
@@ -1,5 +1,5 @@
 ---
-title: Linux
+title: Manual Installation, Linux
 ---
 
 # :fontawesome-brands-linux: Linux
@@ -63,24 +63,16 @@ title: Linux
    model loading scheme to allow the script to work on GPU machines that are not
    internet connected. See [Preload Models](../features/OTHER.md#preload-models)
 
-7. Now you need to install the weights for the stable diffusion model.
+7. Install the weights for the stable diffusion model.
 
-   - For running with the released weights, you will first need to set up an acount
-     with [Hugging Face](https://huggingface.co).
-   - Use your credentials to log in, and then point your browser [here](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original).
-   - You may be asked to sign a license agreement at this point.
-   - Click on "Files and versions" near the top of the page, and then click on the
-     file named "sd-v1-4.ckpt". You'll be taken to a page that prompts you to click
-     the "download" link. Save the file somewhere safe on your local machine.
+   - Sign up at https://huggingface.co
+   - Go to the [Stable diffusion diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
+   - Accept the terms and click Access Repository
+   - Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+     and move it into this directory under `models/ldm/stable_diffusion_v1/v1-5-pruned-emaonly.ckpt`
 
-   Now run the following commands from within the stable-diffusion directory.
-   This will create a symbolic link from the stable-diffusion model.ckpt file, to
-   the true location of the `sd-v1-4.ckpt` file.
-
-   ```bash
-   (invokeai) ~/InvokeAI$ mkdir -p models/ldm/stable-diffusion-v1
-   (invokeai) ~/InvokeAI$ ln -sf /path/to/sd-v1-4.ckpt models/ldm/stable-diffusion-v1/model.ckpt
-   ```
+   There are many other models that you can use. Please see [../features/INSTALLING_MODELS.md]
+   for details.
 
 8. Start generating images!
 
@@ -1,5 +1,5 @@
 ---
-title: macOS
+title: Manual Installation, macOS
 ---
 
 # :fontawesome-brands-apple: macOS
@@ -24,9 +24,15 @@ First you need to download a large checkpoint file.
 1. Sign up at https://huggingface.co
 2. Go to the [Stable diffusion diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
 3. Accept the terms and click Access Repository
-4. Download [sd-v1-4.ckpt (4.27 GB)](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/blob/main/sd-v1-4.ckpt) and note where you have saved it (probably the Downloads folder). You may want to move it somewhere else for longer term storage - SD needs this file to run.
+4. Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+   and move it into this directory under `models/ldm/stable_diffusion_v1/v1-5-pruned-emaonly.ckpt`
 
-While that is downloading, open Terminal and run the following commands one at a time, reading the comments and taking care to run the appropriate command for your Mac's architecture (Intel or M1).
+There are many other models that you can try. Please see [../features/INSTALLING_MODELS.md]
+for details.
+
+While that is downloading, open Terminal and run the following
+commands one at a time, reading the comments and taking care to run
+the appropriate command for your Mac's architecture (Intel or M1).
 
 !!! todo "Homebrew"
 
@@ -1,5 +1,5 @@
 ---
-title: Windows
+title: Manual Installation, Windows
 ---
 
 # :fontawesome-brands-windows: Windows
@@ -83,23 +83,14 @@ in the wiki
 
 8. Now you need to install the weights for the big stable diffusion model.
 
-   1. For running with the released weights, you will first need to set up an acount with Hugging Face (https://huggingface.co).
-   2. Use your credentials to log in, and then point your browser at https://huggingface.co/CompVis/stable-diffusion-v-1-4-original.
-   3. You may be asked to sign a license agreement at this point.
-   4. Click on "Files and versions" near the top of the page, and then click on the file named `sd-v1-4.ckpt`. You'll be taken to a page that
      prompts you to click the "download" link. Now save the file somewhere safe on your local machine.
-   5. The weight file is >4 GB in size, so
      downloading may take a while.
+   - Sign up at https://huggingface.co
+   - Go to the [Stable diffusion diffusion model page](https://huggingface.co/CompVis/stable-diffusion-v-1-4-original)
+   - Accept the terms and click Access Repository
+   - Download [v1-5-pruned-emaonly.ckpt (4.27 GB)](https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt)
+     and move it into this directory under `models/ldm/stable_diffusion_v1/v1-5-pruned-emaonly.ckpt`
 
-   Now run the following commands from **within the InvokeAI directory** to copy the weights file to the right place:
-
-   ```batch
-   mkdir -p models\ldm\stable-diffusion-v1
-   copy C:\path\to\sd-v1-4.ckpt models\ldm\stable-diffusion-v1\model.ckpt
-   ```
-
-   Please replace `C:\path\to\sd-v1.4.ckpt` with the correct path to wherever you stashed this file. If you prefer not to copy or move the .ckpt file,
-   you may instead create a shortcut to it from within `models\ldm\stable-diffusion-v1\`.
+   There are many other models that you can use. Please see [../features/INSTALLING_MODELS.md]
+   for details.
 
 9. Start generating images!
 
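All three manual-installation pages above now come down to the same download-and-place step. A minimal sketch of scripting that step, assuming the `huggingface_hub` package, an access token exported as `HF_TOKEN`, and that the model license has already been accepted on the Hugging Face website (repo and filename are the ones named in the docs):

```python
import os
import shutil
from pathlib import Path

from huggingface_hub import hf_hub_download  # assumption: huggingface_hub is installed

# Fetch the checkpoint named in the installation docs and place it where
# configs/models.yaml expects to find it.
cached = hf_hub_download(
    repo_id="runwayml/stable-diffusion-v1-5",
    filename="v1-5-pruned-emaonly.ckpt",
    token=os.environ.get("HF_TOKEN"),
)
target = Path("models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt")
target.parent.mkdir(parents=True, exist_ok=True)
shutil.copy(cached, target)
print(f"weights installed at {target}")
```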
@@ -227,11 +227,14 @@ class ModelCache(object):
             print(' | Using more accurate float32 precision')
 
         # look and load a matching vae file. Code borrowed from AUTOMATIC1111 modules/sd_models.py
-        if vae and os.path.exists(vae):
+        if vae:
+            if os.path.exists(vae):
                 print(f' | Loading VAE weights from: {vae}')
                 vae_ckpt = torch.load(vae, map_location="cpu")
                 vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if k[0:4] != "loss"}
                 model.first_stage_model.load_state_dict(vae_dict, strict=False)
+            else:
+                print(f' | VAE file {vae} not found. Skipping.')
 
         model.to(self.device)
         # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
@@ -281,7 +284,7 @@ class ModelCache(object):
         Returns the preamble for the config file.
         '''
         return '''# This file describes the alternative machine learning models
-# available to the dream script.
+# available to InvokeAI script.
 #
 # To add a new model, follow the examples below. Each
 # model requires a model config file, a weights file,
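The VAE-substitution logic in the first hunk above is self-contained enough to exercise on its own. A minimal sketch of the same pattern as a standalone function (hypothetical helper, assuming PyTorch and a loaded model whose `first_stage_model` accepts the VAE state dict):

```python
import os

import torch

def load_vae_weights(model, vae_path):
    """Overlay a fine-tuned VAE checkpoint onto model.first_stage_model,
    mirroring ModelCache: skip quietly when no path or a missing file is
    given, and drop the discriminator/loss keys before loading."""
    if not vae_path:
        return
    if not os.path.exists(vae_path):
        print(f' | VAE file {vae_path} not found. Skipping.')
        return
    print(f' | Loading VAE weights from: {vae_path}')
    vae_ckpt = torch.load(vae_path, map_location="cpu")
    vae_dict = {k: v for k, v in vae_ckpt["state_dict"].items() if not k.startswith("loss")}
    model.first_stage_model.load_state_dict(vae_dict, strict=False)
```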
@@ -1,44 +0,0 @@
model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: val/rec_loss
    embed_dim: 16
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 1.0e-06
        disc_weight: 0.5
    ddconfig:
      double_z: true
      z_channels: 16
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions:
      - 16
      dropout: 0.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
@@ -1,46 +0,0 @@
model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: val/rec_loss
    embed_dim: 64
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 1.0e-06
        disc_weight: 0.5
    ddconfig:
      double_z: true
      z_channels: 64
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      - 4
      num_res_blocks: 2
      attn_resolutions:
      - 16
      - 8
      dropout: 0.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
@@ -1,41 +0,0 @@
model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: val/rec_loss
    embed_dim: 3
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 1.0e-06
        disc_weight: 0.5
    ddconfig:
      double_z: true
      z_channels: 3
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 10
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
@@ -1,42 +0,0 @@
model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.AutoencoderKL
  params:
    monitor: val/rec_loss
    embed_dim: 4
    lossconfig:
      target: ldm.modules.losses.LPIPSWithDiscriminator
      params:
        disc_start: 50001
        kl_weight: 1.0e-06
        disc_weight: 0.5
    ddconfig:
      double_z: true
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 2
      - 4
      - 4
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 4
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
@@ -1,49 +0,0 @@
model:
  base_learning_rate: 4.5e-06
  target: ldm.models.autoencoder.VQModel
  params:
    embed_dim: 8
    n_embed: 16384
    ddconfig:
      double_z: false
      z_channels: 8
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult:
      - 1
      - 1
      - 2
      - 2
      - 4
      num_res_blocks: 2
      attn_resolutions:
      - 16
      dropout: 0.0
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 250001
        disc_weight: 0.75
        disc_num_layers: 2
        codebook_weight: 1.0

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 14
    num_workers: 20
    wrap: true
    train:
      target: ldm.data.openimages.FullOpenImagesTrain
      params:
        size: 384
        crop_size: 256
    validation:
      target: ldm.data.openimages.FullOpenImagesValidation
      params:
        size: 384
        crop_size: 256
@ -1,46 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 4.5e-06
|
|
||||||
target: ldm.models.autoencoder.VQModel
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
|
|
||||||
ddconfig:
|
|
||||||
attn_type: none
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
|
||||||
params:
|
|
||||||
disc_conditional: false
|
|
||||||
disc_in_channels: 3
|
|
||||||
disc_start: 11
|
|
||||||
disc_weight: 0.75
|
|
||||||
codebook_weight: 1.0
|
|
||||||
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 8
|
|
||||||
num_workers: 12
|
|
||||||
wrap: true
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesTrain
|
|
||||||
params:
|
|
||||||
crop_size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesValidation
|
|
||||||
params:
|
|
||||||
crop_size: 256
|
|
@ -1,45 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 4.5e-06
|
|
||||||
target: ldm.models.autoencoder.VQModel
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
|
||||||
params:
|
|
||||||
disc_conditional: false
|
|
||||||
disc_in_channels: 3
|
|
||||||
disc_start: 0
|
|
||||||
disc_weight: 0.75
|
|
||||||
codebook_weight: 1.0
|
|
||||||
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 8
|
|
||||||
num_workers: 16
|
|
||||||
wrap: true
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesTrain
|
|
||||||
params:
|
|
||||||
crop_size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesValidation
|
|
||||||
params:
|
|
||||||
crop_size: 256
|
|
@ -1,48 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 4.5e-06
|
|
||||||
target: ldm.models.autoencoder.VQModel
|
|
||||||
params:
|
|
||||||
embed_dim: 4
|
|
||||||
n_embed: 256
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 4
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions:
|
|
||||||
- 32
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
|
||||||
params:
|
|
||||||
disc_conditional: false
|
|
||||||
disc_in_channels: 3
|
|
||||||
disc_start: 250001
|
|
||||||
disc_weight: 0.75
|
|
||||||
codebook_weight: 1.0
|
|
||||||
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 10
|
|
||||||
num_workers: 20
|
|
||||||
wrap: true
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesTrain
|
|
||||||
params:
|
|
||||||
size: 384
|
|
||||||
crop_size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesValidation
|
|
||||||
params:
|
|
||||||
size: 384
|
|
||||||
crop_size: 256
|
|
@ -1,48 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 4.5e-06
|
|
||||||
target: ldm.models.autoencoder.VQModel
|
|
||||||
params:
|
|
||||||
embed_dim: 4
|
|
||||||
n_embed: 16384
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 4
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions:
|
|
||||||
- 32
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
|
|
||||||
params:
|
|
||||||
disc_conditional: false
|
|
||||||
disc_in_channels: 3
|
|
||||||
disc_num_layers: 2
|
|
||||||
disc_start: 1
|
|
||||||
disc_weight: 0.6
|
|
||||||
codebook_weight: 1.0
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 10
|
|
||||||
num_workers: 20
|
|
||||||
wrap: true
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesTrain
|
|
||||||
params:
|
|
||||||
size: 384
|
|
||||||
crop_size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.FullOpenImagesValidation
|
|
||||||
params:
|
|
||||||
size: 384
|
|
||||||
crop_size: 256
|
|
@ -1,80 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 1.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0155
|
|
||||||
log_every_t: 100
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l2
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: LR_image
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
concat_mode: true
|
|
||||||
cond_stage_trainable: false
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 6
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 160
|
|
||||||
attention_resolutions:
|
|
||||||
- 16
|
|
||||||
- 8
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 64
|
|
||||||
wrap: false
|
|
||||||
num_workers: 12
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.SuperresOpenImagesAdvancedTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
degradation: bsrgan_light
|
|
||||||
downscale_f: 4
|
|
||||||
min_crop_f: 0.5
|
|
||||||
max_crop_f: 1.0
|
|
||||||
random_crop: true
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.SuperresOpenImagesAdvancedValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
degradation: bsrgan_light
|
|
||||||
downscale_f: 4
|
|
||||||
min_crop_f: 0.5
|
|
||||||
max_crop_f: 1.0
|
|
||||||
random_crop: true
|
|
@ -1,70 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 2.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0195
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: class_label
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
cond_stage_trainable: false
|
|
||||||
concat_mode: false
|
|
||||||
monitor: val/loss
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 3
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 224
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config: __is_unconditional__
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 48
|
|
||||||
num_workers: 5
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.faceshq.CelebAHQTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.faceshq.CelebAHQValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -1,80 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 1.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0195
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: class_label
|
|
||||||
image_size: 32
|
|
||||||
channels: 4
|
|
||||||
cond_stage_trainable: true
|
|
||||||
conditioning_key: crossattn
|
|
||||||
monitor: val/loss_simple_ema
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 32
|
|
||||||
in_channels: 4
|
|
||||||
out_channels: 4
|
|
||||||
model_channels: 256
|
|
||||||
attention_resolutions:
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
- 1
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
use_spatial_transformer: true
|
|
||||||
transformer_depth: 1
|
|
||||||
context_dim: 512
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 4
|
|
||||||
n_embed: 16384
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 4
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions:
|
|
||||||
- 32
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: ldm.modules.encoders.modules.ClassEmbedder
|
|
||||||
params:
|
|
||||||
embed_dim: 512
|
|
||||||
key: class_label
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 64
|
|
||||||
num_workers: 12
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.imagenet.ImageNetTrain
|
|
||||||
params:
|
|
||||||
config:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.imagenet.ImageNetValidation
|
|
||||||
params:
|
|
||||||
config:
|
|
||||||
size: 256
|
|
@ -1,70 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 2.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0195
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: class_label
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
cond_stage_trainable: false
|
|
||||||
concat_mode: false
|
|
||||||
monitor: val/loss
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 3
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 224
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config: __is_unconditional__
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 42
|
|
||||||
num_workers: 5
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.faceshq.FFHQTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.faceshq.FFHQValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -1,67 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 1.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0205
|
|
||||||
log_every_t: 100
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l1
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: masked_image
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
concat_mode: true
|
|
||||||
monitor: val/loss
|
|
||||||
scheduler_config:
|
|
||||||
target: ldm.lr_scheduler.LambdaWarmUpCosineScheduler
|
|
||||||
params:
|
|
||||||
verbosity_interval: 0
|
|
||||||
warm_up_steps: 1000
|
|
||||||
max_decay_steps: 50000
|
|
||||||
lr_start: 0.001
|
|
||||||
lr_max: 0.1
|
|
||||||
lr_min: 0.0001
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 7
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 256
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 4
|
|
||||||
num_heads: 8
|
|
||||||
resblock_updown: true
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
attn_type: none
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: ldm.modules.losses.contperceptual.DummyLoss
|
|
||||||
cond_stage_config: __is_first_stage__
|
|
@ -1,81 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 2.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0205
|
|
||||||
log_every_t: 100
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l1
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: coordinates_bbox
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
conditioning_key: crossattn
|
|
||||||
cond_stage_trainable: true
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 3
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 128
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
use_spatial_transformer: true
|
|
||||||
transformer_depth: 3
|
|
||||||
context_dim: 512
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: ldm.modules.encoders.modules.BERTEmbedder
|
|
||||||
params:
|
|
||||||
n_embed: 512
|
|
||||||
n_layer: 16
|
|
||||||
vocab_size: 8192
|
|
||||||
max_seq_len: 92
|
|
||||||
use_tokenizer: false
|
|
||||||
monitor: val/loss_simple_ema
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 24
|
|
||||||
wrap: false
|
|
||||||
num_workers: 10
|
|
||||||
train:
|
|
||||||
target: ldm.data.openimages.OpenImagesBBoxTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.openimages.OpenImagesBBoxValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -1,70 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 2.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0195
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: class_label
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
cond_stage_trainable: false
|
|
||||||
concat_mode: false
|
|
||||||
monitor: val/loss
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 3
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 224
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 4
|
|
||||||
num_head_channels: 32
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config: __is_unconditional__
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 48
|
|
||||||
num_workers: 5
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.lsun.LSUNBedroomsTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.lsun.LSUNBedroomsValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -1,92 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 5.0e-05
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0155
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l1
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: image
|
|
||||||
image_size: 32
|
|
||||||
channels: 4
|
|
||||||
cond_stage_trainable: false
|
|
||||||
concat_mode: false
|
|
||||||
scale_by_std: true
|
|
||||||
monitor: val/loss_simple_ema
|
|
||||||
scheduler_config:
|
|
||||||
target: ldm.lr_scheduler.LambdaLinearScheduler
|
|
||||||
params:
|
|
||||||
warm_up_steps:
|
|
||||||
- 10000
|
|
||||||
cycle_lengths:
|
|
||||||
- 10000000000000
|
|
||||||
f_start:
|
|
||||||
- 1.0e-06
|
|
||||||
f_max:
|
|
||||||
- 1.0
|
|
||||||
f_min:
|
|
||||||
- 1.0
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 32
|
|
||||||
in_channels: 4
|
|
||||||
out_channels: 4
|
|
||||||
model_channels: 192
|
|
||||||
attention_resolutions:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
- 8
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
- 4
|
|
||||||
num_heads: 8
|
|
||||||
use_scale_shift_norm: true
|
|
||||||
resblock_updown: true
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.AutoencoderKL
|
|
||||||
params:
|
|
||||||
embed_dim: 4
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: true
|
|
||||||
z_channels: 4
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
|
|
||||||
cond_stage_config: '__is_unconditional__'
|
|
||||||
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 96
|
|
||||||
num_workers: 5
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.lsun.LSUNChurchesTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.lsun.LSUNChurchesValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -1,59 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 1.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0205
|
|
||||||
log_every_t: 100
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l1
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: segmentation
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
concat_mode: true
|
|
||||||
cond_stage_trainable: true
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 6
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 128
|
|
||||||
attention_resolutions:
|
|
||||||
- 32
|
|
||||||
- 16
|
|
||||||
- 8
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 4
|
|
||||||
- 8
|
|
||||||
num_heads: 8
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: ldm.modules.encoders.modules.SpatialRescaler
|
|
||||||
params:
|
|
||||||
n_stages: 2
|
|
||||||
in_channels: 182
|
|
||||||
out_channels: 3
|
|
@ -1,78 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 1.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0205
|
|
||||||
log_every_t: 100
|
|
||||||
timesteps: 1000
|
|
||||||
loss_type: l1
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: segmentation
|
|
||||||
image_size: 128
|
|
||||||
channels: 3
|
|
||||||
concat_mode: true
|
|
||||||
cond_stage_trainable: true
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 128
|
|
||||||
in_channels: 6
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 128
|
|
||||||
attention_resolutions:
|
|
||||||
- 32
|
|
||||||
- 16
|
|
||||||
- 8
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 4
|
|
||||||
- 8
|
|
||||||
num_heads: 8
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
monitor: val/rec_loss
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: ldm.modules.encoders.modules.SpatialRescaler
|
|
||||||
params:
|
|
||||||
n_stages: 2
|
|
||||||
in_channels: 182
|
|
||||||
out_channels: 3
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 8
|
|
||||||
wrap: false
|
|
||||||
num_workers: 10
|
|
||||||
train:
|
|
||||||
target: ldm.data.landscapes.RFWTrain
|
|
||||||
params:
|
|
||||||
size: 768
|
|
||||||
crop_size: 512
|
|
||||||
segmentation_to_float32: true
|
|
||||||
validation:
|
|
||||||
target: ldm.data.landscapes.RFWValidation
|
|
||||||
params:
|
|
||||||
size: 768
|
|
||||||
crop_size: 512
|
|
||||||
segmentation_to_float32: true
|
|
2
models/ldm/stable-diffusion-v1/place-ckpt-files-here.txt
Normal file
@ -0,0 +1,2 @@
See docs/features/INSTALLING_MODELS.md for how to populate this
directory with one or more Stable Diffusion model weight files.
@ -1,77 +0,0 @@
|
|||||||
model:
|
|
||||||
base_learning_rate: 2.0e-06
|
|
||||||
target: ldm.models.diffusion.ddpm.LatentDiffusion
|
|
||||||
params:
|
|
||||||
linear_start: 0.0015
|
|
||||||
linear_end: 0.0195
|
|
||||||
num_timesteps_cond: 1
|
|
||||||
log_every_t: 200
|
|
||||||
timesteps: 1000
|
|
||||||
first_stage_key: image
|
|
||||||
cond_stage_key: caption
|
|
||||||
image_size: 64
|
|
||||||
channels: 3
|
|
||||||
cond_stage_trainable: true
|
|
||||||
conditioning_key: crossattn
|
|
||||||
monitor: val/loss_simple_ema
|
|
||||||
unet_config:
|
|
||||||
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
|
|
||||||
params:
|
|
||||||
image_size: 64
|
|
||||||
in_channels: 3
|
|
||||||
out_channels: 3
|
|
||||||
model_channels: 192
|
|
||||||
attention_resolutions:
|
|
||||||
- 8
|
|
||||||
- 4
|
|
||||||
- 2
|
|
||||||
num_res_blocks: 2
|
|
||||||
channel_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 3
|
|
||||||
- 5
|
|
||||||
num_head_channels: 32
|
|
||||||
use_spatial_transformer: true
|
|
||||||
transformer_depth: 1
|
|
||||||
context_dim: 640
|
|
||||||
first_stage_config:
|
|
||||||
target: ldm.models.autoencoder.VQModelInterface
|
|
||||||
params:
|
|
||||||
embed_dim: 3
|
|
||||||
n_embed: 8192
|
|
||||||
ddconfig:
|
|
||||||
double_z: false
|
|
||||||
z_channels: 3
|
|
||||||
resolution: 256
|
|
||||||
in_channels: 3
|
|
||||||
out_ch: 3
|
|
||||||
ch: 128
|
|
||||||
ch_mult:
|
|
||||||
- 1
|
|
||||||
- 2
|
|
||||||
- 4
|
|
||||||
num_res_blocks: 2
|
|
||||||
attn_resolutions: []
|
|
||||||
dropout: 0.0
|
|
||||||
lossconfig:
|
|
||||||
target: torch.nn.Identity
|
|
||||||
cond_stage_config:
|
|
||||||
target: ldm.modules.encoders.modules.BERTEmbedder
|
|
||||||
params:
|
|
||||||
n_embed: 640
|
|
||||||
n_layer: 32
|
|
||||||
data:
|
|
||||||
target: main.DataModuleFromConfig
|
|
||||||
params:
|
|
||||||
batch_size: 28
|
|
||||||
num_workers: 10
|
|
||||||
wrap: false
|
|
||||||
train:
|
|
||||||
target: ldm.data.previews.pytorch_dataset.PreviewsTrain
|
|
||||||
params:
|
|
||||||
size: 256
|
|
||||||
validation:
|
|
||||||
target: ldm.data.previews.pytorch_dataset.PreviewsValidation
|
|
||||||
params:
|
|
||||||
size: 256
|
|
@ -3,20 +3,369 @@
|
|||||||
# Before running stable-diffusion on an internet-isolated machine,
# run this script from one with internet connectivity. The
# two machines must share a common .cache directory.
from transformers import CLIPTokenizer, CLIPTextModel
#
# Coauthor: Kevin Turner http://github.com/keturn
#
print('Loading Python libraries...\n')
import argparse
import clip
from transformers import BertTokenizerFast, AutoFeatureExtractor
import sys
import transformers
import os
import warnings
import torch
import urllib.request
import zipfile
import traceback
import getpass
import requests
from urllib import request
from tqdm import tqdm
from omegaconf import OmegaConf
from pathlib import Path
from transformers import CLIPTokenizer, CLIPTextModel
from transformers import BertTokenizerFast, AutoFeatureExtractor
from huggingface_hub import hf_hub_download, HfFolder, hf_hub_url

transformers.logging.set_verbosity_error()
#--------------------------globals--
Model_dir = './models/ldm/stable-diffusion-v1/'
Config_file = './configs/models.yaml'
SD_Configs = './configs/stable-diffusion'
Datasets = {
'stable-diffusion-1.5': {
'description': 'The newest Stable Diffusion version 1.5 weight file (4.27 GB)',
'repo_id': 'runwayml/stable-diffusion-v1-5',
'config': 'v1-inference.yaml',
'file': 'v1-5-pruned-emaonly.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
'inpainting-1.5': {
'description': 'RunwayML SD 1.5 model optimized for inpainting (4.27 GB)',
'repo_id': 'runwayml/stable-diffusion-inpainting',
'config': 'v1-inpainting-inference.yaml',
'file': 'sd-v1-5-inpainting.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
'stable-diffusion-1.4': {
'description': 'The original Stable Diffusion version 1.4 weight file (4.27 GB)',
'repo_id': 'CompVis/stable-diffusion-v-1-4-original',
'config': 'v1-inference.yaml',
'file': 'sd-v1-4.ckpt',
'recommended': False,
'width': 512,
'height': 512,
},
'waifu-diffusion-1.3': {
'description': 'Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB)',
'repo_id': 'hakurei/waifu-diffusion-v1-3',
'config': 'v1-inference.yaml',
'file': 'model-epoch09-float32.ckpt',
'recommended': False,
'width': 512,
'height': 512,
},
'ft-mse-improved-autoencoder-840000': {
'description': 'StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB)',
'repo_id': 'stabilityai/sd-vae-ft-mse-original',
'config': 'VAE',
'file': 'vae-ft-mse-840000-ema-pruned.ckpt',
'recommended': True,
'width': 512,
'height': 512,
},
}
Config_preamble = '''# This file describes the alternative machine learning models
# available to InvokeAI script.
#
# To add a new model, follow the examples below. Each
# model requires a model config file, a weights file,
# and the width and height of the images it
# was trained on.
'''
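The `recommended` flags in the Datasets table above are what drive the default download set. A minimal sketch (illustrative, not part of the script) of how that set falls out of the table:

```python
# Mirrors the non-customized branch of select_datasets() further down:
# every entry flagged recommended=True is selected.
recommended = [name for name, info in Datasets.items() if info['recommended']]
# -> ['stable-diffusion-1.5', 'inpainting-1.5', 'ft-mse-improved-autoencoder-840000']
```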
#---------------------------------------------
def introduction():
print(
'''Welcome to InvokeAI. This script will help download the Stable Diffusion weight files
and other large models that are needed for text to image generation. At any point you may interrupt
this program and resume later.\n'''
)

#--------------------------------------------
def postscript():
print(
'''You're all set! You may now launch InvokeAI using one of these two commands:
Web version:

python scripts/invoke.py --web (connect to http://localhost:9090)

Command-line version:

python scripts/invoke.py

Have fun!
'''
)
#---------------------------------------------
def yes_or_no(prompt:str, default_yes=True):
default = "y" if default_yes else 'n'
response = input(f'{prompt} [{default}] ') or default
if default_yes:
return response[0] not in ('n','N')
else:
return response[0] in ('y','Y')
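A quick illustration of the default handling in yes_or_no (hypothetical prompts; an empty reply falls back to the default answer):

```python
yes_or_no('Download?')                 # <enter> -> True, 'n' -> False
yes_or_no('Quit?', default_yes=False)  # <enter> -> False, 'y' -> True
```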
#---------------------------------------------
def user_wants_to_download_weights()->str:
'''
Returns one of "skip", "recommended" or "customized"
'''
print('''You can download and configure the weights files manually or let this
script do it for you. Manual installation is described at:

https://github.com/invoke-ai/InvokeAI/blob/main/docs/installation/INSTALLING_MODELS.md

You may download the recommended models (about 10GB total), select a customized set, or
completely skip this step.
'''
)
selection = None
while selection is None:
choice = input('Download <r>ecommended models, <c>ustomize the list, or <s>kip this step? [r]: ')
if choice.startswith(('r','R')) or len(choice)==0:
selection = 'recommended'
elif choice.startswith(('c','C')):
selection = 'customized'
elif choice.startswith(('s','S')):
selection = 'skip'
return selection
#---------------------------------------------
def select_datasets(action:str):
done = False
while not done:
datasets = dict()
dflt = None # the first model selected will be the default; TODO let user change
counter = 1

if action == 'customized':
print('''
Choose the weight file(s) you wish to download. Before downloading you
will be given the option to view and change your selections.
'''
)
for ds in Datasets.keys():
recommended = '(recommended)' if Datasets[ds]['recommended'] else ''
print(f'[{counter}] {ds}:\n {Datasets[ds]["description"]} {recommended}')
if yes_or_no(' Download?',default_yes=Datasets[ds]['recommended']):
datasets[ds]=counter
counter += 1
else:
for ds in Datasets.keys():
if Datasets[ds]['recommended']:
datasets[ds]=counter
counter += 1

print('The following weight files will be downloaded:')
for ds in datasets:
dflt = '*' if dflt is None else ''
print(f' [{datasets[ds]}] {ds}{dflt}')
print("*default")
ok_to_download = yes_or_no('Ok to download?')
if not ok_to_download:
if yes_or_no('Change your selection?'):
action = 'customized'
pass
else:
done = True
else:
done = True
return datasets if ok_to_download else None
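For reference, assuming the recommended set is accepted as-is, the dict that select_datasets() hands on to download_weight_datasets() would look roughly like this (the counter only records menu position):

```python
models = {
    'stable-diffusion-1.5': 1,
    'inpainting-1.5': 2,
    'ft-mse-improved-autoencoder-840000': 3,
}
```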
#-------------------------------Authenticate against Hugging Face
def authenticate():
print('''
To download the Stable Diffusion weight files from the official Hugging Face
repository, you need to read and accept the CreativeML Responsible AI license.

This involves a few easy steps.

1. If you have not already done so, create an account on Hugging Face's web site
using the "Sign Up" button:

https://huggingface.co/join

You will need to verify your email address as part of the HuggingFace
registration process.

2. Log into your Hugging Face account:

https://huggingface.co/login

3. Accept the license terms located here:

https://huggingface.co/runwayml/stable-diffusion-v1-5

and here:

https://huggingface.co/runwayml/stable-diffusion-inpainting

(Yes, you have to accept two slightly different license agreements)
'''
)
input('Press <enter> when you are ready to continue:')
access_token = HfFolder.get_token()
if access_token is None:
print('''
4. Thank you! The last step is to enter your HuggingFace access token so that
this script is authorized to initiate the download. Go to the access tokens
page of your Hugging Face account and create a token by clicking the
"New token" button:

https://huggingface.co/settings/tokens

(You can enter anything you like in the token creation field marked "Name".
"Role" should be "read").

Now copy the token to your clipboard and paste it here: '''
)
access_token = getpass.getpass()
HfFolder.save_token(access_token)
return access_token
#---------------------------------------------
# look for legacy model.ckpt in models directory and offer to
# normalize its name
def migrate_models_ckpt():
if not os.path.exists(os.path.join(Model_dir,'model.ckpt')):
return
new_name = Datasets['stable-diffusion-1.4']['file']
print('You seem to have the Stable Diffusion v1.4 "model.ckpt" already installed.')
rename = yes_or_no(f'Ok to rename it to "{new_name}" for future reference?')
if rename:
print(f'model.ckpt => {new_name}')
os.rename(os.path.join(Model_dir,'model.ckpt'),os.path.join(Model_dir,new_name))

#---------------------------------------------
def download_weight_datasets(models:dict, access_token:str):
migrate_models_ckpt()
successful = dict()
for mod in models.keys():
repo_id = Datasets[mod]['repo_id']
filename = Datasets[mod]['file']
success = download_with_resume(
repo_id=repo_id,
model_name=filename,
access_token=access_token
)
if success:
successful[mod] = True
keys = ', '.join(successful.keys())
print(f'Successfully installed {keys}')
return successful

#---------------------------------------------
def download_with_resume(repo_id:str, model_name:str, access_token:str)->bool:

model_dest = os.path.join(Model_dir, model_name)
os.makedirs(os.path.dirname(model_dest), exist_ok=True)
url = hf_hub_url(repo_id, model_name)

header = {"Authorization": f'Bearer {access_token}'}
open_mode = 'wb'
exist_size = 0

if os.path.exists(model_dest):
exist_size = os.path.getsize(model_dest)
header['Range'] = f'bytes={exist_size}-'
open_mode = 'ab'

resp = requests.get(url, headers=header, stream=True)
total = int(resp.headers.get('content-length', 0))

if resp.status_code==416: # "range not satisfiable", which means nothing to return
print(f'* {model_name}: complete file found. Skipping.')
return True
elif exist_size > 0:
print(f'* {model_name}: partial file found. Resuming...')
else:
print(f'* {model_name}: Downloading...')

try:
if total < 2000:
print(f'* {model_name}: {resp.text}')
return False

with open(model_dest, open_mode) as file, tqdm(
desc=model_name,
initial=exist_size,
total=total+exist_size,
unit='iB',
unit_scale=True,
unit_divisor=1000,
) as bar:
for data in resp.iter_content(chunk_size=1024):
size = file.write(data)
bar.update(size)
except Exception as e:
print(f'An error occurred while downloading {model_name}: {str(e)}')
return False
return True
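A sketch of how the resume behaviour plays out (model names taken from the Datasets table; the token comes from authenticate()). If an earlier run left a partial file in Model_dir, the next call sends a `Range: bytes=<partial size>-` header and reopens the file in append mode, so the download continues instead of restarting:

```python
ok = download_with_resume(
    repo_id='runwayml/stable-diffusion-v1-5',
    model_name='v1-5-pruned-emaonly.ckpt',
    access_token=access_token,
)
```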
#---------------------------------------------
def update_config_file(successfully_downloaded:dict):
try:
yaml = new_config_file_contents(successfully_downloaded)
tmpfile = os.path.join(os.path.dirname(Config_file),'new_config.tmp')
with open(tmpfile, 'w') as outfile:
outfile.write(Config_preamble)
outfile.write(yaml)
os.rename(tmpfile,Config_file)
except Exception as e:
print(f'**Error creating config file {Config_file}: {str(e)} **')
return
print(f'Successfully created new configuration file {Config_file}')


#---------------------------------------------
def new_config_file_contents(successfully_downloaded:dict)->str:
conf = OmegaConf.load(Config_file)

# find the VAE file, if there is one
vae = None
default_selected = False

for model in successfully_downloaded:
if Datasets[model]['config'] == 'VAE':
vae = Datasets[model]['file']

for model in successfully_downloaded:
if Datasets[model]['config'] == 'VAE': # skip VAE entries
continue
stanza = conf[model] if model in conf else { }

stanza['description'] = Datasets[model]['description']
stanza['weights'] = os.path.join(Model_dir,Datasets[model]['file'])
stanza['config'] =os.path.join(SD_Configs, Datasets[model]['config'])
stanza['width'] = Datasets[model]['width']
stanza['height'] = Datasets[model]['height']
stanza.pop('default',None) # this will be set later
if vae:
stanza['vae'] = os.path.join(Model_dir,vae)
# BUG - the first stanza is always the default. User should select.
if not default_selected:
stanza['default'] = True
default_selected = True
conf[model] = stanza
return OmegaConf.to_yaml(conf)
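The stanza that new_config_file_contents() writes into configs/models.yaml for a downloaded model ends up with roughly this shape (an illustrative sketch using the stable-diffusion-1.5 entry plus the improved VAE; the actual text is produced by OmegaConf.to_yaml):

```python
conf['stable-diffusion-1.5'] = {
    'description': 'The newest Stable Diffusion version 1.5 weight file (4.27 GB)',
    'weights': './models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt',
    'config': './configs/stable-diffusion/v1-inference.yaml',
    'width': 512,
    'height': 512,
    'vae': './models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt',
    'default': True,  # the first stanza becomes the default (see the BUG note above)
}
```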
#---------------------------------------------
# this will preload the Bert tokenizer files
def download_bert():
@ -66,7 +415,6 @@ def download_gfpgan():
print(traceback.format_exc())

print('Loading models from GFPGAN')
import urllib.request
for model in (
[
'https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth',
@ -152,6 +500,31 @@ def download_safety_checker():

#-------------------------------------
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='InvokeAI model downloader')
parser.add_argument('--interactive',
dest='interactive',
action=argparse.BooleanOptionalAction,
default=True,
help='run in interactive mode (default)')
opt = parser.parse_args()

try:
if opt.interactive:
introduction()
print('** WEIGHT SELECTION **')
choice = user_wants_to_download_weights()
if choice != 'skip':
models = select_datasets(choice)
if models is None:
if yes_or_no('Quit?',default_yes=False):
sys.exit(0)
print('** LICENSE AGREEMENT FOR WEIGHT FILES **')
access_token = authenticate()
print('\n** DOWNLOADING WEIGHTS **')
successfully_downloaded = download_weight_datasets(models, access_token)
update_config_file(successfully_downloaded)
else:
print('\n** DOWNLOADING SUPPORT MODELS **')
download_bert()
download_kornia()
download_clip()
@ -159,5 +532,11 @@ if __name__ == '__main__':
download_codeformer()
download_clipseg()
download_safety_checker()
postscript()
except KeyboardInterrupt:
print('\nGoodbye! Come back soon.')
except Exception as e:
print(f'\nA problem occurred during download.\nThe error was: "{str(e)}"')
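Because `--interactive` is declared with argparse.BooleanOptionalAction and default=True, the parser also accepts the negated form, which is what a non-interactive environment (for example a CI job) would pass:

```python
opt = parser.parse_args([])                     # -> Namespace(interactive=True)
opt = parser.parse_args(['--no-interactive'])   # -> Namespace(interactive=False)
```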