# Mirror of https://github.com/invoke-ai/InvokeAI
# Synced 2024-08-30 20:32:17 +00:00, commit 58e3562652
#
# Commit message: Fixed merging embeddings based on the changes made in
# textual inversion. Tested and working. Inverted their logic to prioritize
# Stable Diffusion implementation over alternatives, but left the option for
# alternatives to still be used.
#
# File info: 109 lines, 2.5 KiB, YAML
# Model section.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; it follows the `target` / `params` pairing of each section.
# Confirm against the upstream file before relying on it.
# Each `target` is a dotted Python path; the sibling `params` mapping
# presumably holds the arguments passed to it (verify against the loader).
model:
  base_learning_rate: 5.0e-03
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 64
    channels: 4
    cond_stage_trainable: true  # Note: different from the one we trained before
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false
    embedding_reg_weight: 0.0

    personalization_config:
      target: ldm.modules.embedding_manager.EmbeddingManager
      params:
        # "*" must stay quoted: a bare * is the YAML alias indicator.
        placeholder_strings: ["*"]
        initializer_words: ["sculpture"]
        per_image_tokens: false
        num_vectors_per_token: 1
        progressive_words: false

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_heads: 8
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
        legacy: false

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

# Data section: one module-level target with separate train and validation
# dataset entries.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; confirm against the upstream file.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 1
    num_workers: 2
    wrap: false
    train:
      target: ldm.data.personalized.PersonalizedBase
      params:
        size: 512
        set: train
        per_image_tokens: false
        repeats: 100
    validation:
      target: ldm.data.personalized.PersonalizedBase
      params:
        size: 512
        set: val
        per_image_tokens: false
        repeats: 10

# Lightning section: checkpoint cadence, logging callback and trainer options.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; in particular, confirm that `trainer` belongs under
# `lightning` rather than at the top level.
lightning:
  modelcheckpoint:
    params:
      every_n_train_steps: 500
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 500
        max_images: 8
        increase_log_steps: false

  trainer:
    benchmark: true
    max_steps: 4000