mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Support conversion of controlnets from safetensors to diffusers format (#4980)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [X] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission

## Have you discussed this change with the InvokeAI team?

- [X] Yes
- [ ] No, because:

## Have you updated all relevant documentation?

- [X] Yes
- [ ] No

## Description

This PR allows users to install checkpoint (safetensors) versions of controlnet models. The models will be converted into diffusers format and cached on the fly. This only works for sd-1 and sd-2 controlnets, as I was unable to find controlnet sdxl checkpoint models or their corresponding .yaml config files.

After updating, please run `invokeai-configure --yes --default-only` to install the missing config files. Users should be instructed to select option [7] from the launcher: "Re-run the configure script to fix a broken install or to complete a major upgrade".

## Related Tickets & Documents

User request at https://discord.com/channels/1020123559063990373/1160318627631870092/1160318627631870092

<!-- For pull requests that relate or close an issue, please include them below. For example having the text: "closes #1234" would connect the current pull request to issue 1234. And when we merge the pull request, Github will automatically close the issue. -->

- Related Issue #4743
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- Please provide steps on how to test changes, any hardware or software specifications as well as any other pertinent information. -->

See above for instructions on updating the config files after checking out the PR.
This commit is contained in:
commit
c04099a869
@ -460,6 +460,12 @@ class ModelInstall(object):
|
|||||||
possible_conf = path.with_suffix(".yaml")
|
possible_conf = path.with_suffix(".yaml")
|
||||||
if possible_conf.exists():
|
if possible_conf.exists():
|
||||||
legacy_conf = str(self.relative_to_root(possible_conf))
|
legacy_conf = str(self.relative_to_root(possible_conf))
|
||||||
|
else:
|
||||||
|
legacy_conf = Path(
|
||||||
|
self.config.root_path,
|
||||||
|
"configs/controlnet",
|
||||||
|
("cldm_v15.yaml" if info.base_type == BaseModelType("sd-1") else "cldm_v21.yaml"),
|
||||||
|
)
|
||||||
|
|
||||||
if legacy_conf:
|
if legacy_conf:
|
||||||
attributes.update(dict(config=str(legacy_conf)))
|
attributes.update(dict(config=str(legacy_conf)))
|
||||||
|
@ -132,13 +132,14 @@ def _convert_controlnet_ckpt_and_cache(
|
|||||||
model_path: str,
|
model_path: str,
|
||||||
output_path: str,
|
output_path: str,
|
||||||
base_model: BaseModelType,
|
base_model: BaseModelType,
|
||||||
model_config: ControlNetModel.CheckpointConfig,
|
model_config: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Convert the controlnet from checkpoint format to diffusers format,
|
Convert the controlnet from checkpoint format to diffusers format,
|
||||||
cache it to disk, and return Path to converted
|
cache it to disk, and return Path to converted
|
||||||
file. If already on disk then just returns Path.
|
file. If already on disk then just returns Path.
|
||||||
"""
|
"""
|
||||||
|
print(f"DEBUG: controlnet config = {model_config}")
|
||||||
app_config = InvokeAIAppConfig.get_config()
|
app_config = InvokeAIAppConfig.get_config()
|
||||||
weights = app_config.root_path / model_path
|
weights = app_config.root_path / model_path
|
||||||
output_path = Path(output_path)
|
output_path = Path(output_path)
|
||||||
|
79
invokeai/configs/controlnet/cldm_v15.yaml
Normal file
79
invokeai/configs/controlnet/cldm_v15.yaml
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
model:
|
||||||
|
target: cldm.cldm.ControlLDM
|
||||||
|
params:
|
||||||
|
linear_start: 0.00085
|
||||||
|
linear_end: 0.0120
|
||||||
|
num_timesteps_cond: 1
|
||||||
|
log_every_t: 200
|
||||||
|
timesteps: 1000
|
||||||
|
first_stage_key: "jpg"
|
||||||
|
cond_stage_key: "txt"
|
||||||
|
control_key: "hint"
|
||||||
|
image_size: 64
|
||||||
|
channels: 4
|
||||||
|
cond_stage_trainable: false
|
||||||
|
conditioning_key: crossattn
|
||||||
|
monitor: val/loss_simple_ema
|
||||||
|
scale_factor: 0.18215
|
||||||
|
use_ema: False
|
||||||
|
only_mid_control: False
|
||||||
|
|
||||||
|
control_stage_config:
|
||||||
|
target: cldm.cldm.ControlNet
|
||||||
|
params:
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
hint_channels: 3
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_heads: 8
|
||||||
|
use_spatial_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 768
|
||||||
|
use_checkpoint: True
|
||||||
|
legacy: False
|
||||||
|
|
||||||
|
unet_config:
|
||||||
|
target: cldm.cldm.ControlledUnetModel
|
||||||
|
params:
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
out_channels: 4
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_heads: 8
|
||||||
|
use_spatial_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 768
|
||||||
|
use_checkpoint: True
|
||||||
|
legacy: False
|
||||||
|
|
||||||
|
first_stage_config:
|
||||||
|
target: ldm.models.autoencoder.AutoencoderKL
|
||||||
|
params:
|
||||||
|
embed_dim: 4
|
||||||
|
monitor: val/rec_loss
|
||||||
|
ddconfig:
|
||||||
|
double_z: true
|
||||||
|
z_channels: 4
|
||||||
|
resolution: 256
|
||||||
|
in_channels: 3
|
||||||
|
out_ch: 3
|
||||||
|
ch: 128
|
||||||
|
ch_mult:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 4
|
||||||
|
- 4
|
||||||
|
num_res_blocks: 2
|
||||||
|
attn_resolutions: []
|
||||||
|
dropout: 0.0
|
||||||
|
lossconfig:
|
||||||
|
target: torch.nn.Identity
|
||||||
|
|
||||||
|
cond_stage_config:
|
||||||
|
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
|
85
invokeai/configs/controlnet/cldm_v21.yaml
Normal file
85
invokeai/configs/controlnet/cldm_v21.yaml
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
model:
|
||||||
|
target: cldm.cldm.ControlLDM
|
||||||
|
params:
|
||||||
|
linear_start: 0.00085
|
||||||
|
linear_end: 0.0120
|
||||||
|
num_timesteps_cond: 1
|
||||||
|
log_every_t: 200
|
||||||
|
timesteps: 1000
|
||||||
|
first_stage_key: "jpg"
|
||||||
|
cond_stage_key: "txt"
|
||||||
|
control_key: "hint"
|
||||||
|
image_size: 64
|
||||||
|
channels: 4
|
||||||
|
cond_stage_trainable: false
|
||||||
|
conditioning_key: crossattn
|
||||||
|
monitor: val/loss_simple_ema
|
||||||
|
scale_factor: 0.18215
|
||||||
|
use_ema: False
|
||||||
|
only_mid_control: False
|
||||||
|
|
||||||
|
control_stage_config:
|
||||||
|
target: cldm.cldm.ControlNet
|
||||||
|
params:
|
||||||
|
use_checkpoint: True
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
hint_channels: 3
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_head_channels: 64 # need to fix for flash-attn
|
||||||
|
use_spatial_transformer: True
|
||||||
|
use_linear_in_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 1024
|
||||||
|
legacy: False
|
||||||
|
|
||||||
|
unet_config:
|
||||||
|
target: cldm.cldm.ControlledUnetModel
|
||||||
|
params:
|
||||||
|
use_checkpoint: True
|
||||||
|
image_size: 32 # unused
|
||||||
|
in_channels: 4
|
||||||
|
out_channels: 4
|
||||||
|
model_channels: 320
|
||||||
|
attention_resolutions: [ 4, 2, 1 ]
|
||||||
|
num_res_blocks: 2
|
||||||
|
channel_mult: [ 1, 2, 4, 4 ]
|
||||||
|
num_head_channels: 64 # need to fix for flash-attn
|
||||||
|
use_spatial_transformer: True
|
||||||
|
use_linear_in_transformer: True
|
||||||
|
transformer_depth: 1
|
||||||
|
context_dim: 1024
|
||||||
|
legacy: False
|
||||||
|
|
||||||
|
first_stage_config:
|
||||||
|
target: ldm.models.autoencoder.AutoencoderKL
|
||||||
|
params:
|
||||||
|
embed_dim: 4
|
||||||
|
monitor: val/rec_loss
|
||||||
|
ddconfig:
|
||||||
|
#attn_type: "vanilla-xformers"
|
||||||
|
double_z: true
|
||||||
|
z_channels: 4
|
||||||
|
resolution: 256
|
||||||
|
in_channels: 3
|
||||||
|
out_ch: 3
|
||||||
|
ch: 128
|
||||||
|
ch_mult:
|
||||||
|
- 1
|
||||||
|
- 2
|
||||||
|
- 4
|
||||||
|
- 4
|
||||||
|
num_res_blocks: 2
|
||||||
|
attn_resolutions: []
|
||||||
|
dropout: 0.0
|
||||||
|
lossconfig:
|
||||||
|
target: torch.nn.Identity
|
||||||
|
|
||||||
|
cond_stage_config:
|
||||||
|
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
|
||||||
|
params:
|
||||||
|
freeze: True
|
||||||
|
layer: "penultimate"
|
Loading…
Reference in New Issue
Block a user