Support both v2-v and v2-e legacy ckpt models in v2.3 (#2907)

# Support SD version 2 "epsilon" and "v-predict" inference configurations in v2.3

This is a port of the `main` PR #2870 back into v2.3. It allows both
"epsilon" V2 inference models (e.g. "v2-base") and "v-predict" models
(e.g. "v2-768") to be imported and converted into correct diffusers
models. Conversion depends on picking the right configuration file, and
since there is no intrinsic difference between the two kinds of
checkpoint, when we detect that a V2 model is being imported we fall
back to asking the user to select the model type.
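
For anyone verifying a conversion afterward, the epsilon/v-predict distinction ends up recorded as the `prediction_type` field of the scheduler in the emitted diffusers folder. A minimal check, assuming the `diffusers` package and a hypothetical output directory:

```python
# Sketch only: "models/converted-sd2" is a hypothetical conversion output path.
from diffusers import DDIMScheduler

sched = DDIMScheduler.from_pretrained("models/converted-sd2", subfolder="scheduler")
# Expect "epsilon" for v2-base style checkpoints and "v_prediction" for 768px models.
print(sched.config.prediction_type)
```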
commit b5bd5240b6 by blessedcoolant, 2023-03-12 04:42:16 +13:00 (committed via GitHub)
9 changed files with 236 additions and 64 deletions

View File

@@ -0,0 +1,67 @@
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False # we set this to false because this is an inference only config

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: True
        use_fp16: True
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_head_channels: 64 # need to fix for flash-attn
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: 1
        context_dim: 1024
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: True
        layer: "penultimate"

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,7 @@
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>InvokeAI - A Stable Diffusion Toolkit</title>
    <link rel="shortcut icon" type="icon" href="./assets/favicon-0d253ced.ico" />
    <script type="module" crossorigin src="./assets/index-720872d1.js"></script>
    <script type="module" crossorigin src="./assets/index-c09cf9ca.js"></script>
    <link rel="stylesheet" href="./assets/index-14cb2922.css">
  </head>

View File

@@ -365,7 +365,8 @@
  "convertToDiffusersHelpText6": "Do you wish to convert this model?",
  "convertToDiffusersSaveLocation": "Save Location",
  "v1": "v1",
  "v2": "v2",
  "v2_base": "v2 (512px)",
  "v2_768": "v2 (768px)",
  "inpainting": "v1 Inpainting",
  "customConfig": "Custom Config",
  "pathToCustomConfig": "Path To Custom Config",

View File

@@ -365,7 +365,8 @@
  "convertToDiffusersHelpText6": "Do you wish to convert this model?",
  "convertToDiffusersSaveLocation": "Save Location",
  "v1": "v1",
  "v2": "v2",
  "v2_base": "v2 (512px)",
  "v2_768": "v2 (768px)",
  "inpainting": "v1 Inpainting",
  "customConfig": "Custom Config",
  "pathToCustomConfig": "Path To Custom Config",

View File

@@ -181,7 +181,8 @@ export default function SearchModels() {
  const configFiles = {
    v1: 'configs/stable-diffusion/v1-inference.yaml',
    v2: 'configs/stable-diffusion/v2-inference-v.yaml',
    v2_base: 'configs/stable-diffusion/v2-inference-v.yaml',
    v2_768: 'configs/stable-diffusion/v2-inference-v.yaml',
    inpainting: 'configs/stable-diffusion/v1-inpainting-inference.yaml',
    custom: pathToConfig,
  };
@@ -385,7 +386,8 @@
            >
              <Flex gap={4}>
                <Radio value="v1">{t('modelManager.v1')}</Radio>
                <Radio value="v2">{t('modelManager.v2')}</Radio>
                <Radio value="v2_base">{t('modelManager.v2_base')}</Radio>
                <Radio value="v2_768">{t('modelManager.v2_768')}</Radio>
                <Radio value="inpainting">
                  {t('modelManager.inpainting')}
                </Radio>

View File

@@ -22,7 +22,7 @@ from ..generate import Generate
from .args import (Args, dream_cmd_from_png, metadata_dumps,
                   metadata_from_png)
from .generator.diffusers_pipeline import PipelineIntermediateState
from .globals import Globals
from .globals import Globals, global_config_dir
from .image_util import make_grid
from .log import write_log
from .model_manager import ModelManager
@@ -33,7 +33,6 @@ from ..util import url_attachment_name
# global used in multiple functions (fix)
infile = None

def main():
    """Initialize command-line parsers and the diffusion model"""
    global infile
@@ -66,6 +65,9 @@ def main():
    Globals.sequential_guidance = args.sequential_guidance
    Globals.ckpt_convert = args.ckpt_convert

    # run any post-install patches needed
    run_patches()

    print(f">> Internet connectivity is {Globals.internet_available}")

    if not args.conf:
@@ -662,7 +664,16 @@ def import_model(model_path: str, gen, opt, completer, convert=False):
        )
    if not imported_name:
        print("** Import failed or was skipped")
        if config_file := _pick_configuration_file(completer):
            imported_name = gen.model_manager.heuristic_import(
                model_path,
                model_name=model_name,
                description=model_desc,
                convert=convert,
                model_config_file=config_file,
            )
    if not imported_name:
        print("** Aborting import.")
        return

    if not _verify_load(imported_name, gen):
@@ -676,6 +687,48 @@ def import_model(model_path: str, gen, opt, completer, convert=False):
    completer.update_models(gen.model_manager.list_models())
    print(f">> {imported_name} successfully installed")

def _pick_configuration_file(completer) -> Path:
    print(
"""
Please select the type of this model:
[1] A Stable Diffusion v1.x ckpt/safetensors model
[2] A Stable Diffusion v1.x inpainting ckpt/safetensors model
[3] A Stable Diffusion v2.x base model (512 pixels)
[4] A Stable Diffusion v2.x v-predictive model (768 pixels)
[5] Other (you will be prompted to enter the config file path)
[Q] I have no idea! Skip the import.
""")
    choices = [
        global_config_dir() / 'stable-diffusion' / x
        for x in [
            'v1-inference.yaml',
            'v1-inpainting-inference.yaml',
            'v2-inference.yaml',
            'v2-inference-v.yaml',
        ]
    ]
    ok = False
    while not ok:
        try:
            choice = input('select 1-5, Q > ').strip()
            if choice.startswith(('q', 'Q')):
                return
            if choice == '5':
                completer.complete_extensions(('.yaml',))
                choice = Path(input('Select config file for this model> ').strip()).absolute()
                completer.complete_extensions(None)
                ok = choice.exists()
            else:
                choice = choices[int(choice) - 1]
                ok = True
        except (ValueError, IndexError):
            print(f'{choice} is not a valid choice')
        except EOFError:
            return
    return choice

def _verify_load(model_name: str, gen) -> bool:
    print(">> Verifying that new model loads...")
    current_model = gen.model_name
@@ -1237,6 +1290,20 @@ def check_internet() -> bool:
    except:
        return False

# This routine performs any patch-ups needed after installation
def run_patches():
    # install ckpt configuration files that may have been added to the
    # distro after original root directory configuration
    import invokeai.configs as conf
    from shutil import copyfile

    root_configs = Path(global_config_dir(), 'stable-diffusion')
    repo_configs = Path(conf.__path__[0], 'stable-diffusion')
    for src in repo_configs.iterdir():
        dest = root_configs / src.name
        if not dest.exists():
            copyfile(src, dest)

if __name__ == '__main__':
    main()
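
Taken together, the retry path in import_model() composes with the new chooser roughly like this (a sketch with a hypothetical checkpoint path; `gen` and `completer` are the objects main() already wires up):

```python
# Hypothetical follow-up after an automatic import fails:
config_file = _pick_configuration_file(completer)    # user answers 1-5 or Q
if config_file is not None:
    imported_name = gen.model_manager.heuristic_import(
        "/downloads/mystery-sd2-model.safetensors",  # hypothetical path
        convert=True,
        model_config_file=config_file,  # explicit config bypasses probe_model_type()
    )
```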

View File

@@ -862,12 +862,16 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
    if original_config_file is None:
        model_type = ModelManager.probe_model_type(checkpoint)
        if model_type == SDLegacyType.V2:
        if model_type == SDLegacyType.V2_v:
            original_config_file = global_config_dir() / 'stable-diffusion' / 'v2-inference-v.yaml'
            if global_step == 110000:
                # v2.1 needs to upcast attention
                upcast_attention = True
        elif model_type == SDLegacyType.V2_e:
            original_config_file = (
                global_config_dir() / "stable-diffusion" / "v2-inference.yaml"
            )
        elif model_type == SDLegacyType.V1_INPAINT:
            original_config_file = global_config_dir() / 'stable-diffusion' / 'v1-inpainting-inference.yaml'
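
The `global_step == 110000` special case matches the SD 2.1 768px checkpoint, which needs its attention layers upcast to float32 to avoid numerical problems under fp16. On the diffusers side that setting survives as a UNet config flag; one way to inspect it on a published model (a sketch, assuming the `diffusers` package and Hub access):

```python
from diffusers import UNet2DConditionModel

# stabilityai/stable-diffusion-2-1 is the published v-prediction 768px model.
unet = UNet2DConditionModel.from_pretrained(
    "stabilityai/stable-diffusion-2-1", subfolder="unet"
)
print(unet.config.upcast_attention)  # expected: True
```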

View File

@@ -47,6 +47,8 @@ class SDLegacyType(Enum):
    V1 = 1
    V1_INPAINT = 2
    V2 = 3
    V2_e = 4
    V2_v = 5
    UNKNOWN = 99
@@ -724,15 +726,25 @@
        format. Valid return values include:
            SDLegacyType.V1
            SDLegacyType.V1_INPAINT
            SDLegacyType.V2
            SDLegacyType.V2   (V2 prediction type unknown)
            SDLegacyType.V2_e (V2 using 'epsilon' prediction type)
            SDLegacyType.V2_v (V2 using 'v_prediction' prediction type)
            SDLegacyType.UNKNOWN
        """
        key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
        if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024:
            return SDLegacyType.V2
        global_step = checkpoint.get('global_step')
        state_dict = checkpoint.get("state_dict") or checkpoint
        try:
            state_dict = checkpoint.get("state_dict") or checkpoint
            key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
            if key_name in state_dict and state_dict[key_name].shape[-1] == 1024:
                if global_step == 220000:
                    return SDLegacyType.V2_e
                elif global_step == 110000:
                    return SDLegacyType.V2_v
                else:
                    return SDLegacyType.V2
            # otherwise we assume a V1 file
            in_channels = state_dict[
                "model.diffusion_model.input_blocks.0.0.weight"
            ].shape[1]
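
Read on its own, the probe reduces to a handful of lines. A standalone sketch of the same heuristic (hypothetical checkpoint path, `torch` assumed):

```python
import torch

ckpt = torch.load("mystery.ckpt", map_location="cpu")  # hypothetical file
sd = ckpt.get("state_dict") or ckpt
key = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
if key in sd and sd[key].shape[-1] == 1024:
    # context_dim of 1024 means OpenCLIP conditioning, i.e. an SD-2 checkpoint;
    # the saved training step count is the only hint at the parameterization.
    print({220000: "V2_e", 110000: "V2_v"}.get(ckpt.get("global_step"), "V2"))
```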
@@ -746,12 +758,13 @@
        return SDLegacyType.UNKNOWN

    def heuristic_import(
        self,
        path_url_or_repo: str,
        convert: bool = False,
        model_name: str = None,
        description: str = None,
        commit_to_conf: Path = None,
        self,
        path_url_or_repo: str,
        convert: bool = False,
        model_name: str = None,
        description: str = None,
        model_config_file: Path = None,
        commit_to_conf: Path = None,
    ) -> str:
        """
        Accept a string which could be:
@@ -849,7 +862,7 @@
        if model_path.stem in self.config:  # already imported
            print(" | Already imported. Skipping")
            return
            return model_path.stem

        # another round of heuristics to guess the correct config file.
        checkpoint = (
@@ -857,32 +870,49 @@
            if model_path.suffix == ".safetensors"
            else torch.load(model_path)
        )
        model_type = self.probe_model_type(checkpoint)
        # additional probing needed if no config file provided
        if model_config_file is None:
            model_type = self.probe_model_type(checkpoint)
            if model_type == SDLegacyType.V1:
                print(" | SD-v1 model detected")
                model_config_file = Path(
                    Globals.root, "configs/stable-diffusion/v1-inference.yaml"
                )
            elif model_type == SDLegacyType.V1_INPAINT:
                print(" | SD-v1 inpainting model detected")
                model_config_file = Path(
                    Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
                )
            elif model_type == SDLegacyType.V2_v:
                print(" | SD-v2-v model detected")
                model_config_file = Path(
                    Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
                )
            elif model_type == SDLegacyType.V2_e:
                print(" | SD-v2-e model detected")
                model_config_file = Path(
                    Globals.root, "configs/stable-diffusion/v2-inference.yaml"
                )
            elif model_type == SDLegacyType.V2:
                print(
                    f"** {thing} is a V2 checkpoint file, but its parameterization cannot be determined. Please provide configuration file path."
                )
                return
            else:
                print(
                    f"** {thing} is a legacy checkpoint file but not a known Stable Diffusion model. Please provide configuration file path."
                )
                return
        model_config_file = None
        if model_type == SDLegacyType.V1:
            print(" | SD-v1 model detected")
            model_config_file = Path(
                Globals.root, "configs/stable-diffusion/v1-inference.yaml"
            )
        elif model_type == SDLegacyType.V1_INPAINT:
            print(" | SD-v1 inpainting model detected")
            model_config_file = Path(
                Globals.root, "configs/stable-diffusion/v1-inpainting-inference.yaml"
            )
        elif model_type == SDLegacyType.V2:
            print(
                " | SD-v2 model detected; model will be converted to diffusers format"
            )
            model_config_file = Path(
                Globals.root, "configs/stable-diffusion/v2-inference-v.yaml"
            )
        if model_config_file.name.startswith('v2'):
            convert = True
        else:
            print(
                f"** {thing} is a legacy checkpoint file but not in a known Stable Diffusion model. Skipping import"
                " | This SD-v2 model will be converted to diffusers format for use"
            )
            return

        if convert:
            diffuser_path = Path(