Merge branch 'lstein/installer-for-new-model-layout' of github.com:invoke-ai/InvokeAI into lstein/installer-for-new-model-layout
Commit: c91d1eacba
@@ -30,7 +30,7 @@ from invokeai.app.services.config import InvokeAIAppConfig
 from .model_manager import ModelManager
 from .model_cache import ModelCache
-from .models import SchedulerPredictionType, BaseModelType, ModelVariantType
+from .models import BaseModelType, ModelVariantType
 
 try:
     from omegaconf import OmegaConf
@@ -73,7 +73,9 @@ from transformers import (
 from ..stable_diffusion import StableDiffusionGeneratorPipeline
 
-MODEL_ROOT = None
+# TODO: redo in future
+#CONVERT_MODEL_ROOT = InvokeAIAppConfig.get_config().models_path / "core" / "convert"
+CONVERT_MODEL_ROOT = InvokeAIAppConfig.get_config().root_path / "models" / "core" / "convert"
 
 def shave_segments(path, n_shave_prefix_segments=1):
     """
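The hunk above replaces the mutable global MODEL_ROOT (previously assigned inside convert_ckpt_to_diffusers) with a module-level constant resolved from the app config when the module is imported. A minimal sketch of the same pattern, with a hypothetical root directory standing in for InvokeAIAppConfig:

from pathlib import Path

# Hypothetical stand-in for InvokeAIAppConfig.get_config().root_path.
root_path = Path.home() / "invokeai"

# The core conversion models live in a fixed location under the root,
# so the path can be computed once at import time instead of being
# passed in and stashed in a global on every call.
CONVERT_MODEL_ROOT = root_path / "models" / "core" / "convert"

print(CONVERT_MODEL_ROOT / "clip-vit-large-patch14")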
@@ -828,7 +830,7 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 
 def convert_ldm_clip_checkpoint(checkpoint):
-    text_model = CLIPTextModel.from_pretrained(MODEL_ROOT / 'clip-vit-large-patch14')
+    text_model = CLIPTextModel.from_pretrained(CONVERT_MODEL_ROOT / 'clip-vit-large-patch14')
     keys = list(checkpoint.keys())
 
     text_model_dict = {}
@@ -882,7 +884,7 @@ textenc_pattern = re.compile("|".join(protected.keys()))
 
 def convert_open_clip_checkpoint(checkpoint):
     text_model = CLIPTextModel.from_pretrained(
-        MODEL_ROOT / 'stable-diffusion-2-clip',
+        CONVERT_MODEL_ROOT / 'stable-diffusion-2-clip',
         subfolder='text_encoder',
     )
 
@@ -979,8 +981,6 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     original_config_file: str,
     extract_ema: bool = True,
     precision: torch.dtype = torch.float32,
-    upcast_attention: bool = False,
-    prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon,
     scan_needed: bool = True,
 ) -> StableDiffusionPipeline:
     """
@@ -994,8 +994,6 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
     :param checkpoint_path: Path to `.ckpt` file.
     :param original_config_file: Path to `.yaml` config file corresponding to the original architecture.
         If `None`, will be automatically inferred by looking for a key that only exists in SD2.0 models.
-    :param prediction_type: The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion
-        v1.X and Stable Diffusion v2 Base. Use `'v-prediction'` for Stable Diffusion v2.
     :param scheduler_type: Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler",
         "euler-ancestral", "dpm", "ddim"]`. :param model_type: The pipeline type. `None` to automatically infer, or one of
         `["FrozenOpenCLIPEmbedder", "FrozenCLIPEmbedder"]`. :param extract_ema: Only relevant for
@@ -1003,17 +1001,16 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         or not. Defaults to `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher
         quality images for inference. Non-EMA weights are usually better to continue fine-tuning.
     :param precision: precision to use - torch.float16, torch.float32 or torch.autocast
-    :param upcast_attention: Whether the attention computation should always be upcasted. This is necessary when
-        running stable diffusion 2.1.
     """
-    config = InvokeAIAppConfig.get_config()
+    if not isinstance(checkpoint_path, Path):
+        checkpoint_path = Path(checkpoint_path)
 
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         verbosity = dlogging.get_verbosity()
         dlogging.set_verbosity_error()
 
-        if str(checkpoint_path).endswith(".safetensors"):
+        if checkpoint_path.suffix == ".safetensors":
             checkpoint = load_file(checkpoint_path)
         else:
             if scan_needed:
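The hunk above normalizes checkpoint_path to a pathlib.Path and switches the format check from str(...).endswith(...) to Path.suffix. A small self-contained sketch of that loading branch, assuming safetensors and torch are installed (the optional scan step is omitted):

from pathlib import Path

import torch
from safetensors.torch import load_file

def load_checkpoint(checkpoint_path):
    # Accept either str or Path, as the updated function does.
    if not isinstance(checkpoint_path, Path):
        checkpoint_path = Path(checkpoint_path)

    # Path.suffix includes the leading dot, e.g. ".safetensors".
    if checkpoint_path.suffix == ".safetensors":
        return load_file(checkpoint_path)
    # Legacy pickle checkpoints (".ckpt") go through torch.load.
    return torch.load(checkpoint_path, map_location="cpu")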
@@ -1026,9 +1023,13 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
 
         original_config = OmegaConf.load(original_config_file)
 
-        if model_version == BaseModelType.StableDiffusion2 and prediction_type == SchedulerPredictionType.VPrediction:
+        if model_version == BaseModelType.StableDiffusion2 and original_config["model"]["params"]["parameterization"] == "v":
+            prediction_type = "v_prediction"
+            upcast_attention = True
             image_size = 768
         else:
+            prediction_type = "epsilon"
+            upcast_attention = False
             image_size = 512
 
         #
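Instead of trusting a caller-supplied prediction_type, the converter now reads the parameterization key from the original v2 YAML (loaded via OmegaConf) and derives prediction type, attention upcasting, and image size from it. A self-contained sketch of that decision, using an inline config in place of a real v2-inference-v.yaml and omitting the model_version check:

from omegaconf import OmegaConf

# Inline stand-in for a Stable Diffusion 2.x config; a real
# v2-inference-v.yaml carries model.params.parameterization: v
original_config = OmegaConf.create(
    {"model": {"params": {"parameterization": "v"}}}
)

if original_config["model"]["params"]["parameterization"] == "v":
    prediction_type = "v_prediction"
    upcast_attention = True
    image_size = 768
else:
    prediction_type = "epsilon"
    upcast_attention = False
    image_size = 512

print(prediction_type, upcast_attention, image_size)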
@@ -1083,7 +1084,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         if model_type == "FrozenOpenCLIPEmbedder":
             text_model = convert_open_clip_checkpoint(checkpoint)
             tokenizer = CLIPTokenizer.from_pretrained(
-                MODEL_ROOT / 'stable-diffusion-2-clip',
+                CONVERT_MODEL_ROOT / 'stable-diffusion-2-clip',
                 subfolder='tokenizer',
             )
             pipe = StableDiffusionPipeline(
@@ -1099,9 +1100,9 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
 
         elif model_type in ["FrozenCLIPEmbedder", "WeightedFrozenCLIPEmbedder"]:
             text_model = convert_ldm_clip_checkpoint(checkpoint)
-            tokenizer = CLIPTokenizer.from_pretrained(MODEL_ROOT / 'clip-vit-large-patch14')
-            safety_checker = StableDiffusionSafetyChecker.from_pretrained(MODEL_ROOT / 'stable-diffusion-safety-checker')
-            feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ROOT / 'stable-diffusion-safety-checker')
+            tokenizer = CLIPTokenizer.from_pretrained(CONVERT_MODEL_ROOT / 'clip-vit-large-patch14')
+            safety_checker = StableDiffusionSafetyChecker.from_pretrained(CONVERT_MODEL_ROOT / 'stable-diffusion-safety-checker')
+            feature_extractor = AutoFeatureExtractor.from_pretrained(CONVERT_MODEL_ROOT / 'stable-diffusion-safety-checker')
             pipe = StableDiffusionPipeline(
                 vae=vae.to(precision),
                 text_encoder=text_model.to(precision),
@@ -1115,7 +1116,7 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
         else:
             text_config = create_ldm_bert_config(original_config)
             text_model = convert_ldm_bert_checkpoint(checkpoint, text_config)
-            tokenizer = BertTokenizerFast.from_pretrained(MODEL_ROOT / "bert-base-uncased")
+            tokenizer = BertTokenizerFast.from_pretrained(CONVERT_MODEL_ROOT / "bert-base-uncased")
             pipe = LDMTextToImagePipeline(
                 vqvae=vae,
                 bert=text_model,
@@ -1131,7 +1132,6 @@ def load_pipeline_from_original_stable_diffusion_ckpt(
 def convert_ckpt_to_diffusers(
     checkpoint_path: Union[str, Path],
     dump_path: Union[str, Path],
-    model_root: Union[str, Path],
     **kwargs,
 ):
     """
@@ -1139,9 +1139,6 @@ def convert_ckpt_to_diffusers(
     and in addition a path-like object indicating the location of the desired diffusers
     model to be written.
     """
-    # setting global here to avoid massive changes late at night
-    global MODEL_ROOT
-    MODEL_ROOT = Path(model_root) / 'core/convert'
     pipe = load_pipeline_from_original_stable_diffusion_ckpt(checkpoint_path, **kwargs)
 
     pipe.save_pretrained(
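With the global gone, convert_ckpt_to_diffusers no longer takes a model_root argument; callers pass the checkpoint and dump path plus whatever keyword arguments load_pipeline_from_original_stable_diffusion_ckpt accepts. A hedged usage sketch; the import path is inferred from the relative import shown later in this diff and the file paths are illustrative only:

# Assumed module location; adjust to wherever convert_ckpt_to_diffusers
# lives in your checkout of the repository.
from invokeai.backend.model_management.convert_ckpt_to_diffusers import (
    convert_ckpt_to_diffusers,
)

convert_ckpt_to_diffusers(
    checkpoint_path="models/sd-v1-5.ckpt",          # illustrative path
    dump_path="models/converted/sd-v1-5",           # illustrative path
    # Remaining kwargs are forwarded to
    # load_pipeline_from_original_stable_diffusion_ckpt:
    original_config_file="configs/stable-diffusion/v1-inference.yaml",
    extract_ema=True,
    scan_needed=True,
)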
@@ -34,7 +34,7 @@ class StableDiffusion1Model(DiffusersModel):
     class CheckpointConfig(ModelConfigBase):
         model_format: Literal[StableDiffusion1ModelFormat.Checkpoint]
         vae: Optional[str] = Field(None)
-        config: Optional[str] = Field(None)
+        config: str
         variant: ModelVariantType
 
 
@@ -81,6 +81,8 @@ class StableDiffusion1Model(DiffusersModel):
         else:
             raise Exception("Unkown stable diffusion 1.* model format")
 
+        if ckpt_config_path is None:
+            ckpt_config_path = _select_ckpt_config(BaseModelType.StableDiffusion1, variant)
 
         return cls.create_config(
             path=path,
@@ -116,7 +118,7 @@ class StableDiffusion1Model(DiffusersModel):
                 version=BaseModelType.StableDiffusion1,
                 model_config=config,
                 output_path=output_path,
-            ) # TODO: args
+            )
         else:
             return model_path
 
@@ -183,13 +185,8 @@ class StableDiffusion2Model(DiffusersModel):
         else:
             raise Exception("Unkown stable diffusion 2.* model format")
 
-        if variant == ModelVariantType.Normal:
-            prediction_type = SchedulerPredictionType.VPrediction
-            upcast_attention = True
-
-        else:
-            prediction_type = SchedulerPredictionType.Epsilon
-            upcast_attention = False
+        if ckpt_config_path is None:
+            ckpt_config_path = _select_ckpt_config(BaseModelType.StableDiffusion2, variant)
 
         return cls.create_config(
             path=path,
@@ -197,8 +194,6 @@ class StableDiffusion2Model(DiffusersModel):
 
             config=ckpt_config_path,
             variant=variant,
-            prediction_type=prediction_type,
-            upcast_attention=upcast_attention,
         )
 
     @classproperty
@@ -227,7 +222,7 @@ class StableDiffusion2Model(DiffusersModel):
                 version=BaseModelType.StableDiffusion2,
                 model_config=config,
                 output_path=output_path,
-            ) # TODO: args
+            )
         else:
             return model_path
 
@@ -238,18 +233,18 @@ def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
             ModelVariantType.Inpaint: "v1-inpainting-inference.yaml",
         },
         BaseModelType.StableDiffusion2: {
-            # code further will manually set upcast_attention and v_prediction
-            ModelVariantType.Normal: "v2-inference.yaml",
+            ModelVariantType.Normal: "v2-inference-v.yaml", # best guess, as we can't differentiate with base(512)
             ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
             ModelVariantType.Depth: "v2-midas-inference.yaml",
         }
     }
 
+    app_config = InvokeAIAppConfig.get_config()
     try:
-        # TODO: path
-        #model_config.config = app_config.config_dir / "stable-diffusion" / ckpt_configs[version][model_config.variant]
-        #return InvokeAIAppConfig.get_config().legacy_conf_dir / ckpt_configs[version][variant]
-        return InvokeAIAppConfig.get_config().root_dir / "configs" / "stable-diffusion" / ckpt_configs[version][variant]
+        config_path = app_config.legacy_conf_path / ckpt_configs[version][variant]
+        if config_path.is_relative_to(app_config.root_path):
+            config_path = config_path.relative_to(app_config.root_path)
+        return str(config_path)
 
     except:
         return None
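The rewritten lookup resolves the legacy YAML under the app's legacy_conf_path and, when that file sits inside the InvokeAI root, stores it as a root-relative string so stored configs stay portable if the root moves. A minimal sketch of just the path arithmetic, with hypothetical directories standing in for the real app config:

from pathlib import Path

# Hypothetical stand-ins for app_config.root_path / legacy_conf_path.
root_path = Path("/opt/invokeai")
legacy_conf_path = root_path / "configs" / "stable-diffusion"

config_path = legacy_conf_path / "v2-inference-v.yaml"

# Path.is_relative_to requires Python 3.9+.
if config_path.is_relative_to(root_path):
    config_path = config_path.relative_to(root_path)

print(str(config_path))  # configs/stable-diffusion/v2-inference-v.yaml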
@@ -268,36 +263,14 @@ def _convert_ckpt_and_cache(
    """
    app_config = InvokeAIAppConfig.get_config()

-    if model_config.config is None:
-        model_config.config = _select_ckpt_config(version, model_config.variant)
-        if model_config.config is None:
-            raise Exception(f"Model variant {model_config.variant} not supported for {version}")
-
-
    weights = app_config.root_dir / model_config.path
    config_file = app_config.root_dir / model_config.config
    output_path = Path(output_path)

-    if version == BaseModelType.StableDiffusion1:
-        upcast_attention = False
-        prediction_type = SchedulerPredictionType.Epsilon
-
-    elif version == BaseModelType.StableDiffusion2:
-        upcast_attention = model_config.upcast_attention
-        prediction_type = model_config.prediction_type
-
-    else:
-        raise Exception(f"Unknown model provided: {version}")
-
-
    # return cached version if it exists
    if output_path.exists():
        return output_path

-    # TODO: I think that it more correctly to convert with embedded vae
-    # as if user will delete custom vae he will got not embedded but also custom vae
-    #vae_ckpt_path, vae_model = self._get_vae_for_conversion(weights, mconfig)
-
    # to avoid circular import errors
    from ..convert_ckpt_to_diffusers import convert_ckpt_to_diffusers
    with SilenceWarnings():
@@ -308,9 +281,6 @@ def _convert_ckpt_and_cache(
             model_variant=model_config.variant,
             original_config_file=config_file,
             extract_ema=True,
-            upcast_attention=upcast_attention,
-            prediction_type=prediction_type,
             scan_needed=True,
-            model_root=app_config.models_path,
         )
     return output_path