Remove core safetensors->diffusers conversion models

- No longer install core conversion models. Use the HuggingFace cache to load
  them if and when needed (see the sketch after this list).

- Call directly into the diffusers library to perform conversions, with only shallow
  wrappers around its conversion functions to massage arguments, etc.

- At root configuration time, do not create all the possible model subdirectories,
  but let them be created and populated at model install time.

- Remove checks for missing core conversion files, since they are no
  longer installed.
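
Illustrative sketch (not part of this commit) of the "load from the HuggingFace cache if and when needed" approach: conversion-time dependencies such as the safety checker are fetched with from_pretrained(), which downloads into and thereafter reuses the local HF hub cache, so nothing has to be pre-installed under models/core/convert. The helper name below is hypothetical; the repo id is the one used in the diff.

    # Sketch only: fetch a conversion-time dependency on demand via the HF hub cache.
    from transformers import AutoFeatureExtractor
    from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

    SAFETY_CHECKER_REPO = "CompVis/stable-diffusion-safety-checker"

    def load_safety_checker_from_cache():
        # from_pretrained() downloads the weights on first use and reuses the local
        # HuggingFace cache on later calls; no models/core/convert copy is needed.
        feature_extractor = AutoFeatureExtractor.from_pretrained(SAFETY_CHECKER_REPO)
        safety_checker = StableDiffusionSafetyChecker.from_pretrained(SAFETY_CHECKER_REPO)
        return feature_extractor, safety_checker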
Lincoln Stein 2024-03-17 19:13:18 -04:00
parent a0420d1442
commit 71a1740740
8 changed files with 69 additions and 1822 deletions

View File

@@ -492,6 +492,8 @@ class ModelInstallService(ModelInstallServiceBase):
         for cur_base_model in BaseModelType:
             for cur_model_type in ModelType:
                 models_dir = self._app_config.models_path / Path(cur_base_model.value, cur_model_type.value)
+                if not models_dir.exists():
+                    continue
                 installed.update(self.scan_directory(models_dir))
         self._logger.info(f"{len(installed)} new models registered; {len(defunct_models)} unregistered")

View File

@@ -11,17 +11,6 @@ def check_invokeai_root(config: InvokeAIAppConfig):
     try:
         assert config.db_path.parent.exists(), f"{config.db_path.parent} not found"
         assert config.models_path.exists(), f"{config.models_path} not found"
-        if not config.ignore_missing_core_models:
-            for model in [
-                "CLIP-ViT-bigG-14-laion2B-39B-b160k",
-                "bert-base-uncased",
-                "clip-vit-large-patch14",
-                "sd-vae-ft-mse",
-                "stable-diffusion-2-clip",
-                "stable-diffusion-safety-checker",
-            ]:
-                path = config.models_path / f"core/convert/{model}"
-                assert path.exists(), f"{path} is missing"
     except Exception as e:
         print()
         print(f"An exception has occurred: {str(e)}")
@@ -32,10 +21,5 @@ def check_invokeai_root(config: InvokeAIAppConfig):
         print(
             '** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **'
         )
-        print(
-            '** (To skip this check completely, add "--ignore_missing_core_models" to your CLI args. Not installing '
-            "these core models will prevent the loading of some or all .safetensors and .ckpt files. However, you can "
-            "always come back and install these core models in the future.)"
-        )
         input("Press any key to continue...")
         sys.exit(0)

View File

@@ -25,20 +25,20 @@ import npyscreen
 import psutil
 import torch
 import transformers
-from diffusers import AutoencoderKL, ModelMixin
+from diffusers import ModelMixin
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from huggingface_hub import HfFolder
 from huggingface_hub import login as hf_hub_login
 from omegaconf import DictConfig, OmegaConf
 from pydantic.error_wrappers import ValidationError
 from tqdm import tqdm
-from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextConfig, CLIPTextModel, CLIPTokenizer
+from transformers import AutoFeatureExtractor
 
 import invokeai.configs as configs
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.install.install_helper import InstallHelper, InstallSelections
 from invokeai.backend.install.legacy_arg_parsing import legacy_parser
-from invokeai.backend.model_manager import BaseModelType, ModelType
+from invokeai.backend.model_manager import ModelType
 from invokeai.backend.util import choose_precision, choose_torch_device
 from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.frontend.install.model_install import addModelsForm
@@ -210,51 +210,15 @@ def download_with_progress_bar(model_url: str, model_dest: str, label: str = "the"):
         print(traceback.format_exc(), file=sys.stderr)
 
 
-def download_conversion_models():
+def download_safety_checker():
     target_dir = config.models_path / "core/convert"
     kwargs = {}  # for future use
     try:
-        logger.info("Downloading core tokenizers and text encoders")
-        # bert
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", category=DeprecationWarning)
-            bert = BertTokenizerFast.from_pretrained("bert-base-uncased", **kwargs)
-            bert.save_pretrained(target_dir / "bert-base-uncased", safe_serialization=True)
-
-        # sd-1
-        repo_id = "openai/clip-vit-large-patch14"
-        hf_download_from_pretrained(CLIPTokenizer, repo_id, target_dir / "clip-vit-large-patch14")
-        hf_download_from_pretrained(CLIPTextModel, repo_id, target_dir / "clip-vit-large-patch14")
-
-        # sd-2
-        repo_id = "stabilityai/stable-diffusion-2"
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "tokenizer", safe_serialization=True)
-        pipeline = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "text_encoder", safe_serialization=True)
-
-        # sd-xl - tokenizer_2
-        repo_id = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
-        _, model_name = repo_id.split("/")
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-        pipeline = CLIPTextConfig.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-
-        # VAE
-        logger.info("Downloading stable diffusion VAE")
-        vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", **kwargs)
-        vae.save_pretrained(target_dir / "sd-vae-ft-mse", safe_serialization=True)
-
         # safety checking
         logger.info("Downloading safety checker")
         repo_id = "CompVis/stable-diffusion-safety-checker"
         pipeline = AutoFeatureExtractor.from_pretrained(repo_id, **kwargs)
         pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
         pipeline = StableDiffusionSafetyChecker.from_pretrained(repo_id, **kwargs)
         pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
     except KeyboardInterrupt:
@@ -307,7 +271,7 @@ def download_lama():
 def download_support_models() -> None:
     download_realesrgan()
     download_lama()
-    download_conversion_models()
+    download_safety_checker()
 
 
 # -------------------------------------
@@ -744,12 +708,7 @@ def initialize_rootdir(root: Path, yes_to_all: bool = False):
     shutil.copytree(configs_src, configs_dest, dirs_exist_ok=True)
 
     dest = root / "models"
-    for model_base in BaseModelType:
-        for model_type in ModelType:
-            path = dest / model_base.value / model_type.value
-            path.mkdir(parents=True, exist_ok=True)
-    path = dest / "core"
-    path.mkdir(parents=True, exist_ok=True)
+    dest.mkdir(parents=True, exist_ok=True)
 
 
 # -------------------------------------

File diff suppressed because it is too large

View File

@@ -3,9 +3,6 @@
 
 from pathlib import Path
 
-import torch
-from safetensors.torch import load_file as safetensors_load_file
-
 from invokeai.backend.model_manager import (
     AnyModelConfig,
     BaseModelType,
@@ -37,27 +34,25 @@ class ControlNetLoader(GenericDiffusersLoader):
         return True
 
     def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
-        if config.base not in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
-            raise Exception(f"ControlNet conversion not supported for model type: {config.base}")
-        else:
-            assert isinstance(config, CheckpointConfigBase)
-            config_file = config.config_path
+        assert isinstance(config, CheckpointConfigBase)
+        config_file = config.config_path
 
-        if model_path.suffix == ".safetensors":
-            checkpoint = safetensors_load_file(model_path, device="cpu")
-        else:
-            checkpoint = torch.load(model_path, map_location="cpu")
-
-        # sometimes weights are hidden under "state_dict", and sometimes not
-        if "state_dict" in checkpoint:
-            checkpoint = checkpoint["state_dict"]
-
-        convert_controlnet_to_diffusers(
-            model_path,
-            output_path,
-            original_config_file=self._app_config.root_path / config_file,
-            image_size=512,
-            scan_needed=True,
-            from_safetensors=model_path.suffix == ".safetensors",
-        )
+        image_size = (
+            512
+            if config.base == BaseModelType.StableDiffusion1
+            else 768
+            if config.base == BaseModelType.StableDiffusion2
+            else 1024
+        )
+
+        self._logger.info(f"Converting {model_path} to diffusers format")
+        with open(self._app_config.root_path / config_file, "r") as config_stream:
+            convert_controlnet_to_diffusers(
+                model_path,
+                output_path,
+                original_config_file=config_stream,
+                image_size=image_size,
+                precision=self._torch_dtype,
+                from_safetensors=model_path.suffix == ".safetensors",
+            )
         return output_path

View File

@@ -4,9 +4,6 @@
 from pathlib import Path
 from typing import Optional
 
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
-
 from invokeai.backend.model_manager import (
     AnyModel,
     AnyModelConfig,
@@ -14,7 +11,7 @@ from invokeai.backend.model_manager import (
     ModelFormat,
     ModelRepoVariant,
     ModelType,
-    ModelVariantType,
+    SchedulerPredictionType,
     SubModelType,
 )
 from invokeai.backend.model_manager.config import CheckpointConfigBase, MainCheckpointConfig
@@ -68,27 +65,31 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
     def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
         assert isinstance(config, MainCheckpointConfig)
-        variant = config.variant
         base = config.base
-        pipeline_class = (
-            StableDiffusionInpaintPipeline if variant == ModelVariantType.Inpaint else StableDiffusionPipeline
-        )
         config_file = config.config_path
+        prediction_type = config.prediction_type.value
+        upcast_attention = config.upcast_attention
+        image_size = (
+            1024
+            if base == BaseModelType.StableDiffusionXL
+            else 768
+            if config.prediction_type == SchedulerPredictionType.VPrediction and base == BaseModelType.StableDiffusion2
+            else 512
+        )
 
         self._logger.info(f"Converting {model_path} to diffusers format")
         convert_ckpt_to_diffusers(
             model_path,
             output_path,
             model_type=self.model_base_to_model_type[base],
-            model_version=base,
-            model_variant=variant,
             original_config_file=self._app_config.root_path / config_file,
             extract_ema=True,
-            scan_needed=True,
-            pipeline_class=pipeline_class,
             from_safetensors=model_path.suffix == ".safetensors",
             precision=self._torch_dtype,
+            prediction_type=prediction_type,
+            image_size=image_size,
+            upcast_attention=upcast_attention,
             load_safety_checker=False,
         )
         return output_path

View File

@@ -57,12 +57,12 @@ class VAELoader(GenericDiffusersLoader):
         ckpt_config = OmegaConf.load(self._app_config.root_path / config_file)
         assert isinstance(ckpt_config, DictConfig)
 
+        self._logger.info(f"Converting {model_path} to diffusers format")
         vae_model = convert_ldm_vae_to_diffusers(
             checkpoint=checkpoint,
             vae_config=ckpt_config,
             image_size=512,
+            precision=self._torch_dtype,
         )
-        vae_model.to(self._torch_dtype)  # set precision appropriately
         vae_model.save_pretrained(output_path, safe_serialization=True)
         return output_path

View File

@@ -319,7 +319,7 @@ class ModelProbe(object):
     @classmethod
     def _scan_and_load_checkpoint(cls, model_path: Path) -> CkptType:
         with SilenceWarnings():
-            if model_path.suffix.endswith((".ckpt", ".pt", ".bin")):
+            if model_path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                 cls._scan_model(model_path.name, model_path)
                 model = torch.load(model_path)
                 assert isinstance(model, dict)