Remove core safetensors->diffusers conversion models

- No longer install core conversion models. Use the HuggingFace cache to load them if and when needed.
- Call directly into the diffusers library to perform conversions, with only shallow wrappers around the calls to massage arguments, etc.
- At root configuration time, do not create all the possible model subdirectories; instead, let them be created and populated at model install time.
- Remove checks for missing core conversion files, since they are no longer installed.
2024-08-30 20:32:17 +00:00 · 2024-03-17 19:13:18 -04:00
parent a0420d1442
commit 71a1740740
8 changed files with 69 additions and 1822 deletions
--- a/invokeai/app/services/model_install/model_install_default.py
+++ b/invokeai/app/services/model_install/model_install_default.py
@ -492,6 +492,8 @@ class ModelInstallService(ModelInstallServiceBase):
            for cur_base_model in BaseModelType:
                for cur_model_type in ModelType:
                    models_dir = self._app_config.models_path / Path(cur_base_model.value, cur_model_type.value)
+                    if not models_dir.exists():
+                        continue
                    installed.update(self.scan_directory(models_dir))
            self._logger.info(f"{len(installed)} new models registered; {len(defunct_models)} unregistered")

--- a/invokeai/backend/install/check_root.py
+++ b/invokeai/backend/install/check_root.py
@ -11,17 +11,6 @@ def check_invokeai_root(config: InvokeAIAppConfig):
    try:
        assert config.db_path.parent.exists(), f"{config.db_path.parent} not found"
        assert config.models_path.exists(), f"{config.models_path} not found"
-        if not config.ignore_missing_core_models:
-            for model in [
-                "CLIP-ViT-bigG-14-laion2B-39B-b160k",
-                "bert-base-uncased",
-                "clip-vit-large-patch14",
-                "sd-vae-ft-mse",
-                "stable-diffusion-2-clip",
-                "stable-diffusion-safety-checker",
-            ]:
-                path = config.models_path / f"core/convert/{model}"
-                assert path.exists(), f"{path} is missing"
    except Exception as e:
        print()
        print(f"An exception has occurred: {str(e)}")
@ -32,10 +21,5 @@ def check_invokeai_root(config: InvokeAIAppConfig):
        print(
            '** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **'
        )
-        print(
-            '** (To skip this check completely, add "--ignore_missing_core_models" to your CLI args. Not installing '
-            "these core models will prevent the loading of some or all .safetensors and .ckpt files. However, you can "
-            "always come back and install these core models in the future.)"
-        )
        input("Press any key to continue...")
        sys.exit(0)
--- a/invokeai/backend/install/invokeai_configure.py
+++ b/invokeai/backend/install/invokeai_configure.py
@ -25,20 +25,20 @@ import npyscreen
 import psutil
 import torch
 import transformers
-from diffusers import AutoencoderKL, ModelMixin
+from diffusers import ModelMixin
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from huggingface_hub import HfFolder
 from huggingface_hub import login as hf_hub_login
 from omegaconf import DictConfig, OmegaConf
 from pydantic.error_wrappers import ValidationError
 from tqdm import tqdm
-from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextConfig, CLIPTextModel, CLIPTokenizer
+from transformers import AutoFeatureExtractor

 import invokeai.configs as configs
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.install.install_helper import InstallHelper, InstallSelections
 from invokeai.backend.install.legacy_arg_parsing import legacy_parser
-from invokeai.backend.model_manager import BaseModelType, ModelType
+from invokeai.backend.model_manager import ModelType
 from invokeai.backend.util import choose_precision, choose_torch_device
 from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.frontend.install.model_install import addModelsForm
@ -210,51 +210,15 @@ def download_with_progress_bar(model_url: str, model_dest: str, label: str = "th
        print(traceback.format_exc(), file=sys.stderr)


-def download_conversion_models():
+def download_safety_checker():
    target_dir = config.models_path / "core/convert"
    kwargs = {}  # for future use
    try:
-        logger.info("Downloading core tokenizers and text encoders")
-
-        # bert
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", category=DeprecationWarning)
-            bert = BertTokenizerFast.from_pretrained("bert-base-uncased", **kwargs)
-            bert.save_pretrained(target_dir / "bert-base-uncased", safe_serialization=True)
-
-        # sd-1
-        repo_id = "openai/clip-vit-large-patch14"
-        hf_download_from_pretrained(CLIPTokenizer, repo_id, target_dir / "clip-vit-large-patch14")
-        hf_download_from_pretrained(CLIPTextModel, repo_id, target_dir / "clip-vit-large-patch14")
-
-        # sd-2
-        repo_id = "stabilityai/stable-diffusion-2"
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "tokenizer", safe_serialization=True)
-
-        pipeline = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "text_encoder", safe_serialization=True)
-
-        # sd-xl - tokenizer_2
-        repo_id = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
-        _, model_name = repo_id.split("/")
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-
-        pipeline = CLIPTextConfig.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-
-        # VAE
-        logger.info("Downloading stable diffusion VAE")
-        vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", **kwargs)
-        vae.save_pretrained(target_dir / "sd-vae-ft-mse", safe_serialization=True)
-
        # safety checking
        logger.info("Downloading safety checker")
        repo_id = "CompVis/stable-diffusion-safety-checker"
        pipeline = AutoFeatureExtractor.from_pretrained(repo_id, **kwargs)
        pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
-
        pipeline = StableDiffusionSafetyChecker.from_pretrained(repo_id, **kwargs)
        pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
    except KeyboardInterrupt:
@ -307,7 +271,7 @@ def download_lama():
 def download_support_models() -> None:
    download_realesrgan()
    download_lama()
-    download_conversion_models()
+    download_safety_checker()


 # -------------------------------------
@ -744,12 +708,7 @@ def initialize_rootdir(root: Path, yes_to_all: bool = False):
        shutil.copytree(configs_src, configs_dest, dirs_exist_ok=True)

    dest = root / "models"
-    for model_base in BaseModelType:
-        for model_type in ModelType:
-            path = dest / model_base.value / model_type.value
-            path.mkdir(parents=True, exist_ok=True)
-    path = dest / "core"
-    path.mkdir(parents=True, exist_ok=True)
+    dest.mkdir(parents=True, exist_ok=True)


 # -------------------------------------
--- a/invokeai/backend/model_manager/convert_ckpt_to_diffusers.py
+++ b/invokeai/backend/model_manager/convert_ckpt_to_diffusers.py
--- a/invokeai/backend/model_manager/load/model_loaders/controlnet.py
+++ b/invokeai/backend/model_manager/load/model_loaders/controlnet.py
@ -3,9 +3,6 @@

 from pathlib import Path

-import torch
-from safetensors.torch import load_file as safetensors_load_file
-
 from invokeai.backend.model_manager import (
    AnyModelConfig,
    BaseModelType,
@ -37,27 +34,25 @@ class ControlNetLoader(GenericDiffusersLoader):
            return True

    def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
-        if config.base not in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
-            raise Exception(f"ControlNet conversion not supported for model type: {config.base}")
-        else:
-            assert isinstance(config, CheckpointConfigBase)
-            config_file = config.config_path
+        assert isinstance(config, CheckpointConfigBase)
+        config_file = config.config_path

-        if model_path.suffix == ".safetensors":
-            checkpoint = safetensors_load_file(model_path, device="cpu")
-        else:
-            checkpoint = torch.load(model_path, map_location="cpu")
-
-        # sometimes weights are hidden under "state_dict", and sometimes not
-        if "state_dict" in checkpoint:
-            checkpoint = checkpoint["state_dict"]
-
-        convert_controlnet_to_diffusers(
-            model_path,
-            output_path,
-            original_config_file=self._app_config.root_path / config_file,
-            image_size=512,
-            scan_needed=True,
-            from_safetensors=model_path.suffix == ".safetensors",
+        image_size = (
+            512
+            if config.base == BaseModelType.StableDiffusion1
+            else 768
+            if config.base == BaseModelType.StableDiffusion2
+            else 1024
        )
+
+        self._logger.info(f"Converting {model_path} to diffusers format")
+        with open(self._app_config.root_path / config_file, "r") as config_stream:
+            convert_controlnet_to_diffusers(
+                model_path,
+                output_path,
+                original_config_file=config_stream,
+                image_size=image_size,
+                precision=self._torch_dtype,
+                from_safetensors=model_path.suffix == ".safetensors",
+            )
        return output_path
--- a/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
+++ b/invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@ -4,9 +4,6 @@
 from pathlib import Path
 from typing import Optional

-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
-
 from invokeai.backend.model_manager import (
    AnyModel,
    AnyModelConfig,
@ -14,7 +11,7 @@ from invokeai.backend.model_manager import (
    ModelFormat,
    ModelRepoVariant,
    ModelType,
-    ModelVariantType,
+    SchedulerPredictionType,
    SubModelType,
 )
 from invokeai.backend.model_manager.config import CheckpointConfigBase, MainCheckpointConfig
@ -68,27 +65,31 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):

    def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
        assert isinstance(config, MainCheckpointConfig)
-        variant = config.variant
        base = config.base
-        pipeline_class = (
-            StableDiffusionInpaintPipeline if variant == ModelVariantType.Inpaint else StableDiffusionPipeline
-        )

        config_file = config.config_path
+        prediction_type = config.prediction_type.value
+        upcast_attention = config.upcast_attention
+        image_size = (
+            1024
+            if base == BaseModelType.StableDiffusionXL
+            else 768
+            if config.prediction_type == SchedulerPredictionType.VPrediction and base == BaseModelType.StableDiffusion2
+            else 512
+        )

        self._logger.info(f"Converting {model_path} to diffusers format")
        convert_ckpt_to_diffusers(
            model_path,
            output_path,
            model_type=self.model_base_to_model_type[base],
-            model_version=base,
-            model_variant=variant,
            original_config_file=self._app_config.root_path / config_file,
            extract_ema=True,
-            scan_needed=True,
-            pipeline_class=pipeline_class,
            from_safetensors=model_path.suffix == ".safetensors",
            precision=self._torch_dtype,
+            prediction_type=prediction_type,
+            image_size=image_size,
+            upcast_attention=upcast_attention,
            load_safety_checker=False,
        )
        return output_path
--- a/invokeai/backend/model_manager/load/model_loaders/vae.py
+++ b/invokeai/backend/model_manager/load/model_loaders/vae.py
@ -57,12 +57,12 @@ class VAELoader(GenericDiffusersLoader):

        ckpt_config = OmegaConf.load(self._app_config.root_path / config_file)
        assert isinstance(ckpt_config, DictConfig)
-
+        self._logger.info(f"Converting {model_path} to diffusers format")
        vae_model = convert_ldm_vae_to_diffusers(
            checkpoint=checkpoint,
            vae_config=ckpt_config,
            image_size=512,
+            precision=self._torch_dtype,
        )
-        vae_model.to(self._torch_dtype)  # set precision appropriately
        vae_model.save_pretrained(output_path, safe_serialization=True)
        return output_path
--- a/invokeai/backend/model_manager/probe.py
+++ b/invokeai/backend/model_manager/probe.py
@ -319,7 +319,7 @@ class ModelProbe(object):
    @classmethod
    def _scan_and_load_checkpoint(cls, model_path: Path) -> CkptType:
        with SilenceWarnings():
-            if model_path.suffix.endswith((".ckpt", ".pt", ".bin")):
+            if model_path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                cls._scan_model(model_path.name, model_path)
                model = torch.load(model_path)
                assert isinstance(model, dict)