Remove core conversion models (#5981)

## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [X] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [X] Yes
- [ ] No


## Description

We've been using a forked copy of the diffusers safetensors->diffusers
model conversion code, hacked to read CLIP and the other models needed
for conversion from the local InvokeAI root models directory. This was
becoming unsustainable as the codebases diverged, and it also required
installing and maintaining the "core/convert" directory.

This PR gets rid of the hacked conversion code and reverts to using the
native diffusers methods. Core convert models are no longer installed at
root configure time. Instead, we rely on the HuggingFace Hub to download
the conversion models if and when they are needed. They are relatively
small, and the initial download delay seems minor.
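
As a rough illustration (not this PR's exact code path), stock diffusers can now handle the whole conversion on its own; the checkpoint filename below is hypothetical, and the CLIP/VAE sub-models are fetched from the Hub and cached on first use:

```python
# Hedged sketch: convert a checkpoint with stock diffusers, letting the
# HuggingFace hub supply (and cache) the tokenizer/text-encoder/VAE weights.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file(
    "v1-5-pruned-emaonly.safetensors"  # hypothetical local checkpoint
)
pipe.save_pretrained("converted/v1-5", safe_serialization=True)
```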

Conversion of SD-1, SD-2 (both epsilon and v-prediction), SDXL, VAE and
ControlNet SD-1/2 models has been tested. ControlNet SDXL models are
still a WIP due to the need for some work on the prober.

The main implication of this change is that InvokeAI is no longer
internet-independent: it will need an internet connection at least the
first time a safetensors file needs to be converted. However, the "no
internet" rule is already violated in several other places, and I
suggest that we abandon this principle.
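
For what it's worth, the downloads land in the standard HuggingFace hub cache, so only the first conversion needs the network. A quick way to verify a sub-model is already cached (the repo id is just an example):

```python
# Resolves entirely from the local hub cache; raises LocalEntryNotFoundError
# if the repo has never been downloaded on this machine.
from huggingface_hub import snapshot_download

path = snapshot_download("openai/clip-vit-large-patch14", local_files_only=True)
print(path)  # e.g. ~/.cache/huggingface/hub/models--openai--clip-vit-large-patch14/...
```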

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #5964 

## QA Instructions, Screenshots, Recordings

1. Remove or move `$INVOKEAI_ROOT/models/.cache`
2. Move `$INVOKEAI_ROOT/models/core/convert` aside
3. Try generating with an unconverted .safetensors model (a scripted version of these steps is sketched below).
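
If it's easier, the setup steps can be scripted along these lines (the `.bak` suffix is illustrative):

```python
# Rough sketch of the QA setup: move the hub cache and the old core/convert
# directory aside so the next conversion must re-download what it needs.
import os
import shutil
from pathlib import Path

root = Path(os.environ["INVOKEAI_ROOT"])
for rel in ("models/.cache", "models/core/convert"):
    target = root / rel
    if target.exists():
        shutil.move(str(target), str(target) + ".bak")  # keep a backup rather than deleting
# Then launch InvokeAI and generate with an unconverted .safetensors model.
```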

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Merge Plan

Merge when approved.

<!--
A merge plan describes how this PR should be handled after it is
approved.

Example merge plans:
- "This PR can be merged when approved"
- "This must be squash-merged when approved"
- "DO NOT MERGE - I will rebase and tidy commits before merging"
- "#dev-chat on discord needs to be advised of this change when it is
merged"

A merge plan is particularly important for large PRs or PRs that touch
the
database in any way.
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
---

Commit `a6d64f69e1` by psychedelicious, committed via GitHub on 2024-03-18 11:11:15 +11:00.
8 changed files with 69 additions and 1822 deletions.


```diff
@@ -492,6 +492,8 @@ class ModelInstallService(ModelInstallServiceBase):
         for cur_base_model in BaseModelType:
             for cur_model_type in ModelType:
                 models_dir = self._app_config.models_path / Path(cur_base_model.value, cur_model_type.value)
+                if not models_dir.exists():
+                    continue
                 installed.update(self.scan_directory(models_dir))
         self._logger.info(f"{len(installed)} new models registered; {len(defunct_models)} unregistered")
```


```diff
@@ -11,17 +11,6 @@ def check_invokeai_root(config: InvokeAIAppConfig):
     try:
         assert config.db_path.parent.exists(), f"{config.db_path.parent} not found"
         assert config.models_path.exists(), f"{config.models_path} not found"
-        if not config.ignore_missing_core_models:
-            for model in [
-                "CLIP-ViT-bigG-14-laion2B-39B-b160k",
-                "bert-base-uncased",
-                "clip-vit-large-patch14",
-                "sd-vae-ft-mse",
-                "stable-diffusion-2-clip",
-                "stable-diffusion-safety-checker",
-            ]:
-                path = config.models_path / f"core/convert/{model}"
-                assert path.exists(), f"{path} is missing"
     except Exception as e:
         print()
         print(f"An exception has occurred: {str(e)}")
@@ -32,10 +21,5 @@ def check_invokeai_root(config: InvokeAIAppConfig):
     print(
         '** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **'
     )
-    print(
-        '** (To skip this check completely, add "--ignore_missing_core_models" to your CLI args. Not installing '
-        "these core models will prevent the loading of some or all .safetensors and .ckpt files. However, you can "
-        "always come back and install these core models in the future.)"
-    )
     input("Press any key to continue...")
     sys.exit(0)
```


```diff
@@ -25,20 +25,20 @@
 import npyscreen
 import psutil
 import torch
 import transformers
-from diffusers import AutoencoderKL, ModelMixin
+from diffusers import ModelMixin
 from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 from huggingface_hub import HfFolder
 from huggingface_hub import login as hf_hub_login
 from omegaconf import DictConfig, OmegaConf
 from pydantic.error_wrappers import ValidationError
 from tqdm import tqdm
-from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextConfig, CLIPTextModel, CLIPTokenizer
+from transformers import AutoFeatureExtractor
 
 import invokeai.configs as configs
 from invokeai.app.services.config import InvokeAIAppConfig
 from invokeai.backend.install.install_helper import InstallHelper, InstallSelections
 from invokeai.backend.install.legacy_arg_parsing import legacy_parser
-from invokeai.backend.model_manager import BaseModelType, ModelType
+from invokeai.backend.model_manager import ModelType
 from invokeai.backend.util import choose_precision, choose_torch_device
 from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.frontend.install.model_install import addModelsForm
@@ -210,51 +210,15 @@ def download_with_progress_bar(model_url: str, model_dest: str, label: str = "th
         print(traceback.format_exc(), file=sys.stderr)
 
 
-def download_conversion_models():
+def download_safety_checker():
     target_dir = config.models_path / "core/convert"
     kwargs = {}  # for future use
     try:
-        logger.info("Downloading core tokenizers and text encoders")
-        # bert
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", category=DeprecationWarning)
-            bert = BertTokenizerFast.from_pretrained("bert-base-uncased", **kwargs)
-            bert.save_pretrained(target_dir / "bert-base-uncased", safe_serialization=True)
-        # sd-1
-        repo_id = "openai/clip-vit-large-patch14"
-        hf_download_from_pretrained(CLIPTokenizer, repo_id, target_dir / "clip-vit-large-patch14")
-        hf_download_from_pretrained(CLIPTextModel, repo_id, target_dir / "clip-vit-large-patch14")
-        # sd-2
-        repo_id = "stabilityai/stable-diffusion-2"
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, subfolder="tokenizer", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "tokenizer", safe_serialization=True)
-        pipeline = CLIPTextModel.from_pretrained(repo_id, subfolder="text_encoder", **kwargs)
-        pipeline.save_pretrained(target_dir / "stable-diffusion-2-clip" / "text_encoder", safe_serialization=True)
-        # sd-xl - tokenizer_2
-        repo_id = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
-        _, model_name = repo_id.split("/")
-        pipeline = CLIPTokenizer.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-        pipeline = CLIPTextConfig.from_pretrained(repo_id, **kwargs)
-        pipeline.save_pretrained(target_dir / model_name, safe_serialization=True)
-        # VAE
-        logger.info("Downloading stable diffusion VAE")
-        vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", **kwargs)
-        vae.save_pretrained(target_dir / "sd-vae-ft-mse", safe_serialization=True)
         # safety checking
         logger.info("Downloading safety checker")
         repo_id = "CompVis/stable-diffusion-safety-checker"
         pipeline = AutoFeatureExtractor.from_pretrained(repo_id, **kwargs)
         pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
         pipeline = StableDiffusionSafetyChecker.from_pretrained(repo_id, **kwargs)
         pipeline.save_pretrained(target_dir / "stable-diffusion-safety-checker", safe_serialization=True)
     except KeyboardInterrupt:
@@ -307,7 +271,7 @@ def download_lama():
 def download_support_models() -> None:
     download_realesrgan()
     download_lama()
-    download_conversion_models()
+    download_safety_checker()
 
 
 # -------------------------------------
@@ -744,12 +708,7 @@ def initialize_rootdir(root: Path, yes_to_all: bool = False):
     shutil.copytree(configs_src, configs_dest, dirs_exist_ok=True)
 
     dest = root / "models"
-    for model_base in BaseModelType:
-        for model_type in ModelType:
-            path = dest / model_base.value / model_type.value
-            path.mkdir(parents=True, exist_ok=True)
-    path = dest / "core"
-    path.mkdir(parents=True, exist_ok=True)
+    dest.mkdir(parents=True, exist_ok=True)
 
 
 # -------------------------------------
```

*File diff suppressed because it is too large.*


```diff
@@ -3,9 +3,6 @@
 from pathlib import Path
 
-import torch
-from safetensors.torch import load_file as safetensors_load_file
-
 from invokeai.backend.model_manager import (
     AnyModelConfig,
     BaseModelType,
@@ -37,27 +34,25 @@ class ControlNetLoader(GenericDiffusersLoader):
             return True
 
     def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
         if config.base not in {BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2}:
             raise Exception(f"ControlNet conversion not supported for model type: {config.base}")
         else:
             assert isinstance(config, CheckpointConfigBase)
             config_file = config.config_path
 
-        if model_path.suffix == ".safetensors":
-            checkpoint = safetensors_load_file(model_path, device="cpu")
-        else:
-            checkpoint = torch.load(model_path, map_location="cpu")
-
-        # sometimes weights are hidden under "state_dict", and sometimes not
-        if "state_dict" in checkpoint:
-            checkpoint = checkpoint["state_dict"]
+        image_size = (
+            512
+            if config.base == BaseModelType.StableDiffusion1
+            else 768
+            if config.base == BaseModelType.StableDiffusion2
+            else 1024
+        )
 
         self._logger.info(f"Converting {model_path} to diffusers format")
+        with open(self._app_config.root_path / config_file, "r") as config_stream:
             convert_controlnet_to_diffusers(
                 model_path,
                 output_path,
-                original_config_file=self._app_config.root_path / config_file,
-                image_size=512,
-                scan_needed=True,
+                original_config_file=config_stream,
+                image_size=image_size,
                 precision=self._torch_dtype,
                 from_safetensors=model_path.suffix == ".safetensors",
             )
         return output_path
```


```diff
@@ -4,9 +4,6 @@
 from pathlib import Path
 from typing import Optional
 
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint import StableDiffusionInpaintPipeline
-
 from invokeai.backend.model_manager import (
     AnyModel,
     AnyModelConfig,
@@ -14,7 +11,7 @@ from invokeai.backend.model_manager import (
     ModelFormat,
     ModelRepoVariant,
     ModelType,
-    ModelVariantType,
+    SchedulerPredictionType,
     SubModelType,
 )
 from invokeai.backend.model_manager.config import CheckpointConfigBase, MainCheckpointConfig
@@ -68,27 +65,31 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
     def _convert_model(self, config: AnyModelConfig, model_path: Path, output_path: Path) -> Path:
         assert isinstance(config, MainCheckpointConfig)
-        variant = config.variant
         base = config.base
-        pipeline_class = (
-            StableDiffusionInpaintPipeline if variant == ModelVariantType.Inpaint else StableDiffusionPipeline
-        )
         config_file = config.config_path
+        prediction_type = config.prediction_type.value
+        upcast_attention = config.upcast_attention
+        image_size = (
+            1024
+            if base == BaseModelType.StableDiffusionXL
+            else 768
+            if config.prediction_type == SchedulerPredictionType.VPrediction and base == BaseModelType.StableDiffusion2
+            else 512
+        )
 
         self._logger.info(f"Converting {model_path} to diffusers format")
         convert_ckpt_to_diffusers(
             model_path,
             output_path,
             model_type=self.model_base_to_model_type[base],
-            model_version=base,
-            model_variant=variant,
             original_config_file=self._app_config.root_path / config_file,
             extract_ema=True,
-            scan_needed=True,
-            pipeline_class=pipeline_class,
             from_safetensors=model_path.suffix == ".safetensors",
             precision=self._torch_dtype,
+            prediction_type=prediction_type,
+            image_size=image_size,
+            upcast_attention=upcast_attention,
+            load_safety_checker=False,
         )
         return output_path
```


```diff
@@ -57,12 +57,12 @@ class VAELoader(GenericDiffusersLoader):
         ckpt_config = OmegaConf.load(self._app_config.root_path / config_file)
         assert isinstance(ckpt_config, DictConfig)
 
         self._logger.info(f"Converting {model_path} to diffusers format")
         vae_model = convert_ldm_vae_to_diffusers(
             checkpoint=checkpoint,
             vae_config=ckpt_config,
             image_size=512,
-            precision=self._torch_dtype,
         )
+        vae_model.to(self._torch_dtype)  # set precision appropriately
         vae_model.save_pretrained(output_path, safe_serialization=True)
         return output_path
```


```diff
@@ -319,7 +319,7 @@ class ModelProbe(object):
     @classmethod
     def _scan_and_load_checkpoint(cls, model_path: Path) -> CkptType:
         with SilenceWarnings():
-            if model_path.suffix.endswith((".ckpt", ".pt", ".bin")):
+            if model_path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                 cls._scan_model(model_path.name, model_path)
                 model = torch.load(model_path)
                 assert isinstance(model, dict)
```