Migrate to new HF diffusers cache location (#2867)

# Migrate to new HF diffusers cache location

This PR adjusts the model cache directory to use the layout of
`diffusers 0.14`. This will automatically migrate any diffusers models
located in `INVOKEAI_ROOT/models/diffusers` to
`INVOKEAI_ROOT/models/hub`, and cache newly downloaded diffusers files
into the same location.

As before, if the environment variable `HF_HOME` is set, then both
HuggingFace `from_pretrained()` calls and all InvokeAI methods will use
`HF_HOME/hub` as their cache.
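
For reference, a minimal sketch of the lookup order this implies (illustration only; `invokeai_root` stands in for the configured `INVOKEAI_ROOT`, and the real resolution happens in `global_cache_dir()`):

```python
import os
from pathlib import Path

def resolve_hub_cache(invokeai_root: str) -> Path:
    """Illustration of where diffusers/transformers files are cached after this PR."""
    hf_home = os.getenv("HF_HOME")
    if hf_home:
        # HF_HOME takes precedence for both HuggingFace and InvokeAI code paths
        return Path(hf_home, "hub")
    # otherwise the cache lives inside the InvokeAI root
    return Path(invokeai_root, "models", "hub")
```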
blessedcoolant 2023-03-06 13:05:13 +13:00 committed by GitHub
commit 1b21e5df54
6 changed files with 42 additions and 48 deletions

View File

@@ -295,7 +295,7 @@ def download_vaes():
# first the diffusers version
repo_id = "stabilityai/sd-vae-ft-mse"
args = dict(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
)
if not AutoencoderKL.from_pretrained(repo_id, **args):
raise Exception(f"download of {repo_id} failed")

View File

@@ -270,7 +270,6 @@ def _download_diffusion_weights(
path = download_from_hf(
model_class,
repo_id,
cache_subdir="diffusers",
safety_checker=None,
**extra_args,
)
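
Dropping `cache_subdir="diffusers"` means these weights now land in the same hub cache as everything else. Conceptually the download reduces to a plain `from_pretrained()` call with the shared cache directory (a sketch only; `download_from_hf` is an InvokeAI-internal helper, and the repo id and path below are just examples):

```python
from diffusers import StableDiffusionPipeline

pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",          # example repo_id
    safety_checker=None,
    cache_dir="/path/to/invokeai/models/hub",  # i.e. global_cache_dir("hub")
)
```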

View File

@@ -98,16 +98,13 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
"""
Returns Path to the model cache directory. If a subdirectory
is provided, it will be appended to the end of the path, allowing
for huggingface-style conventions:
global_cache_dir('diffusers')
for Hugging Face-style conventions. Currently, Hugging Face has
moved all models into the "hub" subfolder, so for any pretrained
HF model, use:
global_cache_dir('hub')
Current HuggingFace documentation (mid-Jan 2023) indicates that
transformers models will be cached into a "transformers" subdirectory,
but in practice they seem to go into "hub". But if needed:
global_cache_dir('transformers')
One other caveat is that HuggingFace is moving some diffusers models
into the "hub" subdirectory as well, so this will need to be revisited
from time to time.
The legacy locations were global_cache_dir('transformers') for transformers
and global_cache_dir('diffusers') for diffusers.
"""
home: str = os.getenv("HF_HOME")
@@ -115,7 +112,7 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
home = os.getenv("XDG_CACHE_HOME")
if home is not None:
# Set `home` to $XDG_CACHE_HOME/huggingface, which is the default location mentioned in HuggingFace Hub Client Library.
# Set `home` to $XDG_CACHE_HOME/huggingface, which is the default location mentioned in Hugging Face Hub Client Library.
# See: https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#xdgcachehome
home += os.sep + "huggingface"
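
Putting the two fragments above together, the resolution order is `HF_HOME` first, then `XDG_CACHE_HOME/huggingface`. A minimal sketch, assuming the standard Hugging Face default of `~/.cache/huggingface` when neither variable is set (the real fallback in InvokeAI may differ):

```python
import os
from pathlib import Path

def hf_cache_root() -> Path:
    home = os.getenv("HF_HOME")
    if home is None:
        xdg = os.getenv("XDG_CACHE_HOME")
        home = os.path.join(xdg, "huggingface") if xdg else None
    if home is None:
        # assumed default; matches the Hugging Face Hub client's own fallback
        home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
    return Path(home)

hub_cache = hf_cache_root() / "hub"  # what global_cache_dir("hub") resolves to
```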

View File

@@ -43,13 +43,11 @@ class SDLegacyType(Enum):
V2 = 3
UNKNOWN = 99
DEFAULT_MAX_MODELS = 2
VAE_TO_REPO_ID = { # hack, see note in convert_and_import()
"vae-ft-mse-840000-ema-pruned": "stabilityai/sd-vae-ft-mse",
}
class ModelManager(object):
def __init__(
self,
@@ -369,7 +367,7 @@ class ModelManager(object):
if vae := self._load_vae(mconfig["vae"]):
pipeline_args.update(vae=vae)
if not isinstance(name_or_path, Path):
pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
pipeline_args.update(cache_dir=global_cache_dir("hub"))
if using_fp16:
pipeline_args.update(torch_dtype=torch.float16)
fp_args_list = [{"revision": "fp16"}, {}]
@@ -916,27 +914,40 @@ class ModelManager(object):
to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
script startup time.
"""
# Three transformer models to check: bert, clip and safety checker
# Three transformer models to check: bert, clip and the safety checker,
# plus any models in the legacy diffusers directory
models_dir = Path(Globals.root, "models")
legacy_locations = [
Path(
models_dir,
"CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
),
Path("bert-base-uncased/models--bert-base-uncased"),
Path(models_dir, "bert-base-uncased/models--bert-base-uncased"),
Path(
models_dir,
"openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
),
]
models_dir = Path(Globals.root, "models")
legacy_locations.extend(list(global_cache_dir("diffusers").glob('*')))
legacy_layout = False
for model in legacy_locations:
legacy_layout = legacy_layout or Path(models_dir, model).exists()
legacy_layout = legacy_layout or model.exists()
if not legacy_layout:
return
print(
"** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
"""
>> ALERT:
>> The location of your previously-installed diffusers models needs to move from
>> invokeai/models/diffusers to invokeai/models/hub due to a change introduced by
>> diffusers version 0.14. InvokeAI will now move all models from the "diffusers" directory
>> into "hub" and then remove the diffusers directory. This is a quick, safe, one-time
>> operation. However, if you have customized either of these directories and need to
>> make adjustments, please press ctrl-C now to abort and relaunch InvokeAI when you are ready.
>> Otherwise press <enter> to continue."""
)
print("** This is a quick one-time operation.")
input('continue> ')
# transformer files get moved into the hub directory
if cls._is_huggingface_hub_directory_present():
@@ -948,33 +959,20 @@ class ModelManager(object):
for model in legacy_locations:
source = models_dir / model
dest = hub / model.stem
if dest.exists() and not source.exists():
continue
print(f"** {source} => {dest}")
if source.exists():
if dest.exists():
rmtree(source)
if dest.is_symlink():
print(f"** Found symlink at {dest.name}. Not migrating.")
elif dest.exists():
if source.is_dir():
rmtree(source)
else:
source.unlink()
else:
move(source, dest)
# anything else gets moved into the diffusers directory
if cls._is_huggingface_hub_directory_present():
diffusers = global_cache_dir("diffusers")
else:
diffusers = models_dir / "diffusers"
os.makedirs(diffusers, exist_ok=True)
for root, dirs, _ in os.walk(models_dir, topdown=False):
for dir in dirs:
full_path = Path(root, dir)
if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
continue
if Path(dir).match("models--*--*"):
dest = diffusers / dir
print(f"** {full_path} => {dest}")
if dest.exists():
rmtree(full_path)
else:
move(full_path, dest)
# now clean up by removing any empty directories
empty = [
root
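
The hunk is truncated at this point; the remaining pass collects directories left empty by the moves and removes them. A hedged sketch of an equivalent bottom-up sweep (not the verbatim InvokeAI code; `models_dir` as above):

```python
import os
from pathlib import Path

def remove_empty_dirs(models_dir: Path) -> None:
    # Walk bottom-up so a parent emptied by deleting its children is also caught.
    for root, _dirs, _files in os.walk(models_dir, topdown=False):
        path = Path(root)
        if path != models_dir and not any(path.iterdir()):
            path.rmdir()
```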
@@ -1072,7 +1070,7 @@ class ModelManager(object):
path = name_or_path
else:
owner, repo = name_or_path.split("/")
path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
if not path.exists():
return None
hashpath = path / "checksum.sha256"
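
The hub cache names each repository folder `models--{owner}--{repo}`, which is what the lookup above relies on; for example:

```python
from pathlib import Path

def cached_repo_path(hub_cache: Path, repo_id: str) -> Path:
    owner, repo = repo_id.split("/")
    return hub_cache / f"models--{owner}--{repo}"

# e.g. /path/to/invokeai/models/hub/models--stabilityai--sd-vae-ft-mse
print(cached_repo_path(Path("/path/to/invokeai/models/hub"),
                       "stabilityai/sd-vae-ft-mse"))
```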
@@ -1133,7 +1131,7 @@ class ModelManager(object):
using_fp16 = self.precision == "float16"
vae_args.update(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
local_files_only=not Globals.internet_available,
)
@@ -1172,7 +1170,7 @@ class ModelManager(object):
@staticmethod
def _delete_model_from_cache(repo_id):
cache_info = scan_cache_dir(global_cache_dir("diffusers"))
cache_info = scan_cache_dir(global_cache_dir("hub"))
# I'm sure there is a way to do this with comprehensions
# but the code quickly became incomprehensible!
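
`scan_cache_dir()` from `huggingface_hub` reports each cached repo with its revisions, which can then be handed to `delete_revisions()`. A hedged sketch of removing every cached revision of a single repo, using comprehensions (repo id and cache path are examples):

```python
from huggingface_hub import scan_cache_dir

def delete_model_from_cache(cache_dir, repo_id: str) -> None:
    cache_info = scan_cache_dir(cache_dir)
    hashes = [
        revision.commit_hash
        for repo in cache_info.repos
        if repo.repo_id == repo_id
        for revision in repo.revisions
    ]
    if hashes:
        cache_info.delete_revisions(*hashes).execute()

delete_model_from_cache("/path/to/invokeai/models/hub", "stabilityai/sd-vae-ft-mse")
```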

View File

@@ -640,7 +640,7 @@ def do_textual_inversion_training(
assert (
pretrained_model_name_or_path
), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
pipeline_args = dict(cache_dir=global_cache_dir("hub"))
# Load tokenizer
if tokenizer_name:
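
The same `pipeline_args` dict is then reused when loading the tokenizer and the other pipeline components, so everything shares the hub cache; as a standalone illustration (the model id and path are examples):

```python
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14",           # example tokenizer_name
    cache_dir="/path/to/invokeai/models/hub",  # i.e. global_cache_dir("hub")
)
```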

View File

@@ -442,7 +442,7 @@ def main():
args = _parse_args()
global_set_root(args.root_dir)
cache_dir = str(global_cache_dir("diffusers"))
cache_dir = str(global_cache_dir("hub"))
os.environ[
"HF_HOME"
] = cache_dir  # because it is not clear that the merge pipeline honors cache_dir