use diffusers 0.14 cache layout

This PR ports `main` PR #2871 to the v2.3 branch. It adjusts the
global diffusers model cache to work with the diffusers 0.14 layout,
which places models in HF_HOME/hub rather than HF_HOME/diffusers.
Lincoln Stein 2023-03-09 22:35:43 -05:00
parent 8323169864
commit 023db8ac41
8 changed files with 40 additions and 46 deletions
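In practical terms, every call that previously resolved the model cache as global_cache_dir("diffusers") now resolves it as global_cache_dir("hub"), matching where the Hugging Face hub cache actually lives. A minimal sketch of the idea (illustrative only — the real helper is InvokeAI's, shown in a hunk below, and has more fallbacks):

```python
import os
from pathlib import Path

def global_cache_dir(subdir: str = "") -> Path:
    # Sketch only: resolve the cache root from HF_HOME; the real helper
    # falls back to the InvokeAI root directory when HF_HOME is unset.
    home = os.getenv("HF_HOME", "~/invokeai/models")
    return Path(home).expanduser() / subdir

old_layout = global_cache_dir("diffusers")  # diffusers <= 0.13: HF_HOME/diffusers (legacy)
new_layout = global_cache_dir("hub")        # diffusers >= 0.14: HF_HOME/hub
```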

@@ -960,7 +960,6 @@ def prepare_image_metadata(
     wildcards["seed"] = seed
     wildcards["model_id"] = model_id
     try:
-        print(f'DEBUG: fnformat={opt.fnformat}')
         filename = opt.fnformat.format(**wildcards)
     except KeyError as e:
         print(

@@ -290,7 +290,7 @@ def download_vaes():
     # first the diffusers version
     repo_id = "stabilityai/sd-vae-ft-mse"
     args = dict(
-        cache_dir=global_cache_dir("diffusers"),
+        cache_dir=global_cache_dir("hub"),
     )
     if not AutoencoderKL.from_pretrained(repo_id, **args):
         raise Exception(f"download of {repo_id} failed")
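Under the hub layout, from_pretrained(..., cache_dir=...) stores each repo in a models--{owner}--{repo} snapshot folder directly beneath the supplied directory, so the VAE download can be verified like this (the cache root below is an assumed example path):

```python
from pathlib import Path

# stands in for global_cache_dir("hub") on a default install
cache_root = Path("~/invokeai/models/hub").expanduser()
vae_dir = cache_root / "models--stabilityai--sd-vae-ft-mse"
print(vae_dir.exists())  # True once download_vaes() has run
```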

@@ -262,7 +262,6 @@ def _download_diffusion_weights(
     path = download_from_hf(
         model_class,
         repo_id,
-        cache_subdir="diffusers",
         safety_checker=None,
         **extra_args,
     )

@@ -88,16 +88,13 @@ def global_cache_dir(subdir:Union[str,Path]='')->Path:
     '''
     Returns Path to the model cache directory. If a subdirectory
     is provided, it will be appended to the end of the path, allowing
-    for huggingface-style conventions:
-         global_cache_dir('diffusers')
+    for Hugging Face-style conventions. Currently, Hugging Face has
+    moved all models into the "hub" subfolder, so for any pretrained
+    HF model, use:
          global_cache_dir('hub')
-    Current HuggingFace documentation (mid-Jan 2023) indicates that
-    transformers models will be cached into a "transformers" subdirectory,
-    but in practice they seem to go into "hub". But if needed:
-         global_cache_dir('transformers')
-    One other caveat is that HuggingFace is moving some diffusers models
-    into the "hub" subdirectory as well, so this will need to be revisited
-    from time to time.
+
+    The legacy location for transformers used to be global_cache_dir('transformers'),
+    and global_cache_dir('diffusers') for diffusers.
     '''
     home: str = os.getenv('HF_HOME')
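Concretely, with HF_HOME pointed at the InvokeAI models directory (as the installer does), the two calls named in the revised docstring resolve as follows. The import path and example paths here are assumptions for illustration:

```python
import os
from ldm.invoke.globals import global_cache_dir  # assumed v2.3 location of this helper

os.environ["HF_HOME"] = "/home/user/invokeai/models"  # hypothetical root

print(global_cache_dir("hub"))        # /home/user/invokeai/models/hub
print(global_cache_dir("diffusers"))  # legacy path, now only read during migration
```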

@@ -437,10 +437,10 @@ def main():
     args = _parse_args()
     global_set_root(args.root_dir)
-    cache_dir = str(global_cache_dir("diffusers"))
+    cache_dir = str(global_cache_dir("hub"))
     os.environ[
         "HF_HOME"
-    ] = cache_dir  # because not clear the merge pipeline is honoring cache_dir
+    ] = str(global_cache_dir())  # because not clear the merge pipeline is honoring cache_dir
     args.cache_dir = cache_dir
     try:
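One subtlety in this hunk: cache_dir (passed explicitly to the merge pipeline) points at the hub subdirectory, but HF_HOME must be set to the cache root, because the Hugging Face libraries append "hub" to it themselves. A short sketch of the distinction, under the same assumed import as above:

```python
import os
from ldm.invoke.globals import global_cache_dir  # assumed helper location

os.environ["HF_HOME"] = str(global_cache_dir())    # .../models  (the root)
explicit_cache_dir = str(global_cache_dir("hub"))  # .../models/hub

# Pointing HF_HOME at the hub directory instead would make huggingface_hub
# look in .../models/hub/hub and miss every cached model.
```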

@@ -507,7 +507,7 @@ class ModelManager(object):
         if vae := self._load_vae(mconfig["vae"]):
             pipeline_args.update(vae=vae)
         if not isinstance(name_or_path, Path):
-            pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
+            pipeline_args.update(cache_dir=global_cache_dir("hub"))
         if using_fp16:
             pipeline_args.update(torch_dtype=torch.float16)
             fp_args_list = [{"revision": "fp16"}, {}]
@@ -1093,9 +1093,12 @@ class ModelManager(object):
         to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
         script startup time.
         """
-        # Three transformer models to check: bert, clip and safety checker
+        # Three transformer models to check: bert, clip and safety checker,
+        # and the diffusers models as well
+        models_dir = Path(Globals.root, "models")
         legacy_locations = [
             Path(
+                models_dir,
                 "CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
             ),
             Path("bert-base-uncased/models--bert-base-uncased"),
@@ -1103,17 +1106,26 @@ class ModelManager(object):
             "openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
             ),
         ]
-        models_dir = Path(Globals.root, "models")
+        legacy_locations.extend(list(global_cache_dir("diffusers").glob('*')))
         legacy_layout = False
         for model in legacy_locations:
-            legacy_layout = legacy_layout or Path(models_dir, model).exists()
+            legacy_layout = legacy_layout or model.exists()
         if not legacy_layout:
             return
         print(
-            "** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
+            """
+>> ALERT:
+>> The location of your previously-installed diffusers models needs to move from
+>> invokeai/models/diffusers to invokeai/models/hub due to a change introduced by
+>> diffusers version 0.14. InvokeAI will now move all models from the "diffusers" directory
+>> into "hub" and then remove the "diffusers" directory. This is a quick, safe, one-time
+>> operation. However, if you have customized either of these directories and need to
+>> make adjustments, please press ctrl-C now to abort and relaunch InvokeAI when you are ready.
+>> Otherwise press <enter> to continue."""
         )
-        print("** This is a quick one-time operation.")
+        input("continue> ")

         # transformer files get moved into the hub directory
         if cls._is_huggingface_hub_directory_present():
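The new glob('*') line turns every per-model folder under the legacy diffusers cache into a migration candidate, alongside the three hard-coded transformer locations. For illustration (the paths and folder names below are hypothetical examples, not a fixed list):

```python
from pathlib import Path

legacy = Path("~/invokeai/models/diffusers").expanduser()  # assumed legacy root
for entry in legacy.glob("*"):
    # hub-style names, e.g. models--runwayml--stable-diffusion-v1-5
    print(entry.name)
```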
@@ -1125,33 +1137,20 @@ class ModelManager(object):
         for model in legacy_locations:
             source = models_dir / model
             dest = hub / model.stem
+            if dest.exists() and not source.exists():
+                continue
             print(f"** {source} => {dest}")
             if source.exists():
-                if dest.exists():
-                    rmtree(source)
+                if dest.is_symlink():
+                    print(f"** Found symlink at {dest.name}. Not migrating.")
+                elif dest.exists():
+                    if source.is_dir():
+                        rmtree(source)
+                    else:
+                        source.unlink()
                 else:
                     move(source, dest)
-        # anything else gets moved into the diffusers directory
-        if cls._is_huggingface_hub_directory_present():
-            diffusers = global_cache_dir("diffusers")
-        else:
-            diffusers = models_dir / "diffusers"
-        os.makedirs(diffusers, exist_ok=True)
-        for root, dirs, _ in os.walk(models_dir, topdown=False):
-            for dir in dirs:
-                full_path = Path(root, dir)
-                if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
-                    continue
-                if Path(dir).match("models--*--*"):
-                    dest = diffusers / dir
-                    print(f"** {full_path} => {dest}")
-                    if dest.exists():
-                        rmtree(full_path)
-                    else:
-                        move(full_path, dest)

         # now clean up by removing any empty directories
         empty = [
             root
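The move relies on Path.stem to flatten each legacy location into a bare hub-style folder name: stem keeps only the final path component, minus any suffix. A quick check of the behavior this depends on:

```python
from pathlib import Path

p = Path("bert-base-uncased/models--bert-base-uncased")
print(p.stem)  # models--bert-base-uncased

q = Path("CompVis/stable-diffusion-safety-checker"
         "/models--CompVis--stable-diffusion-safety-checker")
print(q.stem)  # models--CompVis--stable-diffusion-safety-checker
```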
@@ -1249,7 +1248,7 @@ class ModelManager(object):
             path = name_or_path
         else:
             owner, repo = name_or_path.split("/")
-            path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
+            path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
         if not path.exists():
             return None
         hashpath = path / "checksum.sha256"
@@ -1310,7 +1309,7 @@ class ModelManager(object):
         using_fp16 = self.precision == "float16"
         vae_args.update(
-            cache_dir=global_cache_dir("diffusers"),
+            cache_dir=global_cache_dir("hub"),
             local_files_only=not Globals.internet_available,
         )

@@ -634,7 +634,7 @@ def do_textual_inversion_training(
     assert (
         pretrained_model_name_or_path
     ), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
-    pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
+    pipeline_args = dict(cache_dir=global_cache_dir("hub"))

     # Load tokenizer
     if tokenizer_name:

@@ -34,7 +34,7 @@ dependencies = [
   "clip_anytorch",
   "compel==0.1.7",
   "datasets",
-  "diffusers[torch]~=0.13",
+  "diffusers[torch]~=0.14",
   "dnspython==2.2.1",
   "einops",
   "eventlet",