migrate to new HF diffusers cache location

2024-08-30 20:32:17 +00:00 · 2023-03-05 08:20:24 -05:00
parent 7cf2f58513
commit ef8cf83b28
5 changed files with 25 additions and 43 deletions
--- a/invokeai/backend/config/invokeai_configure.py
+++ b/invokeai/backend/config/invokeai_configure.py
@ -295,7 +295,7 @@ def download_vaes():
        # first the diffusers version
        repo_id = "stabilityai/sd-vae-ft-mse"
        args = dict(
-            cache_dir=global_cache_dir("diffusers"),
+            cache_dir=global_cache_dir("hub"),
        )
        if not AutoencoderKL.from_pretrained(repo_id, **args):
            raise Exception(f"download of {repo_id} failed")
--- a/invokeai/backend/globals.py
+++ b/invokeai/backend/globals.py
@ -98,16 +98,13 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
    """
    Returns Path to the model cache directory. If a subdirectory
    is provided, it will be appended to the end of the path, allowing
-    for huggingface-style conventions:
-         global_cache_dir('diffusers')
+    for HuggingFace-style conventions. Hugging Face currently stores
+    all models in the "hub" subfolder, so for any pretrained
+    HF model, use:
         global_cache_dir('hub')
-    Current HuggingFace documentation (mid-Jan 2023) indicates that
-    transformers models will be cached into a "transformers" subdirectory,
-    but in practice they seem to go into "hub". But if needed:
-         global_cache_dir('transformers')
-    One other caveat is that HuggingFace is moving some diffusers models
-    into the "hub" subdirectory as well, so this will need to be revisited
-    from time to time.
+
+    The legacy locations were global_cache_dir('transformers') for
+    transformers models and global_cache_dir('diffusers') for diffusers models.
    """
    home: str = os.getenv("HF_HOME")

--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@ -43,13 +43,11 @@ class SDLegacyType(Enum):
    V2 = 3
    UNKNOWN = 99

-
 DEFAULT_MAX_MODELS = 2
 VAE_TO_REPO_ID = {  # hack, see note in convert_and_import()
    "vae-ft-mse-840000-ema-pruned": "stabilityai/sd-vae-ft-mse",
 }

-
 class ModelManager(object):
    def __init__(
        self,
@ -369,7 +367,7 @@ class ModelManager(object):
            if vae := self._load_vae(mconfig["vae"]):
                pipeline_args.update(vae=vae)
        if not isinstance(name_or_path, Path):
-            pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
+            pipeline_args.update(cache_dir=global_cache_dir("hub"))
        if using_fp16:
            pipeline_args.update(torch_dtype=torch.float16)
            fp_args_list = [{"revision": "fp16"}, {}]
@ -916,25 +914,30 @@ class ModelManager(object):
        to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
        script startup time.
        """
-        # Three transformer models to check: bert, clip and safety checker
+        # Three transformer models to check (bert, clip and safety checker),
+        # plus any models found in the legacy "diffusers" directory
+        models_dir = Path(Globals.root, "models")
        legacy_locations = [
            Path(
+                models_dir,
                "CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
            ),
-            Path("bert-base-uncased/models--bert-base-uncased"),
+            Path(models_dir, "bert-base-uncased/models--bert-base-uncased"),
            Path(
+                models_dir,
                "openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
            ),
        ]
-        models_dir = Path(Globals.root, "models")
+        legacy_locations.extend(list(Path(models_dir,"diffusers").glob('*')))
+        
        legacy_layout = False
        for model in legacy_locations:
-            legacy_layout = legacy_layout or Path(models_dir, model).exists()
+            legacy_layout = legacy_layout or model.exists()
        if not legacy_layout:
            return

        print(
-            "** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
+            "** Old model directory layout (< v3.0) detected. Reorganizing."
        )
        print("** This is a quick one-time operation.")

@ -948,6 +951,8 @@ class ModelManager(object):
        for model in legacy_locations:
            source = models_dir / model
            dest = hub / model.stem
+            if dest.exists() and not source.exists():
+                continue
            print(f"** {source} => {dest}")
            if source.exists():
                if dest.exists():
@ -955,26 +960,6 @@ class ModelManager(object):
                else:
                    move(source, dest)

-        # anything else gets moved into the diffusers directory
-        if cls._is_huggingface_hub_directory_present():
-            diffusers = global_cache_dir("diffusers")
-        else:
-            diffusers = models_dir / "diffusers"
-
-        os.makedirs(diffusers, exist_ok=True)
-        for root, dirs, _ in os.walk(models_dir, topdown=False):
-            for dir in dirs:
-                full_path = Path(root, dir)
-                if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
-                    continue
-                if Path(dir).match("models--*--*"):
-                    dest = diffusers / dir
-                    print(f"** {full_path} => {dest}")
-                    if dest.exists():
-                        rmtree(full_path)
-                    else:
-                        move(full_path, dest)
-
        # now clean up by removing any empty directories
        empty = [
            root
@ -1072,7 +1057,7 @@ class ModelManager(object):
            path = name_or_path
        else:
            owner, repo = name_or_path.split("/")
-            path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
+            path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
        if not path.exists():
            return None
        hashpath = path / "checksum.sha256"
@ -1133,7 +1118,7 @@ class ModelManager(object):
        using_fp16 = self.precision == "float16"

        vae_args.update(
-            cache_dir=global_cache_dir("diffusers"),
+            cache_dir=global_cache_dir("hub"),
            local_files_only=not Globals.internet_available,
        )

@ -1172,7 +1157,7 @@ class ModelManager(object):

    @staticmethod
    def _delete_model_from_cache(repo_id):
-        cache_info = scan_cache_dir(global_cache_dir("diffusers"))
+        cache_info = scan_cache_dir(global_cache_dir("hub"))

        # I'm sure there is a way to do this with comprehensions
        # but the code quickly became incomprehensible!
--- a/invokeai/backend/training/textual_inversion_training.py
+++ b/invokeai/backend/training/textual_inversion_training.py
@ -640,7 +640,7 @@ def do_textual_inversion_training(
    assert (
        pretrained_model_name_or_path
    ), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
-    pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
+    pipeline_args = dict(cache_dir=global_cache_dir("hub"))

    # Load tokenizer
    if tokenizer_name:
--- a/invokeai/frontend/merge/merge_diffusers.py
+++ b/invokeai/frontend/merge/merge_diffusers.py
@ -442,7 +442,7 @@ def main():
    args = _parse_args()
    global_set_root(args.root_dir)

-    cache_dir = str(global_cache_dir("diffusers"))
+    cache_dir = str(global_cache_dir("hub"))
    os.environ[
        "HF_HOME"
    ] = cache_dir  # because not clear the merge pipeline is honoring cache_dir