Migrate to new HF diffusers cache location (#2867)

# Migrate to new HF diffusers cache location

This PR adjusts the model cache directory to use the layout of
`diffusers 0.14`. This will automatically migrate any diffusers models
located in `INVOKEAI_ROOT/models/diffusers` to
`INVOKEAI_ROOT/models/hub`, and cache newly downloaded diffusers files
into the same location.

As before, if the environment variable `HF_HOME` is set, then both
HuggingFace `from_pretrained()` calls and all InvokeAI methods will use
`HF_HOME/hub` as their cache.
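
For reference, a minimal sketch of the lookup order this implies (illustration only; `invokeai_root` stands in for the configured `INVOKEAI_ROOT`, and the real resolution happens in `global_cache_dir()`):

```python
import os
from pathlib import Path

def resolve_hub_cache(invokeai_root: str) -> Path:
    """Illustration of where diffusers/transformers files are cached after this PR."""
    hf_home = os.getenv("HF_HOME")
    if hf_home:
        # HF_HOME takes precedence for both HuggingFace and InvokeAI code paths
        return Path(hf_home, "hub")
    # otherwise the cache lives inside the InvokeAI root
    return Path(invokeai_root, "models", "hub")
```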
blessedcoolant 2023-03-06 13:05:13 +13:00 committed by GitHub
commit 1b21e5df54
6 changed files with 42 additions and 48 deletions

View File

@@ -295,7 +295,7 @@ def download_vaes():
# first the diffusers version
repo_id = "stabilityai/sd-vae-ft-mse"
args = dict(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
)
if not AutoencoderKL.from_pretrained(repo_id, **args):
raise Exception(f"download of {repo_id} failed")

View File

@@ -270,7 +270,6 @@ def _download_diffusion_weights(
path = download_from_hf(
model_class,
repo_id,
cache_subdir="diffusers",
safety_checker=None,
**extra_args,
)
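
Dropping `cache_subdir="diffusers"` means these weights now land in the same hub cache as everything else. Conceptually the download reduces to a plain `from_pretrained()` call with the shared cache directory (a sketch only; `download_from_hf` is an InvokeAI-internal helper, and the repo id and path below are just examples):

```python
from diffusers import StableDiffusionPipeline

pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",          # example repo_id
    safety_checker=None,
    cache_dir="/path/to/invokeai/models/hub",  # i.e. global_cache_dir("hub")
)
```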

View File

@@ -98,16 +98,13 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
"""
Returns Path to the model cache directory. If a subdirectory
is provided, it will be appended to the end of the path, allowing
for huggingface-style conventions:
global_cache_dir('diffusers')
for Hugging Face-style conventions. Currently, Hugging Face has
moved all models into the "hub" subfolder, so for any pretrained
HF model, use:
global_cache_dir('hub')
Current HuggingFace documentation (mid-Jan 2023) indicates that
transformers models will be cached into a "transformers" subdirectory,
but in practice they seem to go into "hub". But if needed:
global_cache_dir('transformers')
One other caveat is that HuggingFace is moving some diffusers models
into the "hub" subdirectory as well, so this will need to be revisited
from time to time.
The legacy locations were global_cache_dir('transformers') for transformers
and global_cache_dir('diffusers') for diffusers.
"""
home: str = os.getenv("HF_HOME")
@@ -115,7 +112,7 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
home = os.getenv("XDG_CACHE_HOME")
if home is not None:
# Set `home` to $XDG_CACHE_HOME/huggingface, which is the default location mentioned in HuggingFace Hub Client Library.
# Set `home` to $XDG_CACHE_HOME/huggingface, which is the default location mentioned in Hugging Face Hub Client Library.
# See: https://huggingface.co/docs/huggingface_hub/main/en/package_reference/environment_variables#xdgcachehome
home += os.sep + "huggingface"
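
Putting the two fragments above together, the resolution order is `HF_HOME` first, then `XDG_CACHE_HOME/huggingface`. A minimal sketch, assuming the standard Hugging Face default of `~/.cache/huggingface` when neither variable is set (the real fallback in InvokeAI may differ):

```python
import os
from pathlib import Path

def hf_cache_root() -> Path:
    home = os.getenv("HF_HOME")
    if home is None:
        xdg = os.getenv("XDG_CACHE_HOME")
        home = os.path.join(xdg, "huggingface") if xdg else None
    if home is None:
        # assumed default; matches the Hugging Face Hub client's own fallback
        home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
    return Path(home)

hub_cache = hf_cache_root() / "hub"  # what global_cache_dir("hub") resolves to
```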

View File

@@ -43,13 +43,11 @@ class SDLegacyType(Enum):
V2 = 3
UNKNOWN = 99
DEFAULT_MAX_MODELS = 2
VAE_TO_REPO_ID = { # hack, see note in convert_and_import()
"vae-ft-mse-840000-ema-pruned": "stabilityai/sd-vae-ft-mse",
}
class ModelManager(object):
def __init__(
self,
@@ -369,7 +367,7 @@ class ModelManager(object):
if vae := self._load_vae(mconfig["vae"]):
pipeline_args.update(vae=vae)
if not isinstance(name_or_path, Path):
pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
pipeline_args.update(cache_dir=global_cache_dir("hub"))
if using_fp16:
pipeline_args.update(torch_dtype=torch.float16)
fp_args_list = [{"revision": "fp16"}, {}]
@@ -916,27 +914,40 @@ class ModelManager(object):
to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
script startup time.
"""
# Three transformer models to check: bert, clip and safety checker
# Three transformer models to check: bert, clip and the safety checker,
# plus any models in the legacy diffusers directory
models_dir = Path(Globals.root, "models")
legacy_locations = [
Path(
models_dir,
"CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
),
Path("bert-base-uncased/models--bert-base-uncased"),
Path(models_dir, "bert-base-uncased/models--bert-base-uncased"),
Path(
models_dir,
"openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
),
]
models_dir = Path(Globals.root, "models")
legacy_locations.extend(list(global_cache_dir("diffusers").glob('*')))
legacy_layout = False
for model in legacy_locations:
legacy_layout = legacy_layout or Path(models_dir, model).exists()
legacy_layout = legacy_layout or model.exists()
if not legacy_layout:
return
print(
"** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
"""
>> ALERT:
>> The location of your previously-installed diffusers models needs to move from
>> invokeai/models/diffusers to invokeai/models/hub due to a change introduced by
>> diffusers version 0.14. InvokeAI will now move all models from the "diffusers" directory
>> into "hub" and then remove the diffusers directory. This is a quick, safe, one-time
>> operation. However, if you have customized either of these directories and need to
>> make adjustments, please press ctrl-C now to abort and relaunch InvokeAI when you are ready.
>> Otherwise press <enter> to continue."""
)
print("** This is a quick one-time operation.")
input('continue> ')
# transformer files get moved into the hub directory
if cls._is_huggingface_hub_directory_present():
@@ -948,33 +959,20 @@ class ModelManager(object):
for model in legacy_locations:
source = models_dir / model
dest = hub / model.stem
if dest.exists() and not source.exists():
continue
print(f"** {source} => {dest}")
if source.exists():
if dest.exists():
rmtree(source)
if dest.is_symlink():
print(f"** Found symlink at {dest.name}. Not migrating.")
elif dest.exists():
if source.is_dir():
rmtree(source)
else:
source.unlink()
else:
move(source, dest)
# anything else gets moved into the diffusers directory
if cls._is_huggingface_hub_directory_present():
diffusers = global_cache_dir("diffusers")
else:
diffusers = models_dir / "diffusers"
os.makedirs(diffusers, exist_ok=True)
for root, dirs, _ in os.walk(models_dir, topdown=False):
for dir in dirs:
full_path = Path(root, dir)
if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
continue
if Path(dir).match("models--*--*"):
dest = diffusers / dir
print(f"** {full_path} => {dest}")
if dest.exists():
rmtree(full_path)
else:
move(full_path, dest)
# now clean up by removing any empty directories
empty = [
root
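
The hunk is truncated at this point; the remaining pass collects directories left empty by the moves and removes them. A hedged sketch of an equivalent bottom-up sweep (not the verbatim InvokeAI code; `models_dir` as above):

```python
import os
from pathlib import Path

def remove_empty_dirs(models_dir: Path) -> None:
    # Walk bottom-up so a parent emptied by deleting its children is also caught.
    for root, _dirs, _files in os.walk(models_dir, topdown=False):
        path = Path(root)
        if path != models_dir and not any(path.iterdir()):
            path.rmdir()
```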
@@ -1072,7 +1070,7 @@ class ModelManager(object):
path = name_or_path
else:
owner, repo = name_or_path.split("/")
path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
if not path.exists():
return None
hashpath = path / "checksum.sha256"
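
The hub cache names each repository folder `models--{owner}--{repo}`, which is what the lookup above relies on; for example:

```python
from pathlib import Path

def cached_repo_path(hub_cache: Path, repo_id: str) -> Path:
    owner, repo = repo_id.split("/")
    return hub_cache / f"models--{owner}--{repo}"

# e.g. /path/to/invokeai/models/hub/models--stabilityai--sd-vae-ft-mse
print(cached_repo_path(Path("/path/to/invokeai/models/hub"),
                       "stabilityai/sd-vae-ft-mse"))
```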
@@ -1133,7 +1131,7 @@ class ModelManager(object):
using_fp16 = self.precision == "float16"
vae_args.update(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
local_files_only=not Globals.internet_available,
)
@@ -1172,7 +1170,7 @@ class ModelManager(object):
@staticmethod
def _delete_model_from_cache(repo_id):
cache_info = scan_cache_dir(global_cache_dir("diffusers"))
cache_info = scan_cache_dir(global_cache_dir("hub"))
# I'm sure there is a way to do this with comprehensions
# but the code quickly became incomprehensible!
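
`scan_cache_dir()` from `huggingface_hub` reports each cached repo with its revisions, which can then be handed to `delete_revisions()`. A hedged sketch of removing every cached revision of a single repo, using comprehensions (repo id and cache path are examples):

```python
from huggingface_hub import scan_cache_dir

def delete_model_from_cache(cache_dir, repo_id: str) -> None:
    cache_info = scan_cache_dir(cache_dir)
    hashes = [
        revision.commit_hash
        for repo in cache_info.repos
        if repo.repo_id == repo_id
        for revision in repo.revisions
    ]
    if hashes:
        cache_info.delete_revisions(*hashes).execute()

delete_model_from_cache("/path/to/invokeai/models/hub", "stabilityai/sd-vae-ft-mse")
```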

View File

@@ -640,7 +640,7 @@ def do_textual_inversion_training(
assert (
pretrained_model_name_or_path
), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
pipeline_args = dict(cache_dir=global_cache_dir("hub"))
# Load tokenizer
if tokenizer_name:
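
The same `pipeline_args` dict is then reused when loading the tokenizer and the other pipeline components, so everything shares the hub cache; as a standalone illustration (the model id and path are examples):

```python
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained(
    "openai/clip-vit-large-patch14",           # example tokenizer_name
    cache_dir="/path/to/invokeai/models/hub",  # i.e. global_cache_dir("hub")
)
```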

View File

@@ -442,7 +442,7 @@ def main():
args = _parse_args()
global_set_root(args.root_dir)
cache_dir = str(global_cache_dir("diffusers"))
cache_dir = str(global_cache_dir("hub"))
os.environ[
"HF_HOME"
] = cache_dir  # because it is not clear that the merge pipeline honors cache_dir