migrate to new HF diffusers cache location

Lincoln Stein 2023-03-05 08:20:24 -05:00
parent 7cf2f58513
commit ef8cf83b28
5 changed files with 25 additions and 43 deletions

View File

@@ -295,7 +295,7 @@ def download_vaes():
     # first the diffusers version
     repo_id = "stabilityai/sd-vae-ft-mse"
     args = dict(
-        cache_dir=global_cache_dir("diffusers"),
+        cache_dir=global_cache_dir("hub"),
     )
     if not AutoencoderKL.from_pretrained(repo_id, **args):
         raise Exception(f"download of {repo_id} failed")

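As an aside, a minimal sketch of what this change means on disk (the root path is hypothetical): with cache_dir pointing at the shared "hub" cache, huggingface_hub stores the snapshot under its models--{owner}--{repo} layout, so diffusers and transformers models now share one tree.

    # Sketch only; assumes diffusers is installed and the path is illustrative.
    from pathlib import Path
    from diffusers import AutoencoderKL

    cache_dir = Path("~/invokeai/models/hub").expanduser()  # hypothetical root
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", cache_dir=cache_dir)

    # the snapshot lands in the standard hub cache layout
    assert (cache_dir / "models--stabilityai--sd-vae-ft-mse").exists()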
View File

@@ -98,16 +98,13 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
     """
     Returns Path to the model cache directory. If a subdirectory
     is provided, it will be appended to the end of the path, allowing
-    for huggingface-style conventions:
-         global_cache_dir('diffusers')
+    for huggingface-style conventions. Currently, hugging face has
+    moved all models into the "hub" subfolder, so for any pretrained
+    HF model, use:
          global_cache_dir('hub')
-    Current HuggingFace documentation (mid-Jan 2023) indicates that
-    transformers models will be cached into a "transformers" subdirectory,
-    but in practice they seem to go into "hub". But if needed:
-         global_cache_dir('transformers')
-    One other caveat is that HuggingFace is moving some diffusers models
-    into the "hub" subdirectory as well, so this will need to be revisited
-    from time to time.
+
+    The legacy location for transformers used to be global_cache_dir('transformers')
+    and global_cache_dir('diffusers') for diffusers.
     """
     home: str = os.getenv("HF_HOME")

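Only the docstring and the first line of the body appear in this hunk; a plausible reconstruction of the resolution logic being documented (the fallback to ~/.cache/huggingface is an assumption mirroring the stock HF default, and the real function may also consult InvokeAI's Globals.root):

    import os
    from pathlib import Path
    from typing import Union

    def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
        """Return the HF model cache directory, e.g. global_cache_dir('hub')."""
        home = os.getenv("HF_HOME")
        if home is None:
            # assumed fallback: the stock Hugging Face cache location
            home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
        return Path(home, subdir)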
View File

@@ -43,13 +43,11 @@ class SDLegacyType(Enum):
     V2 = 3
     UNKNOWN = 99
 
-
 DEFAULT_MAX_MODELS = 2
 VAE_TO_REPO_ID = {  # hack, see note in convert_and_import()
     "vae-ft-mse-840000-ema-pruned": "stabilityai/sd-vae-ft-mse",
 }
 
-
 class ModelManager(object):
     def __init__(
         self,
@@ -369,7 +367,7 @@ class ModelManager(object):
         if vae := self._load_vae(mconfig["vae"]):
             pipeline_args.update(vae=vae)
         if not isinstance(name_or_path, Path):
-            pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
+            pipeline_args.update(cache_dir=global_cache_dir("hub"))
         if using_fp16:
             pipeline_args.update(torch_dtype=torch.float16)
         fp_args_list = [{"revision": "fp16"}, {}]
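The fp_args_list at the end of this hunk drives a try-fp16-first download pattern; the loop that consumes it is outside the hunk, so the following is a hedged sketch (repo id and cache path are hypothetical, and the exception handling is an assumption, not the verbatim InvokeAI implementation):

    from diffusers import StableDiffusionPipeline

    name_or_path = "stabilityai/stable-diffusion-2-1"  # hypothetical
    pipeline_args = dict(cache_dir="models/hub")
    fp_args_list = [{"revision": "fp16"}, {}]

    pipeline = None
    for fp_args in fp_args_list:
        try:
            pipeline = StableDiffusionPipeline.from_pretrained(
                name_or_path, **pipeline_args, **fp_args
            )
            break  # first variant that loads cleanly wins
        except OSError:
            continue  # no fp16 revision published; fall back to defaults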
@@ -916,25 +914,30 @@ class ModelManager(object):
         to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
         script startup time.
         """
-        # Three transformer models to check: bert, clip and safety checker
+        # Three transformer models to check: bert, clip and safety checker, and
+        # the diffusers as well
+        models_dir = Path(Globals.root, "models")
         legacy_locations = [
             Path(
+                models_dir,
                 "CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
             ),
-            Path("bert-base-uncased/models--bert-base-uncased"),
+            Path(models_dir, "bert-base-uncased/models--bert-base-uncased"),
             Path(
+                models_dir,
                 "openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
             ),
         ]
-        models_dir = Path(Globals.root, "models")
+        legacy_locations.extend(list(Path(models_dir,"diffusers").glob('*')))
         legacy_layout = False
         for model in legacy_locations:
-            legacy_layout = legacy_layout or Path(models_dir, model).exists()
+            legacy_layout = legacy_layout or model.exists()
         if not legacy_layout:
             return
         print(
-            "** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
+            "** Old model directory layout (< v3.0) detected. Reorganizing."
         )
         print("** This is a quick one-time operation.")
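The detection logic above reduces to: build candidate legacy paths, extend them with every child of the old diffusers/ folder, and bail out early if none exist. A self-contained sketch (root path hypothetical, list truncated to one entry):

    from pathlib import Path

    models_dir = Path("~/invokeai/models").expanduser()  # hypothetical root

    legacy_locations = [
        models_dir / "bert-base-uncased/models--bert-base-uncased",
    ]
    # every per-model directory under the legacy diffusers/ folder also qualifies
    legacy_locations.extend(Path(models_dir, "diffusers").glob("*"))

    # equivalent to the accumulating `legacy_layout or ...` loop in the hunk
    if not any(p.exists() for p in legacy_locations):
        print("no legacy layout detected; nothing to do")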
@@ -948,6 +951,8 @@ class ModelManager(object):
         for model in legacy_locations:
             source = models_dir / model
             dest = hub / model.stem
+            if dest.exists() and not source.exists():
+                continue
             print(f"** {source} => {dest}")
             if source.exists():
                 if dest.exists():
@@ -955,26 +960,6 @@ class ModelManager(object):
                 else:
                     move(source, dest)
 
-        # anything else gets moved into the diffusers directory
-        if cls._is_huggingface_hub_directory_present():
-            diffusers = global_cache_dir("diffusers")
-        else:
-            diffusers = models_dir / "diffusers"
-        os.makedirs(diffusers, exist_ok=True)
-
-        for root, dirs, _ in os.walk(models_dir, topdown=False):
-            for dir in dirs:
-                full_path = Path(root, dir)
-                if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
-                    continue
-                if Path(dir).match("models--*--*"):
-                    dest = diffusers / dir
-                    print(f"** {full_path} => {dest}")
-                    if dest.exists():
-                        rmtree(full_path)
-                    else:
-                        move(full_path, dest)
-
         # now clean up by removing any empty directories
         empty = [
             root
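Taken together with the skip check added two hunks up, the per-model move is now idempotent: a rerun finds dest populated and source gone, and does nothing. A sketch of that invariant in isolation (the function name is hypothetical; move and rmtree are the same shutil calls the hunk uses):

    from pathlib import Path
    from shutil import move, rmtree

    def migrate_one(source: Path, hub: Path) -> None:
        """Move one legacy model directory into the hub cache; safe to rerun."""
        dest = hub / source.stem
        if dest.exists() and not source.exists():
            return  # already migrated on a previous run
        print(f"** {source} => {dest}")
        if source.exists():
            if dest.exists():
                rmtree(source)  # dest already populated; drop the duplicate
            else:
                move(str(source), str(dest))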
@@ -1072,7 +1057,7 @@ class ModelManager(object):
             path = name_or_path
         else:
             owner, repo = name_or_path.split("/")
-            path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
+            path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
         if not path.exists():
             return None
         hashpath = path / "checksum.sha256"
@@ -1133,7 +1118,7 @@ class ModelManager(object):
         using_fp16 = self.precision == "float16"
 
         vae_args.update(
-            cache_dir=global_cache_dir("diffusers"),
+            cache_dir=global_cache_dir("hub"),
             local_files_only=not Globals.internet_available,
         )
@@ -1172,7 +1157,7 @@ class ModelManager(object):
     @staticmethod
     def _delete_model_from_cache(repo_id):
-        cache_info = scan_cache_dir(global_cache_dir("diffusers"))
+        cache_info = scan_cache_dir(global_cache_dir("hub"))
         # I'm sure there is a way to do this with comprehensions
         # but the code quickly became incomprehensible!

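_delete_model_from_cache now scans the unified "hub" cache. For reference, a hedged sketch of what a comprehension-free deletion looks like with huggingface_hub's cache API (scan_cache_dir, delete_revisions, and execute are the real library calls; the function wrapper is illustrative):

    from huggingface_hub import scan_cache_dir

    def delete_model_from_cache(repo_id: str, cache_dir: str) -> None:
        """Remove every cached revision of repo_id from cache_dir."""
        cache_info = scan_cache_dir(cache_dir)
        hashes = []
        for repo in cache_info.repos:
            if repo.repo_id == repo_id:
                for revision in repo.revisions:
                    hashes.append(revision.commit_hash)
        if hashes:
            # delete_revisions returns a strategy; execute() does the removal
            cache_info.delete_revisions(*hashes).execute()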
View File

@@ -640,7 +640,7 @@ def do_textual_inversion_training(
     assert (
         pretrained_model_name_or_path
     ), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
-    pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
+    pipeline_args = dict(cache_dir=global_cache_dir("hub"))
 
     # Load tokenizer
     if tokenizer_name:

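pipeline_args is presumably threaded into the tokenizer and model loads that follow; a hedged sketch of that usage (the repo id is hypothetical, and subfolder="tokenizer" assumes a standard diffusers-format checkpoint layout):

    from transformers import CLIPTokenizer

    pipeline_args = dict(cache_dir="models/hub")  # as set in the hunk above
    pretrained_model_name_or_path = "runwayml/stable-diffusion-v1-5"  # hypothetical

    tokenizer = CLIPTokenizer.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="tokenizer",  # diffusers-format checkpoints keep it here
        **pipeline_args,
    )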
View File

@@ -442,7 +442,7 @@ def main():
     args = _parse_args()
     global_set_root(args.root_dir)
 
-    cache_dir = str(global_cache_dir("diffusers"))
+    cache_dir = str(global_cache_dir("hub"))
     os.environ[
         "HF_HOME"
     ] = cache_dir  # because not clear the merge pipeline is honoring cache_dir
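Exporting HF_HOME is the belt-and-braces part: huggingface_hub derives its default cache location from that variable, so even a code path that ignores an explicit cache_dir ends up in (roughly) the same place. A minimal sketch of the workaround (path hypothetical):

    import os
    from pathlib import Path

    cache_dir = str(Path("~/invokeai/models/hub").expanduser())  # hypothetical
    # any library call that ignores an explicit cache_dir still resolves here
    os.environ["HF_HOME"] = cache_dir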