migrate to new HF diffusers cache location

This commit is contained in:
Lincoln Stein 2023-03-05 08:20:24 -05:00
parent 7cf2f58513
commit ef8cf83b28
5 changed files with 25 additions and 43 deletions

View File

@ -295,7 +295,7 @@ def download_vaes():
# first the diffusers version
repo_id = "stabilityai/sd-vae-ft-mse"
args = dict(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
)
if not AutoencoderKL.from_pretrained(repo_id, **args):
raise Exception(f"download of {repo_id} failed")

View File

@ -98,16 +98,13 @@ def global_cache_dir(subdir: Union[str, Path] = "") -> Path:
"""
Returns Path to the model cache directory. If a subdirectory
is provided, it will be appended to the end of the path, allowing
for huggingface-style conventions:
global_cache_dir('diffusers')
for huggingface-style conventions. Currently, HuggingFace has
moved all models into the "hub" subfolder, so for any pretrained
HF model, use:
global_cache_dir('hub')
Current HuggingFace documentation (mid-Jan 2023) indicates that
transformers models will be cached into a "transformers" subdirectory,
but in practice they seem to go into "hub". But if needed:
global_cache_dir('transformers')
One other caveat is that HuggingFace is moving some diffusers models
into the "hub" subdirectory as well, so this will need to be revisited
from time to time.
The legacy locations were global_cache_dir('transformers') for transformers
models and global_cache_dir('diffusers') for diffusers models.
"""
home: str = os.getenv("HF_HOME")

View File

@ -43,13 +43,11 @@ class SDLegacyType(Enum):
V2 = 3
UNKNOWN = 99
DEFAULT_MAX_MODELS = 2
VAE_TO_REPO_ID = { # hack, see note in convert_and_import()
"vae-ft-mse-840000-ema-pruned": "stabilityai/sd-vae-ft-mse",
}
class ModelManager(object):
def __init__(
self,
@ -369,7 +367,7 @@ class ModelManager(object):
if vae := self._load_vae(mconfig["vae"]):
pipeline_args.update(vae=vae)
if not isinstance(name_or_path, Path):
pipeline_args.update(cache_dir=global_cache_dir("diffusers"))
pipeline_args.update(cache_dir=global_cache_dir("hub"))
if using_fp16:
pipeline_args.update(torch_dtype=torch.float16)
fp_args_list = [{"revision": "fp16"}, {}]
@ -916,25 +914,30 @@ class ModelManager(object):
to the 2.3.0 "diffusers" version. This should be a one-time operation, called at
script startup time.
"""
# Three transformer models to check: bert, clip and safety checker
# Three transformer models to check (bert, clip and safety checker), plus
# anything found in the legacy "diffusers" directory
models_dir = Path(Globals.root, "models")
legacy_locations = [
Path(
models_dir,
"CompVis/stable-diffusion-safety-checker/models--CompVis--stable-diffusion-safety-checker"
),
Path("bert-base-uncased/models--bert-base-uncased"),
Path(models_dir, "bert-base-uncased/models--bert-base-uncased"),
Path(
models_dir,
"openai/clip-vit-large-patch14/models--openai--clip-vit-large-patch14"
),
]
models_dir = Path(Globals.root, "models")
legacy_locations.extend(list(Path(models_dir,"diffusers").glob('*')))
legacy_layout = False
for model in legacy_locations:
legacy_layout = legacy_layout or Path(models_dir, model).exists()
legacy_layout = legacy_layout or model.exists()
if not legacy_layout:
return
print(
"** Legacy version <= 2.2.5 model directory layout detected. Reorganizing."
"** Old model directory layout (< v3.0) detected. Reorganizing."
)
print("** This is a quick one-time operation.")
@ -948,6 +951,8 @@ class ModelManager(object):
for model in legacy_locations:
source = models_dir / model
dest = hub / model.stem
if dest.exists() and not source.exists():
continue
print(f"** {source} => {dest}")
if source.exists():
if dest.exists():
@ -955,26 +960,6 @@ class ModelManager(object):
else:
move(source, dest)
# anything else gets moved into the diffusers directory
if cls._is_huggingface_hub_directory_present():
diffusers = global_cache_dir("diffusers")
else:
diffusers = models_dir / "diffusers"
os.makedirs(diffusers, exist_ok=True)
for root, dirs, _ in os.walk(models_dir, topdown=False):
for dir in dirs:
full_path = Path(root, dir)
if full_path.is_relative_to(hub) or full_path.is_relative_to(diffusers):
continue
if Path(dir).match("models--*--*"):
dest = diffusers / dir
print(f"** {full_path} => {dest}")
if dest.exists():
rmtree(full_path)
else:
move(full_path, dest)
# now clean up by removing any empty directories
empty = [
root
@ -1072,7 +1057,7 @@ class ModelManager(object):
path = name_or_path
else:
owner, repo = name_or_path.split("/")
path = Path(global_cache_dir("diffusers") / f"models--{owner}--{repo}")
path = Path(global_cache_dir("hub") / f"models--{owner}--{repo}")
if not path.exists():
return None
hashpath = path / "checksum.sha256"
@ -1133,7 +1118,7 @@ class ModelManager(object):
using_fp16 = self.precision == "float16"
vae_args.update(
cache_dir=global_cache_dir("diffusers"),
cache_dir=global_cache_dir("hub"),
local_files_only=not Globals.internet_available,
)
@ -1172,7 +1157,7 @@ class ModelManager(object):
@staticmethod
def _delete_model_from_cache(repo_id):
cache_info = scan_cache_dir(global_cache_dir("diffusers"))
cache_info = scan_cache_dir(global_cache_dir("hub"))
# I'm sure there is a way to do this with comprehensions
# but the code quickly became incomprehensible!

View File

@ -640,7 +640,7 @@ def do_textual_inversion_training(
assert (
pretrained_model_name_or_path
), f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}"
pipeline_args = dict(cache_dir=global_cache_dir("diffusers"))
pipeline_args = dict(cache_dir=global_cache_dir("hub"))
# Load tokenizer
if tokenizer_name:

View File

@ -442,7 +442,7 @@ def main():
args = _parse_args()
global_set_root(args.root_dir)
cache_dir = str(global_cache_dir("diffusers"))
cache_dir = str(global_cache_dir("hub"))
os.environ[
"HF_HOME"
] = cache_dir # because not clear the merge pipeline is honoring cache_dir