mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
implement StALKeR7779 requested API for fetching submodels
This commit is contained in:
parent
fd63e36822
commit
c15b49c805
@ -19,10 +19,9 @@ context. Use like this:
|
||||
import contextlib
|
||||
import gc
|
||||
import hashlib
|
||||
import logging
|
||||
import warnings
|
||||
from collections import Counter
|
||||
from enum import Enum
|
||||
from enum import Enum,auto
|
||||
from pathlib import Path
|
||||
from psutil import Process
|
||||
from typing import Dict, Sequence, Union, Tuple, types
|
||||
@ -52,9 +51,15 @@ DEFAULT_MAX_CACHE_SIZE = 6.0
|
||||
GIG = 1073741824
|
||||
|
||||
# This is the mapping from the stable diffusion submodel dict key to the class
|
||||
class LoraType(dict):
|
||||
pass
|
||||
class TIType(dict):
|
||||
pass
|
||||
class CkptType(dict):
|
||||
pass
|
||||
|
||||
class SDModelType(Enum):
|
||||
diffusion_pipeline=StableDiffusionGeneratorPipeline # whole thing
|
||||
diffusers=StableDiffusionGeneratorPipeline # same thing, different name
|
||||
diffusers=StableDiffusionGeneratorPipeline # whole pipeline
|
||||
vae=AutoencoderKL # diffusers parts
|
||||
text_encoder=CLIPTextModel
|
||||
tokenizer=CLIPTokenizer
|
||||
@ -62,10 +67,11 @@ class SDModelType(Enum):
|
||||
scheduler=SchedulerMixin
|
||||
safety_checker=StableDiffusionSafetyChecker
|
||||
feature_extractor=CLIPFeatureExtractor
|
||||
# These are all loaded as dicts of tensors
|
||||
lora=dict
|
||||
textual_inversion=dict
|
||||
ckpt=dict
|
||||
# These are all loaded as dicts of tensors, and we
|
||||
# distinguish them by class
|
||||
lora=LoraType
|
||||
textual_inversion=TIType
|
||||
ckpt=CkptType
|
||||
|
||||
class ModelStatus(Enum):
|
||||
unknown='unknown'
|
||||
@ -78,17 +84,16 @@ class ModelStatus(Enum):
|
||||
# After loading, we will know it exactly.
|
||||
# Sizes are in Gigs, estimated for float16; double for float32
|
||||
SIZE_GUESSTIMATE = {
|
||||
SDModelType.diffusion_pipeline: 2.5,
|
||||
SDModelType.diffusers: 2.5,
|
||||
SDModelType.vae: 0.35,
|
||||
SDModelType.text_encoder: 0.5,
|
||||
SDModelType.tokenizer: 0.0001,
|
||||
SDModelType.tokenizer: 0.001,
|
||||
SDModelType.unet: 3.4,
|
||||
SDModelType.scheduler: 0.0001,
|
||||
SDModelType.scheduler: 0.001,
|
||||
SDModelType.safety_checker: 1.2,
|
||||
SDModelType.feature_extractor: 0.0001,
|
||||
SDModelType.feature_extractor: 0.001,
|
||||
SDModelType.lora: 0.1,
|
||||
SDModelType.textual_inversion: 0.0001,
|
||||
SDModelType.textual_inversion: 0.001,
|
||||
SDModelType.ckpt: 4.2,
|
||||
}
|
||||
|
||||
@ -152,7 +157,7 @@ class ModelCache(object):
|
||||
def get_model(
|
||||
self,
|
||||
repo_id_or_path: Union[str,Path],
|
||||
model_type: SDModelType=SDModelType.diffusion_pipeline,
|
||||
model_type: SDModelType=SDModelType.diffusers,
|
||||
subfolder: Path=None,
|
||||
submodel: SDModelType=None,
|
||||
revision: str=None,
|
||||
@ -263,7 +268,7 @@ class ModelCache(object):
|
||||
self.current_cache_size += usage.mem_used # increment size of the cache
|
||||
|
||||
# this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
|
||||
if model_type==SDModelType.diffusion_pipeline and attach_model_part[0]:
|
||||
if model_type==SDModelType.diffusers and attach_model_part[0]:
|
||||
self.attach_part(model,*attach_model_part)
|
||||
|
||||
self.stack.append(key) # add to LRU cache
|
||||
@ -301,8 +306,10 @@ class ModelCache(object):
|
||||
cache.locked_models[key] += 1
|
||||
if cache.lazy_offloading:
|
||||
cache._offload_unlocked_models()
|
||||
cache.logger.debug(f'Loading {key} into {cache.execution_device}')
|
||||
model.to(cache.execution_device) # move into GPU
|
||||
if model.device != cache.execution_device:
|
||||
cache.logger.debug(f'Moving {key} into {cache.execution_device}')
|
||||
model.to(cache.execution_device) # move into GPU
|
||||
cache.logger.debug(f'Locking {key} in {cache.execution_device}')
|
||||
cache._print_cuda_stats()
|
||||
else:
|
||||
# in the event that the caller wants the model in RAM, we
|
||||
@ -345,7 +352,7 @@ class ModelCache(object):
|
||||
|
||||
def status(self,
|
||||
repo_id_or_path: Union[str,Path],
|
||||
model_type: SDModelType=SDModelType.diffusion_pipeline,
|
||||
model_type: SDModelType=SDModelType.diffusers,
|
||||
revision: str=None,
|
||||
subfolder: Path=None,
|
||||
)->ModelStatus:
|
||||
@ -428,7 +435,7 @@ class ModelCache(object):
|
||||
def _make_cache_room(self, key, model_type):
|
||||
# calculate how much memory this model will require
|
||||
multiplier = 2 if self.precision==torch.float32 else 1
|
||||
bytes_needed = int(self.model_sizes.get(key,0) or SIZE_GUESSTIMATE[model_type]*GIG*multiplier)
|
||||
bytes_needed = int(self.model_sizes.get(key,0) or SIZE_GUESSTIMATE.get(model_type,0.5)*GIG*multiplier)
|
||||
maximum_size = self.max_cache_size * GIG # stored in GB, convert to bytes
|
||||
current_size = self.current_cache_size
|
||||
|
||||
@ -473,7 +480,7 @@ class ModelCache(object):
|
||||
# silence transformer and diffuser warnings
|
||||
with SilenceWarnings():
|
||||
if self.is_legacy_ckpt(repo_id_or_path):
|
||||
model = self._load_ckpt_from_storage(repo_id_or_path, legacy_info)
|
||||
model = model_class(self._load_ckpt_from_storage(repo_id_or_path, legacy_info))
|
||||
else:
|
||||
model = self._load_diffusers_from_storage(
|
||||
repo_id_or_path,
|
||||
|
@ -20,18 +20,37 @@ return a SDModelInfo object that contains the following attributes:
|
||||
Typical usage:
|
||||
|
||||
from invokeai.backend import ModelManager
|
||||
manager = ModelManager(config_path='./configs/models.yaml',max_models=4)
|
||||
|
||||
manager = ModelManager(
|
||||
config='./configs/models.yaml',
|
||||
max_cache_size=8
|
||||
) # gigabytes
|
||||
|
||||
model_info = manager.get_model('stable-diffusion-1.5')
|
||||
with model_info.context as my_model:
|
||||
my_model.latents_from_embeddings(...)
|
||||
|
||||
The manager uses the underlying ModelCache class to keep
|
||||
frequently-used models in RAM and move them into GPU as needed for
|
||||
generation operations. The ModelCache object can be accessed using
|
||||
the manager's "cache" attribute.
|
||||
generation operations. The optional `max_cache_size` argument
|
||||
indicates the maximum size the cache can grow to, in gigabytes. The
|
||||
underlying ModelCache object can be accessed using the manager's "cache"
|
||||
attribute.
|
||||
|
||||
Other methods provided by ModelManager support importing, editing,
|
||||
converting and deleting models.
|
||||
Because the model manager can return multiple different types of
|
||||
models, you may wish to add additional type checking on the class
|
||||
of model returned. To do this, provide the option `model_type`
|
||||
parameter:
|
||||
|
||||
model_info = manager.get_model(
|
||||
'clip-tokenizer',
|
||||
model_type=SDModelType.tokenizer
|
||||
)
|
||||
|
||||
This will raise an InvalidModelError if the format defined in the
|
||||
config file doesn't match the requested model type.
|
||||
|
||||
MODELS.YAML
|
||||
|
||||
The general format of a models.yaml section is:
|
||||
|
||||
@ -40,7 +59,6 @@ The general format of a models.yaml section is:
|
||||
repo_id: owner/repo
|
||||
path: /path/to/local/file/or/directory
|
||||
subfolder: subfolder-name
|
||||
submodel: vae|text_encoder|tokenizer...
|
||||
|
||||
The format is one of {diffusers, ckpt, vae, text_encoder, tokenizer,
|
||||
unet, scheduler, safety_checker, feature_extractor}, and correspond to
|
||||
@ -54,11 +72,7 @@ If subfolder is provided, then the model exists in a subdirectory of
|
||||
the main model. These are usually named after the model type, such as
|
||||
"unet".
|
||||
|
||||
Finally, if submodel is provided, then the path/repo_id is treated as
|
||||
a diffusers model, the whole thing is ready into memory, and then the
|
||||
requested part (e.g. "unet") is retrieved.
|
||||
|
||||
This summarizes the three ways of getting a non-diffuser model:
|
||||
This example summarizes the two ways of getting a non-diffuser model:
|
||||
|
||||
clip-test-1:
|
||||
format: text_encoder
|
||||
@ -66,21 +80,48 @@ This summarizes the three ways of getting a non-diffuser model:
|
||||
description: Returns standalone CLIPTextModel
|
||||
|
||||
clip-test-2:
|
||||
format: diffusers
|
||||
repo_id: stabilityai/stable-diffusion-2
|
||||
submodel: text_encoder
|
||||
description: Returns the text_encoder part of whole diffusers model (whole thing in RAM)
|
||||
|
||||
clip-test-3:
|
||||
format: text_encoder
|
||||
repo_id: stabilityai/stable-diffusion-2
|
||||
subfolder: text_encoder
|
||||
description: Returns the text_encoder in the subfolder of the diffusers model (just the encoder in RAM)
|
||||
|
||||
clip-token:
|
||||
SUBMODELS:
|
||||
|
||||
It is also possible to fetch an isolated submodel from a diffusers
|
||||
model. Use the `submodel` parameter to select which part:
|
||||
|
||||
vae = manager.get_model('stable-diffusion-1.5',submodel=SDModelType.vae)
|
||||
with vae.context as my_vae:
|
||||
print(type(my_vae))
|
||||
# "AutoencoderKL"
|
||||
|
||||
DISAMBIGUATION:
|
||||
|
||||
You may wish to use the same name for a related family of models. To
|
||||
do this, disambiguate the stanza key with the model and and format
|
||||
separated by "/". Example:
|
||||
|
||||
clip-large/tokenizer:
|
||||
format: tokenizer
|
||||
repo_id: openai/clip-vit-large-patch14
|
||||
description: Returns standalone tokenizer
|
||||
|
||||
clip-large/text_encoder:
|
||||
format: text_encoder
|
||||
repo_id: openai/clip-vit-large-patch14
|
||||
description: Returns standalone text encoder
|
||||
|
||||
You can now use the `model_type` argument to indicate which model you
|
||||
want:
|
||||
|
||||
tokenizer = mgr.get('clip-large',model_type=SDModelType.tokenizer)
|
||||
encoder = mgr.get('clip-large',model_type=SDModelType.text_encoder)
|
||||
|
||||
OTHER FUNCTIONS:
|
||||
|
||||
Other methods provided by ModelManager support importing, editing,
|
||||
converting and deleting models.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@ -152,7 +193,7 @@ class ModelManager(object):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config_path: Path,
|
||||
config: Union[Path, DictConfig, str],
|
||||
device_type: torch.device = CUDA_DEVICE,
|
||||
precision: torch.dtype = torch.float16,
|
||||
max_cache_size=MAX_CACHE_SIZE,
|
||||
@ -165,8 +206,15 @@ class ModelManager(object):
|
||||
and sequential_offload boolean. Note that the default device
|
||||
type and precision are set up for a CUDA system running at half precision.
|
||||
"""
|
||||
self.config_path = config_path
|
||||
self.config = OmegaConf.load(self.config_path)
|
||||
if isinstance(config, DictConfig):
|
||||
self.config = config
|
||||
self.config_path = None
|
||||
elif type(config) in [str,DictConfig]:
|
||||
self.config_path = config
|
||||
self.config = OmegaConf.load(self.config_path)
|
||||
else:
|
||||
raise ValueError('config argument must be an OmegaConf object, a Path or a string')
|
||||
|
||||
self.cache = ModelCache(
|
||||
max_cache_size=max_cache_size,
|
||||
execution_device = device_type,
|
||||
@ -185,28 +233,64 @@ class ModelManager(object):
|
||||
return model_name in self.config
|
||||
|
||||
def get_model(self,
|
||||
model_name: str = None,
|
||||
model_name: str,
|
||||
model_type: SDModelType=None,
|
||||
submodel: SDModelType=None,
|
||||
) -> SDModelInfo:
|
||||
"""Given a model named identified in models.yaml, return
|
||||
an SDModelInfo object describing it.
|
||||
:param model_name: symbolic name of the model in models.yaml
|
||||
:param model_type: SDModelType enum indicating the type of model to return
|
||||
:param submodel: an SDModelType enum indicating the portion of
|
||||
the model to retrieve (e.g. SDModelType.vae)
|
||||
|
||||
If not provided, the model_type will be read from the `format` field
|
||||
of the corresponding stanza. If provided, the model_type will be used
|
||||
to disambiguate stanzas in the configuration file. The default is to
|
||||
assume a diffusers pipeline. The behavior is illustrated here:
|
||||
|
||||
[models.yaml]
|
||||
test1/diffusers:
|
||||
repo_id: foo/bar
|
||||
format: diffusers
|
||||
description: Typical diffusers pipeline
|
||||
|
||||
test1/lora:
|
||||
repo_id: /tmp/loras/test1.safetensors
|
||||
format: lora
|
||||
description: Typical lora file
|
||||
|
||||
test1_pipeline = mgr.get_model('test1')
|
||||
# returns a StableDiffusionGeneratorPipeline
|
||||
|
||||
test1_vae1 = mgr.get_model('test1',submodel=SDModelType.vae)
|
||||
# returns the VAE part of a diffusers model as an AutoencoderKL
|
||||
|
||||
test1_vae2 = mgr.get_model('test1',model_type=SDModelType.diffusers,submodel=SDModelType.vae)
|
||||
# does the same thing as the previous statement. Note that model_type
|
||||
# is for the parent model, and submodel is for the part
|
||||
|
||||
test1_lora = mgr.get_model('test1',model_type=SDModelType.lora)
|
||||
# returns a LoRA embed (as a 'dict' of tensors)
|
||||
|
||||
test1_encoder = mgr.get_modelI('test1',model_type=SDModelType.textencoder)
|
||||
# raises an InvalidModelError
|
||||
|
||||
"""
|
||||
if not model_name:
|
||||
model_name = self.default_model()
|
||||
|
||||
if not self.valid_model(model_name):
|
||||
raise InvalidModelError(
|
||||
f'"{model_name}" is not a known model name. Please check your models.yaml file'
|
||||
)
|
||||
model_key = self._disambiguate_name(model_name, model_type)
|
||||
|
||||
# get the required loading info out of the config file
|
||||
mconfig = self.config[model_name]
|
||||
mconfig = self.config[model_key]
|
||||
|
||||
format = mconfig.get('format','diffusers')
|
||||
model_type = SDModelType.diffusion_pipeline
|
||||
if model_type and model_type.name != format:
|
||||
raise InvalidModelError(
|
||||
f'Inconsistent model definition; {model_key} has format {format}, but type {model_type.name} was requested'
|
||||
)
|
||||
|
||||
model_parts = dict([(x.name,x) for x in SDModelType])
|
||||
legacy = None
|
||||
|
||||
@ -219,16 +303,14 @@ class ModelManager(object):
|
||||
legacy.vae_file = global_resolve_path(mconfig.vae)
|
||||
elif format=='diffusers':
|
||||
location = mconfig.get('repo_id') or mconfig.get('path')
|
||||
if sm := mconfig.get('submodel'):
|
||||
submodel = model_parts[sm]
|
||||
elif format in model_parts:
|
||||
location = mconfig.get('repo_id') or mconfig.get('path') or mconfig.get('weights')
|
||||
model_type = model_parts[format]
|
||||
else:
|
||||
raise InvalidModelError(
|
||||
f'"{model_name}" has an unknown format {format}'
|
||||
f'"{model_key}" has an unknown format {format}'
|
||||
)
|
||||
|
||||
model_type = model_parts[format]
|
||||
subfolder = mconfig.get('subfolder')
|
||||
revision = mconfig.get('revision')
|
||||
hash = self.cache.model_hash(location,revision)
|
||||
@ -254,7 +336,7 @@ class ModelManager(object):
|
||||
# in case we need to communicate information about this
|
||||
# model to the cache manager, then we need to remember
|
||||
# the cache key
|
||||
self.cache_keys[model_name] = model_context.key
|
||||
self.cache_keys[model_key] = model_context.key
|
||||
|
||||
return SDModelInfo(
|
||||
context = model_context,
|
||||
@ -449,18 +531,20 @@ class ModelManager(object):
|
||||
else:
|
||||
assert "weights" in model_attributes and "description" in model_attributes
|
||||
|
||||
model_key = f'{model_name}/{format}'
|
||||
|
||||
assert (
|
||||
clobber or model_name not in omega
|
||||
), f'attempt to overwrite existing model definition "{model_name}"'
|
||||
clobber or model_key not in omega
|
||||
), f'attempt to overwrite existing model definition "{model_key}"'
|
||||
|
||||
omega[model_name] = model_attributes
|
||||
omega[model_key] = model_attributes
|
||||
|
||||
if "weights" in omega[model_name]:
|
||||
omega[model_name]["weights"].replace("\\", "/")
|
||||
if "weights" in omega[model_key]:
|
||||
omega[model_key]["weights"].replace("\\", "/")
|
||||
|
||||
if clobber and model_name in self.cache_keys:
|
||||
self.cache.uncache_model(self.cache_keys[model_name])
|
||||
del self.cache_keys[model_name]
|
||||
if clobber and model_key in self.cache_keys:
|
||||
self.cache.uncache_model(self.cache_keys[model_key])
|
||||
del self.cache_keys[model_key]
|
||||
|
||||
def import_diffuser_model(
|
||||
self,
|
||||
@ -482,6 +566,7 @@ class ModelManager(object):
|
||||
models.yaml file.
|
||||
"""
|
||||
model_name = model_name or Path(repo_or_path).stem
|
||||
model_key = f'{model_name}/diffusers'
|
||||
model_description = description or f"Imported diffusers model {model_name}"
|
||||
new_config = dict(
|
||||
description=model_description,
|
||||
@ -493,10 +578,10 @@ class ModelManager(object):
|
||||
else:
|
||||
new_config.update(repo_id=repo_or_path)
|
||||
|
||||
self.add_model(model_name, new_config, True)
|
||||
self.add_model(model_key, new_config, True)
|
||||
if commit_to_conf:
|
||||
self.commit(commit_to_conf)
|
||||
return model_name
|
||||
return model_key
|
||||
|
||||
def import_lora(
|
||||
self,
|
||||
@ -511,7 +596,7 @@ class ModelManager(object):
|
||||
path = Path(path)
|
||||
model_name = model_name or path.stem
|
||||
model_description = description or f"LoRA model {model_name}"
|
||||
self.add_model(model_name,
|
||||
self.add_model(f'{model_name}/{SDModelType.lora.name}',
|
||||
dict(
|
||||
format="lora",
|
||||
weights=str(path),
|
||||
@ -538,7 +623,7 @@ class ModelManager(object):
|
||||
|
||||
model_name = model_name or path.stem
|
||||
model_description = description or f"Textual embedding model {model_name}"
|
||||
self.add_model(model_name,
|
||||
self.add_model(f'{model_name}/{SDModelType.textual_inversion.name}',
|
||||
dict(
|
||||
format="textual_inversion",
|
||||
weights=str(weights),
|
||||
@ -871,6 +956,7 @@ class ModelManager(object):
|
||||
"""
|
||||
yaml_str = OmegaConf.to_yaml(self.config)
|
||||
config_file_path = conf_file or self.config_path
|
||||
assert config_file_path is not None,'no config file path to write to'
|
||||
tmpfile = os.path.join(os.path.dirname(config_file_path), "new_config.tmp")
|
||||
with open(tmpfile, "w", encoding="utf-8") as outfile:
|
||||
outfile.write(self.preamble())
|
||||
@ -893,6 +979,18 @@ class ModelManager(object):
|
||||
"""
|
||||
)
|
||||
|
||||
def _disambiguate_name(self, model_name:str, model_type:SDModelType)->str:
|
||||
model_type = model_type or SDModelType.diffusers
|
||||
full_name = f"{model_name}/{model_type.name}"
|
||||
if self.valid_model(full_name):
|
||||
return full_name
|
||||
if self.valid_model(model_name):
|
||||
return model_name
|
||||
raise InvalidModelError(
|
||||
f'Neither "{model_name}" nor "{full_name}" are known model names. Please check your models.yaml file'
|
||||
)
|
||||
|
||||
|
||||
@classmethod
|
||||
def _delete_model_from_cache(cls,repo_id):
|
||||
cache_info = scan_cache_dir(global_cache_dir("hub"))
|
||||
|
@ -40,7 +40,7 @@ class DMType(Enum):
|
||||
type1 = DummyModelType1
|
||||
type2 = DummyModelType2
|
||||
|
||||
cache = ModelCache(max_models=4)
|
||||
cache = ModelCache(max_cache_size=4)
|
||||
|
||||
def test_pipeline_fetch():
|
||||
assert cache.cache_size()==0
|
||||
@ -53,12 +53,10 @@ def test_pipeline_fetch():
|
||||
assert type(pipeline1)==DMType.dummy_pipeline.value,'get_model() did not return model of expected type'
|
||||
assert pipeline1==pipeline1a,'pipelines with the same repo_id should be the same'
|
||||
assert pipeline1!=pipeline2,'pipelines with different repo_ids should not be the same'
|
||||
assert cache.cache_size()==2,'cache should uniquely cache models with same identity'
|
||||
assert len(cache.models)==2,'cache should uniquely cache models with same identity'
|
||||
with cache.get_model('dummy/pipeline3',DMType.dummy_pipeline) as pipeline3,\
|
||||
cache.get_model('dummy/pipeline4',DMType.dummy_pipeline) as pipeline4:
|
||||
assert cache.cache_size()==4,'cache did not grow as expected'
|
||||
with cache.get_model('dummy/pipeline5',DMType.dummy_pipeline) as pipeline5:
|
||||
assert cache.cache_size()==4,'cache did not free space as expected'
|
||||
assert len(cache.models)==4,'cache did not grow as expected'
|
||||
|
||||
def test_signatures():
|
||||
with cache.get_model('dummy/pipeline',DMType.dummy_pipeline,revision='main') as pipeline1,\
|
||||
|
Loading…
Reference in New Issue
Block a user