mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

implement StALKeR7779 requested API for fetching submodels

commit c15b49c805 (parent fd63e36822)
@@ -19,10 +19,9 @@ context. Use like this:
 import contextlib
 import gc
 import hashlib
-import logging
 import warnings
 from collections import Counter
-from enum import Enum
+from enum import Enum,auto
 from pathlib import Path
 from psutil import Process
 from typing import Dict, Sequence, Union, Tuple, types
@@ -52,9 +51,15 @@ DEFAULT_MAX_CACHE_SIZE = 6.0
 GIG = 1073741824

 # This is the mapping from the stable diffusion submodel dict key to the class
+class LoraType(dict):
+    pass
+class TIType(dict):
+    pass
+class CkptType(dict):
+    pass
+
 class SDModelType(Enum):
-    diffusion_pipeline=StableDiffusionGeneratorPipeline  # whole thing
-    diffusers=StableDiffusionGeneratorPipeline           # same thing, different name
+    diffusers=StableDiffusionGeneratorPipeline           # whole pipeline
     vae=AutoencoderKL                                    # diffusers parts
     text_encoder=CLIPTextModel
     tokenizer=CLIPTokenizer
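
The three new classes above are empty dict subclasses. They add no behavior of their own; their only job is to let the rest of the code tell a loaded LoRA, textual inversion, or legacy checkpoint apart by class, since all three are otherwise just dictionaries of tensors. A standalone sketch of that idea (not part of the diff; plain lists stand in for tensors):

    class LoraType(dict):
        pass

    class TIType(dict):
        pass

    def describe(weights: dict) -> str:
        # isinstance() picks out the specific subclass even though the
        # underlying data is just a mapping of names to tensors
        if isinstance(weights, LoraType):
            return "LoRA weights"
        if isinstance(weights, TIType):
            return "textual inversion embedding"
        return "plain state dict"

    print(describe(LoraType(lora_down=[1, 2], lora_up=[3, 4])))  # -> LoRA weights
    print(describe({"emb": [0.1, 0.2]}))                         # -> plain state dict
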
@@ -62,10 +67,11 @@ class SDModelType(Enum):
     scheduler=SchedulerMixin
     safety_checker=StableDiffusionSafetyChecker
     feature_extractor=CLIPFeatureExtractor
-    # These are all loaded as dicts of tensors
-    lora=dict
-    textual_inversion=dict
-    ckpt=dict
+    # These are all loaded as dicts of tensors, and we
+    # distinguish them by class
+    lora=LoraType
+    textual_inversion=TIType
+    ckpt=CkptType

 class ModelStatus(Enum):
     unknown='unknown'
@@ -78,17 +84,16 @@ class ModelStatus(Enum):
 # After loading, we will know it exactly.
 # Sizes are in Gigs, estimated for float16; double for float32
 SIZE_GUESSTIMATE = {
-    SDModelType.diffusion_pipeline: 2.5,
     SDModelType.diffusers: 2.5,
     SDModelType.vae: 0.35,
     SDModelType.text_encoder: 0.5,
-    SDModelType.tokenizer: 0.0001,
+    SDModelType.tokenizer: 0.001,
     SDModelType.unet: 3.4,
-    SDModelType.scheduler: 0.0001,
+    SDModelType.scheduler: 0.001,
     SDModelType.safety_checker: 1.2,
-    SDModelType.feature_extractor: 0.0001,
+    SDModelType.feature_extractor: 0.001,
     SDModelType.lora: 0.1,
-    SDModelType.textual_inversion: 0.0001,
+    SDModelType.textual_inversion: 0.001,
     SDModelType.ckpt: 4.2,
 }

@@ -152,7 +157,7 @@ class ModelCache(object):
     def get_model(
             self,
             repo_id_or_path: Union[str,Path],
-            model_type: SDModelType=SDModelType.diffusion_pipeline,
+            model_type: SDModelType=SDModelType.diffusers,
             subfolder: Path=None,
             submodel: SDModelType=None,
             revision: str=None,
@@ -263,7 +268,7 @@ class ModelCache(object):
             self.current_cache_size += usage.mem_used   # increment size of the cache

             # this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
-            if model_type==SDModelType.diffusion_pipeline and attach_model_part[0]:
+            if model_type==SDModelType.diffusers and attach_model_part[0]:
                 self.attach_part(model,*attach_model_part)

         self.stack.append(key)    # add to LRU cache
@@ -301,8 +306,10 @@ class ModelCache(object):
             cache.locked_models[key] += 1
             if cache.lazy_offloading:
                 cache._offload_unlocked_models()
-            cache.logger.debug(f'Loading {key} into {cache.execution_device}')
+            if model.device != cache.execution_device:
+                cache.logger.debug(f'Moving {key} into {cache.execution_device}')
                 model.to(cache.execution_device)  # move into GPU
+            cache.logger.debug(f'Locking {key} in {cache.execution_device}')
             cache._print_cuda_stats()
         else:
             # in the event that the caller wants the model in RAM, we
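
The rework above moves a model to the execution device only when it is not already resident, and logs the move and the lock as separate events. A hedged sketch of that control flow, with stand-in objects in place of the real cache and torch model (all names here are illustrative only):

    class FakeModel:
        def __init__(self, device): self.device = device
        def to(self, device): self.device = device

    def lock_into(model, execution_device, log=print):
        if model.device != execution_device:         # skip the copy if already resident
            log(f"Moving model into {execution_device}")
            model.to(execution_device)
        log(f"Locking model in {execution_device}")   # lock is logged even when nothing moved

    m = FakeModel("cpu")
    lock_into(m, "cuda:0")   # logs a move, then the lock
    lock_into(m, "cuda:0")   # already resident: logs only the lock
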
@@ -345,7 +352,7 @@ class ModelCache(object):

     def status(self,
                repo_id_or_path: Union[str,Path],
-               model_type: SDModelType=SDModelType.diffusion_pipeline,
+               model_type: SDModelType=SDModelType.diffusers,
                revision: str=None,
                subfolder: Path=None,
                )->ModelStatus:
@@ -428,7 +435,7 @@ class ModelCache(object):
     def _make_cache_room(self, key, model_type):
         # calculate how much memory this model will require
         multiplier = 2 if self.precision==torch.float32 else 1
-        bytes_needed = int(self.model_sizes.get(key,0) or SIZE_GUESSTIMATE[model_type]*GIG*multiplier)
+        bytes_needed = int(self.model_sizes.get(key,0) or SIZE_GUESSTIMATE.get(model_type,0.5)*GIG*multiplier)
         maximum_size = self.max_cache_size * GIG  # stored in GB, convert to bytes
         current_size = self.current_cache_size

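
The fallback above avoids a KeyError for model types missing from SIZE_GUESSTIMATE by assuming 0.5 GB. A worked sketch of the arithmetic, with constants copied from the hunks above (the helper function itself is illustrative, not the method body):

    GIG = 1073741824   # bytes per gigabyte
    SIZE_GUESSTIMATE = {"diffusers": 2.5, "vae": 0.35, "tokenizer": 0.001}

    def bytes_needed(known_size: int, model_type: str, is_float32: bool) -> int:
        multiplier = 2 if is_float32 else 1
        # a measured size wins; otherwise fall back to the per-type guess,
        # defaulting to 0.5 GB for unknown types, scaled up for float32
        return int(known_size or SIZE_GUESSTIMATE.get(model_type, 0.5) * GIG * multiplier)

    print(bytes_needed(0, "diffusers", False))   # ~2.68e9 bytes (2.5 GB at float16)
    print(bytes_needed(0, "diffusers", True))    # doubled for float32
    print(bytes_needed(0, "controlnet", False))  # unknown type -> 0.5 GB default
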
@@ -473,7 +480,7 @@ class ModelCache(object):
         # silence transformer and diffuser warnings
         with SilenceWarnings():
             if self.is_legacy_ckpt(repo_id_or_path):
-                model = self._load_ckpt_from_storage(repo_id_or_path, legacy_info)
+                model = model_class(self._load_ckpt_from_storage(repo_id_or_path, legacy_info))
             else:
                 model = self._load_diffusers_from_storage(
                     repo_id_or_path,
@@ -20,18 +20,37 @@ return a SDModelInfo object that contains the following attributes:
 Typical usage:

    from invokeai.backend import ModelManager
-   manager = ModelManager(config_path='./configs/models.yaml',max_models=4)
+   manager = ModelManager(
+                 config='./configs/models.yaml',
+                 max_cache_size=8
+             ) # gigabytes

    model_info = manager.get_model('stable-diffusion-1.5')
    with model_info.context as my_model:
        my_model.latents_from_embeddings(...)

 The manager uses the underlying ModelCache class to keep
 frequently-used models in RAM and move them into GPU as needed for
-generation operations. The ModelCache object can be accessed using
-the manager's "cache" attribute.
+generation operations. The optional `max_cache_size` argument
+indicates the maximum size the cache can grow to, in gigabytes. The
+underlying ModelCache object can be accessed using the manager's "cache"
+attribute.

-Other methods provided by ModelManager support importing, editing,
-converting and deleting models.
+Because the model manager can return multiple different types of
+models, you may wish to add additional type checking on the class
+of model returned. To do this, provide the optional `model_type`
+parameter:
+
+   model_info = manager.get_model(
+                    'clip-tokenizer',
+                    model_type=SDModelType.tokenizer
+                )
+
+This will raise an InvalidModelError if the format defined in the
+config file doesn't match the requested model type.
+
+MODELS.YAML

 The general format of a models.yaml section is:

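
The new docstring promises an InvalidModelError when the stanza's format disagrees with the requested model_type. A hedged usage sketch of defending against that from calling code; the import location of SDModelType and the exception class caught here are assumptions, not verified against the package layout:

    from invokeai.backend import ModelManager, SDModelType  # SDModelType location assumed

    manager = ModelManager(config='./configs/models.yaml', max_cache_size=8)
    try:
        info = manager.get_model('clip-tokenizer', model_type=SDModelType.tokenizer)
    except Exception as err:   # InvalidModelError in the codebase; caught broadly in this sketch
        print(f"config stanza does not match the requested type: {err}")
    else:
        with info.context as tokenizer:
            print(type(tokenizer))   # expected: CLIPTokenizer
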
@@ -40,7 +59,6 @@ The general format of a models.yaml section is:
     repo_id: owner/repo
     path: /path/to/local/file/or/directory
     subfolder: subfolder-name
-    submodel: vae|text_encoder|tokenizer...

 The format is one of {diffusers, ckpt, vae, text_encoder, tokenizer,
 unet, scheduler, safety_checker, feature_extractor}, and correspond to
@@ -54,11 +72,7 @@ If subfolder is provided, then the model exists in a subdirectory of
 the main model. These are usually named after the model type, such as
 "unet".

-Finally, if submodel is provided, then the path/repo_id is treated as
-a diffusers model, the whole thing is ready into memory, and then the
-requested part (e.g. "unet") is retrieved.
-
-This summarizes the three ways of getting a non-diffuser model:
+This example summarizes the two ways of getting a non-diffuser model:

   clip-test-1:
     format: text_encoder
|
|||||||
description: Returns standalone CLIPTextModel
|
description: Returns standalone CLIPTextModel
|
||||||
|
|
||||||
clip-test-2:
|
clip-test-2:
|
||||||
format: diffusers
|
|
||||||
repo_id: stabilityai/stable-diffusion-2
|
|
||||||
submodel: text_encoder
|
|
||||||
description: Returns the text_encoder part of whole diffusers model (whole thing in RAM)
|
|
||||||
|
|
||||||
clip-test-3:
|
|
||||||
format: text_encoder
|
format: text_encoder
|
||||||
repo_id: stabilityai/stable-diffusion-2
|
repo_id: stabilityai/stable-diffusion-2
|
||||||
subfolder: text_encoder
|
subfolder: text_encoder
|
||||||
description: Returns the text_encoder in the subfolder of the diffusers model (just the encoder in RAM)
|
description: Returns the text_encoder in the subfolder of the diffusers model (just the encoder in RAM)
|
||||||
|
|
||||||
clip-token:
|
SUBMODELS:
|
||||||
|
|
||||||
|
It is also possible to fetch an isolated submodel from a diffusers
|
||||||
|
model. Use the `submodel` parameter to select which part:
|
||||||
|
|
||||||
|
vae = manager.get_model('stable-diffusion-1.5',submodel=SDModelType.vae)
|
||||||
|
with vae.context as my_vae:
|
||||||
|
print(type(my_vae))
|
||||||
|
# "AutoencoderKL"
|
||||||
|
|
||||||
|
DISAMBIGUATION:
|
||||||
|
|
||||||
|
You may wish to use the same name for a related family of models. To
|
||||||
|
do this, disambiguate the stanza key with the model and and format
|
||||||
|
separated by "/". Example:
|
||||||
|
|
||||||
|
clip-large/tokenizer:
|
||||||
format: tokenizer
|
format: tokenizer
|
||||||
repo_id: openai/clip-vit-large-patch14
|
repo_id: openai/clip-vit-large-patch14
|
||||||
description: Returns standalone tokenizer
|
description: Returns standalone tokenizer
|
||||||
|
|
||||||
|
clip-large/text_encoder:
|
||||||
|
format: text_encoder
|
||||||
|
repo_id: openai/clip-vit-large-patch14
|
||||||
|
description: Returns standalone text encoder
|
||||||
|
|
||||||
|
You can now use the `model_type` argument to indicate which model you
|
||||||
|
want:
|
||||||
|
|
||||||
|
tokenizer = mgr.get('clip-large',model_type=SDModelType.tokenizer)
|
||||||
|
encoder = mgr.get('clip-large',model_type=SDModelType.text_encoder)
|
||||||
|
|
||||||
|
OTHER FUNCTIONS:
|
||||||
|
|
||||||
|
Other methods provided by ModelManager support importing, editing,
|
||||||
|
converting and deleting models.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -152,7 +193,7 @@ class ModelManager(object):

     def __init__(
             self,
-            config_path: Path,
+            config: Union[Path, DictConfig, str],
             device_type: torch.device = CUDA_DEVICE,
             precision: torch.dtype = torch.float16,
             max_cache_size=MAX_CACHE_SIZE,
@@ -165,8 +206,15 @@ class ModelManager(object):
         and sequential_offload boolean. Note that the default device
         type and precision are set up for a CUDA system running at half precision.
         """
-        self.config_path = config_path
+        if isinstance(config, DictConfig):
+            self.config = config
+            self.config_path = None
+        elif type(config) in [str,DictConfig]:
+            self.config_path = config
             self.config = OmegaConf.load(self.config_path)
+        else:
+            raise ValueError('config argument must be an OmegaConf object, a Path or a string')
+
         self.cache = ModelCache(
             max_cache_size=max_cache_size,
             execution_device = device_type,
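
The constructor now accepts either an already-parsed OmegaConf DictConfig or something pointing at a file on disk. As committed, the elif tests `type(config) in [str,DictConfig]`, so a bare Path would fall through to the ValueError; the sketch below shows the dispatch the type hint appears to intend (str or Path loaded from disk). That reading is an assumption, not the committed code:

    from pathlib import Path
    from omegaconf import OmegaConf, DictConfig

    def resolve_config(config):
        if isinstance(config, DictConfig):
            return config, None                     # already parsed; no file path to remember
        elif isinstance(config, (str, Path)):
            return OmegaConf.load(config), config   # load from disk and remember the path
        else:
            raise ValueError('config argument must be an OmegaConf object, a Path or a string')

    cfg, cfg_path = resolve_config(OmegaConf.create({'test1/diffusers': {'format': 'diffusers'}}))
    print(cfg_path)   # None: nothing to write back to disk unless a path is supplied later
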
@@ -185,28 +233,64 @@ class ModelManager(object):
         return model_name in self.config

     def get_model(self,
-                  model_name: str = None,
+                  model_name: str,
+                  model_type: SDModelType=None,
                   submodel: SDModelType=None,
                   ) -> SDModelInfo:
         """Given a model named identified in models.yaml, return
         an SDModelInfo object describing it.
         :param model_name: symbolic name of the model in models.yaml
+        :param model_type: SDModelType enum indicating the type of model to return
         :param submodel: an SDModelType enum indicating the portion of
                the model to retrieve (e.g. SDModelType.vae)
+
+        If not provided, the model_type will be read from the `format` field
+        of the corresponding stanza. If provided, the model_type will be used
+        to disambiguate stanzas in the configuration file. The default is to
+        assume a diffusers pipeline. The behavior is illustrated here:
+
+        [models.yaml]
+        test1/diffusers:
+           repo_id: foo/bar
+           format: diffusers
+           description: Typical diffusers pipeline
+
+        test1/lora:
+           repo_id: /tmp/loras/test1.safetensors
+           format: lora
+           description: Typical lora file
+
+        test1_pipeline = mgr.get_model('test1')
+        # returns a StableDiffusionGeneratorPipeline
+
+        test1_vae1 = mgr.get_model('test1',submodel=SDModelType.vae)
+        # returns the VAE part of a diffusers model as an AutoencoderKL
+
+        test1_vae2 = mgr.get_model('test1',model_type=SDModelType.diffusers,submodel=SDModelType.vae)
+        # does the same thing as the previous statement. Note that model_type
+        # is for the parent model, and submodel is for the part
+
+        test1_lora = mgr.get_model('test1',model_type=SDModelType.lora)
+        # returns a LoRA embed (as a 'dict' of tensors)
+
+        test1_encoder = mgr.get_model('test1',model_type=SDModelType.text_encoder)
+        # raises an InvalidModelError
+
         """
         if not model_name:
             model_name = self.default_model()

-        if not self.valid_model(model_name):
-            raise InvalidModelError(
-                f'"{model_name}" is not a known model name. Please check your models.yaml file'
-            )
+        model_key = self._disambiguate_name(model_name, model_type)

         # get the required loading info out of the config file
-        mconfig = self.config[model_name]
+        mconfig = self.config[model_key]

         format = mconfig.get('format','diffusers')
-        model_type = SDModelType.diffusion_pipeline
+        if model_type and model_type.name != format:
+            raise InvalidModelError(
+                f'Inconsistent model definition; {model_key} has format {format}, but type {model_type.name} was requested'
+            )
+
         model_parts = dict([(x.name,x) for x in SDModelType])
         legacy = None

@@ -219,16 +303,14 @@ class ModelManager(object):
             legacy.vae_file = global_resolve_path(mconfig.vae)
         elif format=='diffusers':
             location = mconfig.get('repo_id') or mconfig.get('path')
-            if sm := mconfig.get('submodel'):
-                submodel = model_parts[sm]
         elif format in model_parts:
             location = mconfig.get('repo_id') or mconfig.get('path') or mconfig.get('weights')
-            model_type = model_parts[format]
         else:
             raise InvalidModelError(
-                f'"{model_name}" has an unknown format {format}'
+                f'"{model_key}" has an unknown format {format}'
             )

+        model_type = model_parts[format]
         subfolder = mconfig.get('subfolder')
         revision = mconfig.get('revision')
         hash = self.cache.model_hash(location,revision)
@@ -254,7 +336,7 @@ class ModelManager(object):
         # in case we need to communicate information about this
         # model to the cache manager, then we need to remember
         # the cache key
-        self.cache_keys[model_name] = model_context.key
+        self.cache_keys[model_key] = model_context.key

         return SDModelInfo(
             context = model_context,
@@ -449,18 +531,20 @@ class ModelManager(object):
         else:
             assert "weights" in model_attributes and "description" in model_attributes

+        model_key = f'{model_name}/{format}'
+
         assert (
-            clobber or model_name not in omega
-        ), f'attempt to overwrite existing model definition "{model_name}"'
+            clobber or model_key not in omega
+        ), f'attempt to overwrite existing model definition "{model_key}"'

-        omega[model_name] = model_attributes
+        omega[model_key] = model_attributes

-        if "weights" in omega[model_name]:
-            omega[model_name]["weights"].replace("\\", "/")
+        if "weights" in omega[model_key]:
+            omega[model_key]["weights"].replace("\\", "/")

-        if clobber and model_name in self.cache_keys:
-            self.cache.uncache_model(self.cache_keys[model_name])
-            del self.cache_keys[model_name]
+        if clobber and model_key in self.cache_keys:
+            self.cache.uncache_model(self.cache_keys[model_key])
+            del self.cache_keys[model_key]

     def import_diffuser_model(
         self,
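
With this change, add_model keys each stanza by name plus format, so one symbolic name can carry several related models. A hedged sketch of the key derivation and the clobber check; the `omega` mapping stands in for the parsed models.yaml and the helper is illustrative only:

    def add_stanza(omega: dict, model_name: str, attributes: dict, clobber: bool = False) -> str:
        model_key = f"{model_name}/{attributes['format']}"   # e.g. "clip-large/tokenizer"
        assert clobber or model_key not in omega, \
            f'attempt to overwrite existing model definition "{model_key}"'
        omega[model_key] = attributes
        return model_key

    omega = {}
    print(add_stanza(omega, 'clip-large', {'format': 'tokenizer',
                                           'repo_id': 'openai/clip-vit-large-patch14'}))
    print(add_stanza(omega, 'clip-large', {'format': 'text_encoder',
                                           'repo_id': 'openai/clip-vit-large-patch14'}))
    # one family name, two stanzas: clip-large/tokenizer and clip-large/text_encoder
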
@@ -482,6 +566,7 @@ class ModelManager(object):
         models.yaml file.
         """
         model_name = model_name or Path(repo_or_path).stem
+        model_key = f'{model_name}/diffusers'
         model_description = description or f"Imported diffusers model {model_name}"
         new_config = dict(
             description=model_description,
@@ -493,10 +578,10 @@ class ModelManager(object):
         else:
             new_config.update(repo_id=repo_or_path)

-        self.add_model(model_name, new_config, True)
+        self.add_model(model_key, new_config, True)
         if commit_to_conf:
             self.commit(commit_to_conf)
-        return model_name
+        return model_key

     def import_lora(
         self,
@@ -511,7 +596,7 @@ class ModelManager(object):
         path = Path(path)
         model_name = model_name or path.stem
         model_description = description or f"LoRA model {model_name}"
-        self.add_model(model_name,
+        self.add_model(f'{model_name}/{SDModelType.lora.name}',
                        dict(
                            format="lora",
                            weights=str(path),
|
|||||||
|
|
||||||
model_name = model_name or path.stem
|
model_name = model_name or path.stem
|
||||||
model_description = description or f"Textual embedding model {model_name}"
|
model_description = description or f"Textual embedding model {model_name}"
|
||||||
self.add_model(model_name,
|
self.add_model(f'{model_name}/{SDModelType.textual_inversion.name}',
|
||||||
dict(
|
dict(
|
||||||
format="textual_inversion",
|
format="textual_inversion",
|
||||||
weights=str(weights),
|
weights=str(weights),
|
||||||
@@ -871,6 +956,7 @@ class ModelManager(object):
         """
         yaml_str = OmegaConf.to_yaml(self.config)
         config_file_path = conf_file or self.config_path
+        assert config_file_path is not None,'no config file path to write to'
         tmpfile = os.path.join(os.path.dirname(config_file_path), "new_config.tmp")
         with open(tmpfile, "w", encoding="utf-8") as outfile:
             outfile.write(self.preamble())
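
Because a manager constructed from an in-memory DictConfig has no config_path, commit() now refuses to run without an explicit destination. A minimal sketch of the new guard; the helper mirrors the names in the hunk but is illustrative, not the method itself:

    def resolve_commit_target(conf_file, config_path):
        config_file_path = conf_file or config_path
        assert config_file_path is not None, 'no config file path to write to'
        return config_file_path

    print(resolve_commit_target('./configs/models.yaml', None))  # explicit target wins
    print(resolve_commit_target(None, './configs/models.yaml'))  # falls back to the stored path
    # resolve_commit_target(None, None) raises AssertionError: a manager built from an
    # in-memory DictConfig must be handed a file to commit to
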
@@ -893,6 +979,18 @@ class ModelManager(object):
         """
         )

+    def _disambiguate_name(self, model_name:str, model_type:SDModelType)->str:
+        model_type = model_type or SDModelType.diffusers
+        full_name = f"{model_name}/{model_type.name}"
+        if self.valid_model(full_name):
+            return full_name
+        if self.valid_model(model_name):
+            return model_name
+        raise InvalidModelError(
+            f'Neither "{model_name}" nor "{full_name}" are known model names. Please check your models.yaml file'
+        )
+
+
     @classmethod
     def _delete_model_from_cache(cls,repo_id):
         cache_info = scan_cache_dir(global_cache_dir("hub"))
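
_disambiguate_name tries the type-qualified key first and then falls back to the bare name, defaulting the type to diffusers. A standalone sketch of that lookup order against a toy config (the helper and config here are illustrative, not the repository's code):

    config = {
        'test1/diffusers': {'format': 'diffusers'},
        'test1/lora':      {'format': 'lora'},
        'old-style-name':  {'format': 'diffusers'},
    }

    def disambiguate(model_name: str, model_type_name: str = 'diffusers') -> str:
        full_name = f"{model_name}/{model_type_name}"
        if full_name in config:    # prefer the type-qualified stanza
            return full_name
        if model_name in config:   # fall back to a bare, legacy-style key
            return model_name
        raise KeyError(f'Neither "{model_name}" nor "{full_name}" are known model names')

    print(disambiguate('test1'))           # -> test1/diffusers
    print(disambiguate('test1', 'lora'))   # -> test1/lora
    print(disambiguate('old-style-name'))  # -> old-style-name
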
@@ -40,7 +40,7 @@ class DMType(Enum):
     type1 = DummyModelType1
     type2 = DummyModelType2

-cache = ModelCache(max_models=4)
+cache = ModelCache(max_cache_size=4)

 def test_pipeline_fetch():
     assert cache.cache_size()==0
@@ -53,12 +53,10 @@ def test_pipeline_fetch():
         assert type(pipeline1)==DMType.dummy_pipeline.value,'get_model() did not return model of expected type'
         assert pipeline1==pipeline1a,'pipelines with the same repo_id should be the same'
         assert pipeline1!=pipeline2,'pipelines with different repo_ids should not be the same'
-        assert cache.cache_size()==2,'cache should uniquely cache models with same identity'
+        assert len(cache.models)==2,'cache should uniquely cache models with same identity'
         with cache.get_model('dummy/pipeline3',DMType.dummy_pipeline) as pipeline3,\
              cache.get_model('dummy/pipeline4',DMType.dummy_pipeline) as pipeline4:
-            assert cache.cache_size()==4,'cache did not grow as expected'
-        with cache.get_model('dummy/pipeline5',DMType.dummy_pipeline) as pipeline5:
-            assert cache.cache_size()==4,'cache did not free space as expected'
+            assert len(cache.models)==4,'cache did not grow as expected'

 def test_signatures():
     with cache.get_model('dummy/pipeline',DMType.dummy_pipeline,revision='main') as pipeline1,\