# InvokeAI/invokeai/backend/model_management/models.py

import os
import sys
import json
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Type, Union

import torch
import safetensors.torch
import transformers
from diffusers import ConfigMixin, DiffusionPipeline
from diffusers.utils import is_safetensors_available
from omegaconf import DictConfig, OmegaConf
from pydantic import BaseModel, Field, root_validator

# NOTE: the import location of this project-internal helper (and of
# SilenceWarnings, used below) is an assumption; it is not shown in this file.
from invokeai.app.services.config import InvokeAIAppConfig


class BaseModelType(str, Enum):
    #StableDiffusion1_5 = "stable_diffusion_1_5"
    #StableDiffusion2 = "stable_diffusion_2"
    #StableDiffusion2Base = "stable_diffusion_2_base"
    # TODO: maybe then add sample size(512/768)?
    StableDiffusion1_5 = "SD-1"
    StableDiffusion2Base = "SD-2-base"  # 512 pixels; this will have epsilon parameterization
    StableDiffusion2 = "SD-2"  # 768 pixels; this will have v-prediction parameterization
    #Kandinsky2_1 = "kandinsky_2_1"


class ModelType(str, Enum):
    Pipeline = "pipeline"
    Classifier = "classifier"
    Vae = "vae"
    Lora = "lora"
    ControlNet = "controlnet"
    TextualInversion = "embedding"


class SubModelType(str, Enum):
    UNet = "unet"
    TextEncoder = "text_encoder"
    Tokenizer = "tokenizer"
    Vae = "vae"
    Scheduler = "scheduler"
    SafetyChecker = "safety_checker"
    FeatureExtractor = "feature_extractor"  # referenced by ClassifierModel below
    #MoVQ = "movq"


# NOTE: this mapping references the model classes defined later in this file;
# in the full module it is evaluated after those definitions exist.
MODEL_CLASSES = {
    BaseModelType.StableDiffusion1_5: {
        ModelType.Pipeline: StableDiffusionModel,
        ModelType.Classifier: ClassifierModel,
        ModelType.Vae: VaeModel,
        ModelType.Lora: LoRAModel,
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
    },
    BaseModelType.StableDiffusion2: {
        ModelType.Pipeline: StableDiffusionModel,
        ModelType.Classifier: ClassifierModel,
        ModelType.Vae: VaeModel,
        ModelType.Lora: LoRAModel,
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
    },
    BaseModelType.StableDiffusion2Base: {
        ModelType.Pipeline: StableDiffusionModel,
        ModelType.Classifier: ClassifierModel,
        ModelType.Vae: VaeModel,
        ModelType.Lora: LoRAModel,
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
    },
    #BaseModelType.Kandinsky2_1: {
    #    ModelType.Pipeline: Kandinsky2_1Model,
    #    ModelType.Classifier: ClassifierModel,
    #    ModelType.MoVQ: MoVQModel,
    #    ModelType.Lora: LoRAModel,
    #    ModelType.ControlNet: ControlNetModel,
    #    ModelType.TextualInversion: TextualInversionModel,
    #},
}
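
# Illustrative usage (not part of the original module): MODEL_CLASSES maps a
# (base model, model type) pair to the wrapper class that knows how to load it.
# The path below is hypothetical.
#
#   model_class = MODEL_CLASSES[BaseModelType.StableDiffusion1_5][ModelType.Vae]
#   vae_wrapper = model_class("/path/to/sd-1-5/vae", BaseModelType.StableDiffusion1_5, ModelType.Vae)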


class EmptyConfigLoader(ConfigMixin):
    @classmethod
    def load_config(cls, *args, **kwargs):
        cls.config_name = kwargs.pop("config_name")
        return super().load_config(*args, **kwargs)
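
# Illustrative usage (hypothetical path): EmptyConfigLoader reads an arbitrary
# JSON config without committing to a concrete model class, by setting the
# ConfigMixin class-level config_name before delegating to load_config.
#
#   config = EmptyConfigLoader.load_config("/path/to/model", config_name="config.json")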


class ModelBase:
    #model_path: str
    #base_model: BaseModelType
    #model_type: ModelType

    def __init__(
        self,
        model_path: str,
        base_model: BaseModelType,
        model_type: ModelType,
    ):
        self.model_path = model_path
        self.base_model = base_model
        self.model_type = model_type

    def _hf_definition_to_type(self, subtypes: List[str]) -> Type:
        if len(subtypes) < 2:
            raise Exception("Invalid subfolder definition!")
        if subtypes[0] in ["diffusers", "transformers"]:
            res_type = sys.modules[subtypes[0]]
            subtypes = subtypes[1:]
        else:
            res_type = sys.modules["diffusers"]
            res_type = getattr(res_type, "pipelines")
        for subtype in subtypes:
            res_type = getattr(res_type, subtype)
        return res_type
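
# Illustrative note: _hf_definition_to_type resolves the two-element lists
# stored in model_index.json into importable classes, e.g.
#
#   ["transformers", "CLIPTextModel"]  -> transformers.CLIPTextModel
#   ["diffusers", "AutoencoderKL"]     -> diffusers.AutoencoderKL
#
# Entries whose first element is not a known library fall back to
# diffusers.pipelines, e.g. ["stable_diffusion", "StableDiffusionSafetyChecker"].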


class DiffusersModel(ModelBase):
    #child_types: Dict[str, Type]
    #child_sizes: Dict[str, int]

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        super().__init__(model_path, base_model, model_type)
        self.child_types: Dict[str, Type] = dict()
        self.child_sizes: Dict[str, int] = dict()

        try:
            config_data = DiffusionPipeline.load_config(self.model_path)
            #config_data = json.loads(os.path.join(self.model_path, "model_index.json"))
        except Exception:
            raise Exception("Invalid diffusers model! (model_index.json not found or invalid)")

        config_data.pop("_ignore_files", None)

        # retrieve all folder_names that contain relevant files
        child_components = [k for k, v in config_data.items() if isinstance(v, list)]

        for child_name in child_components:
            child_type = self._hf_definition_to_type(config_data[child_name])
            self.child_types[child_name] = child_type
            self.child_sizes[child_name] = calc_model_size_by_fs(self.model_path, subfolder=child_name)

    def get_size(self, child_type: Optional[SubModelType] = None):
        if child_type is None:
            return sum(self.child_sizes.values())
        else:
            return self.child_sizes[child_type]

    def get_model(
        self,
        torch_dtype: Optional[torch.dtype],
        child_type: Optional[SubModelType] = None,
    ):
        # the full pipeline is returned by a different function so that more arguments can be passed
        if child_type is None:
            raise Exception("Child model type can't be null on diffusers model")
        if child_type not in self.child_types:
            return None  # TODO: or raise

        if torch_dtype == torch.float16:
            variants = ["fp16", None]
        else:
            variants = [None, "fp16"]

        # TODO: better error handling (differentiate not found from others)
        model = None
        for variant in variants:
            try:
                # TODO: set cache_dir to /dev/null to be sure that cache not used?
                model = self.child_types[child_type].from_pretrained(
                    self.model_path,
                    subfolder=child_type.value,
                    torch_dtype=torch_dtype,
                    variant=variant,
                    local_files_only=True,
                )
                break
            except Exception as e:
                print("====ERR LOAD====")
                print(f"{variant}: {e}")
        if model is None:
            # guard: without this, a total load failure left `model` unbound
            raise Exception(f"Failed to load {child_type} from {self.model_path}")

        # calc more accurate size
        self.child_sizes[child_type] = calc_model_size_by_data(model)
        return model

    #def convert_if_required(model_path: Union[str, Path], cache_path: str, config: Optional[dict]) -> Path:
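
# Illustrative usage (hypothetical path, sketch only): loading a single
# submodel from a diffusers folder. get_model requires an explicit child
# type and tries the fp16 variant first when float16 is requested.
#
#   wrapper = DiffusersModel("/path/to/model", BaseModelType.StableDiffusion1_5, ModelType.Pipeline)
#   unet = wrapper.get_model(torch.float16, SubModelType.UNet)
#   print(wrapper.get_size(SubModelType.UNet))  # size is refined after loading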


class StableDiffusionModel(DiffusersModel):
    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert base_model in {
            BaseModelType.StableDiffusion1_5,
            BaseModelType.StableDiffusion2,
            BaseModelType.StableDiffusion2Base,
        }
        assert model_type == ModelType.Pipeline
        super().__init__(model_path, base_model, model_type)

    @staticmethod
    def convert_if_required(model_path: Union[str, Path], dst_path: str, config: Optional[dict]) -> Path:
        if not isinstance(model_path, Path):
            model_path = Path(model_path)

        # TODO: args
        # TODO: set model_path, to config? pass dst_path as arg?
        # TODO: check
        return _convert_ckpt_and_cache(config)


class classproperty(object):  # pylint: disable=invalid-name
    """Class property decorator.

    Example usage:

        class MyClass(object):
            @classproperty
            def value(cls):
                return '123'

        >>> print(MyClass.value)
        123
    """

    def __init__(self, func):
        self._func = func

    def __get__(self, owner_self, owner_cls):
        return self._func(owner_cls)


class ModelConfigBase(BaseModel):
    path: str  # or Path
    name: str
    description: Optional[str]


class StableDiffusionDModel(DiffusersModel):
    class Config(ModelConfigBase):
        format: str
        vae: Optional[str] = Field(None)
        config: Optional[str] = Field(None)

        @root_validator
        def validator(cls, values):
            if values["format"] not in {"checkpoint", "diffusers"}:
                raise ValueError(f"Unknown stable diffusion model format: {values['format']}")
            if values["config"] is not None and values["format"] != "checkpoint":
                raise ValueError("Custom config field allowed only in checkpoint stable diffusion model")
            return values

        # return config only for checkpoint format
        def dict(self, *args, **kwargs):
            result = super().dict(*args, **kwargs)
            if self.format != "checkpoint":
                result.pop("config", None)
            return result

    @classproperty
    def has_config(self):
        return True

    def build_config(self, **kwargs) -> dict:
        try:
            res = dict(
                path=kwargs["path"],
                name=kwargs["name"],
                description=kwargs.get("description", None),
                format=kwargs["format"],
                vae=kwargs.get("vae", None),
            )
            if res["format"] not in {"checkpoint", "diffusers"}:
                raise Exception(f"Unknown stable diffusion model format: {res['format']}")
            if res["format"] == "checkpoint":
                res["config"] = kwargs.get("config", None)
            # TODO: raise if config specified for diffusers?
            return res
        except KeyError as e:
            raise Exception(f"Field \"{e.args[0]}\" not found!")

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert base_model == BaseModelType.StableDiffusion1_5
        assert model_type == ModelType.Pipeline
        super().__init__(model_path, base_model, model_type)

    @classmethod
    def convert_if_required(cls, model_path: str, dst_path: str, config: Optional[dict]) -> str:
        model_config = cls.Config(
            **config,
            path=model_path,
            name="",
        )
        # convert only when a custom checkpoint config is attached; the original
        # `hasattr(model_config, "config")` check is always true on a pydantic
        # model, so test the field value instead
        if model_config.config is not None:
            convert_ckpt_and_cache(
                model_path=model_path,
                dst_path=dst_path,
                config=config,
            )
            return dst_path
        else:
            return model_path
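
# Illustrative usage (all values hypothetical, and assuming an existing
# instance `sd_model`): build_config assembles the dict persisted for a model,
# and the nested Config model validates it.
#
#   raw = sd_model.build_config(
#       path="/path/to/model.ckpt",
#       name="my-model",
#       format="checkpoint",
#       config="/path/to/v1-inference.yaml",
#   )
#   cfg = StableDiffusionDModel.Config(**raw)  # raises ValueError on a bad format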


class StableDiffusion15CheckpointModel(DiffusersModel):
    class Config(ModelConfigBase):
        vae: Optional[str] = Field(None)
        config: Optional[str] = Field(None)


class StableDiffusion2BaseDiffusersModel(DiffusersModel):
    class Config(ModelConfigBase):
        vae: Optional[str] = Field(None)


class StableDiffusion2BaseCheckpointModel(DiffusersModel):
    class Config(ModelConfigBase):
        vae: Optional[str] = Field(None)
        config: Optional[str] = Field(None)


class StableDiffusion2DiffusersModel(DiffusersModel):
    class Config(ModelConfigBase):
        vae: Optional[str] = Field(None)
        attention_upscale: bool = Field(True)


class StableDiffusion2CheckpointModel(DiffusersModel):
    class Config(ModelConfigBase):
        vae: Optional[str] = Field(None)
        config: Optional[str] = Field(None)
        attention_upscale: bool = Field(True)


class ClassifierModel(ModelBase):
    #child_types: Dict[str, Type]
    #child_sizes: Dict[str, int]

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert model_type == ModelType.Classifier
        super().__init__(model_path, base_model, model_type)
        self.child_types: Dict[str, Type] = dict()
        self.child_sizes: Dict[str, int] = dict()

        try:
            main_config = EmptyConfigLoader.load_config(self.model_path, config_name="config.json")
            #main_config = json.loads(os.path.join(self.model_path, "config.json"))
        except Exception:
            raise Exception("Invalid classifier model! (config.json not found or invalid)")

        self._load_tokenizer(main_config)
        self._load_text_encoder(main_config)
        self._load_feature_extractor(main_config)

    def _load_tokenizer(self, main_config: dict):
        try:
            tokenizer_config = EmptyConfigLoader.load_config(self.model_path, config_name="tokenizer_config.json")
            #tokenizer_config = json.loads(os.path.join(self.model_path, "tokenizer_config.json"))
        except Exception:
            raise Exception("Invalid classifier model! (Failed to load tokenizer_config.json)")

        if "tokenizer_class" in tokenizer_config:
            tokenizer_class_name = tokenizer_config["tokenizer_class"]
        elif "model_type" in main_config:
            tokenizer_class_name = transformers.models.auto.tokenization_auto.TOKENIZER_MAPPING_NAMES[main_config["model_type"]]
        else:
            raise Exception("Invalid classifier model! (Failed to detect tokenizer type)")

        self.child_types[SubModelType.Tokenizer] = self._hf_definition_to_type(["transformers", tokenizer_class_name])
        self.child_sizes[SubModelType.Tokenizer] = 0

    def _load_text_encoder(self, main_config: dict):
        if "architectures" in main_config and len(main_config["architectures"]) > 0:
            text_encoder_class_name = main_config["architectures"][0]
        elif "model_type" in main_config:
            text_encoder_class_name = transformers.models.auto.modeling_auto.MODEL_FOR_PRETRAINING_MAPPING_NAMES[main_config["model_type"]]
        else:
            raise Exception("Invalid classifier model! (Failed to detect text_encoder type)")

        self.child_types[SubModelType.TextEncoder] = self._hf_definition_to_type(["transformers", text_encoder_class_name])
        self.child_sizes[SubModelType.TextEncoder] = calc_model_size_by_fs(self.model_path)

    def _load_feature_extractor(self, main_config: dict):
        self.child_sizes[SubModelType.FeatureExtractor] = 0
        try:
            feature_extractor_config = EmptyConfigLoader.load_config(self.model_path, config_name="preprocessor_config.json")
        except Exception:
            return  # feature extractor not passed with t5

        try:
            feature_extractor_class_name = feature_extractor_config["feature_extractor_type"]
            self.child_types[SubModelType.FeatureExtractor] = self._hf_definition_to_type(["transformers", feature_extractor_class_name])
        except Exception:
            raise Exception("Invalid classifier model! (Unknown feature_extractor type)")

    def get_size(self, child_type: Optional[SubModelType] = None):
        if child_type is None:
            return sum(self.child_sizes.values())
        else:
            return self.child_sizes[child_type]

    def get_model(
        self,
        torch_dtype: Optional[torch.dtype],
        child_type: Optional[SubModelType] = None,
    ):
        if child_type is None:
            raise Exception("Child model type can't be null on classifier model")
        if child_type not in self.child_types:
            return None  # TODO: or raise

        model = self.child_types[child_type].from_pretrained(
            self.model_path,
            subfolder=child_type.value,
            torch_dtype=torch_dtype,
        )

        # calc more accurate size
        self.child_sizes[child_type] = calc_model_size_by_data(model)
        return model

    @staticmethod
    def convert_if_required(model_path: Union[str, Path], cache_path: str, config: Optional[dict]) -> Path:
        if not isinstance(model_path, Path):
            model_path = Path(model_path)
        return model_path
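
# Illustrative usage (hypothetical path): a classifier folder exposes
# tokenizer and text_encoder children (plus an optional feature_extractor)
# detected from its JSON configs.
#
#   clf = ClassifierModel("/path/to/clip", BaseModelType.StableDiffusion1_5, ModelType.Classifier)
#   tokenizer = clf.get_model(None, SubModelType.Tokenizer)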


class VaeModel(ModelBase):
    #vae_class: Type
    #model_size: int

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert model_type == ModelType.Vae
        super().__init__(model_path, base_model, model_type)

        try:
            config = EmptyConfigLoader.load_config(self.model_path, config_name="config.json")
            #config = json.loads(os.path.join(self.model_path, "config.json"))
        except Exception:
            raise Exception("Invalid vae model! (config.json not found or invalid)")

        try:
            vae_class_name = config.get("_class_name", "AutoencoderKL")
            self.vae_class = self._hf_definition_to_type(["diffusers", vae_class_name])
            self.model_size = calc_model_size_by_fs(self.model_path)
        except Exception:
            raise Exception("Invalid vae model! (Unknown vae type)")

    def get_size(self, child_type: Optional[SubModelType] = None):
        if child_type is not None:
            raise Exception("There are no child models in a vae model")
        return self.model_size

    def get_model(
        self,
        torch_dtype: Optional[torch.dtype],
        child_type: Optional[SubModelType] = None,
    ):
        if child_type is not None:
            raise Exception("There are no child models in a vae model")

        model = self.vae_class.from_pretrained(
            self.model_path,
            torch_dtype=torch_dtype,
        )
        # calc more accurate size
        self.model_size = calc_model_size_by_data(model)
        return model

    @staticmethod
    def convert_if_required(model_path: Union[str, Path], cache_path: str, config: Optional[dict]) -> Path:
        if not isinstance(model_path, Path):
            model_path = Path(model_path)
        # TODO:
        #_convert_vae_ckpt_and_cache
        raise Exception("TODO: ")


class LoRAModel(ModelBase):
    #model_size: int

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert model_type == ModelType.Lora
        super().__init__(model_path, base_model, model_type)
        self.model_size = os.path.getsize(self.model_path)

    def get_size(self, child_type: Optional[SubModelType] = None):
        if child_type is not None:
            raise Exception("There are no child models in a lora")
        return self.model_size

    def get_model(
        self,
        torch_dtype: Optional[torch.dtype],
        child_type: Optional[SubModelType] = None,
    ):
        if child_type is not None:
            raise Exception("There are no child models in a lora")

        # NOTE: from_checkpoint is not defined on this wrapper; in the full
        # package this call presumably targets a separate checkpoint-loader
        # class that shares the LoRAModel name.
        model = LoRAModel.from_checkpoint(
            file_path=self.model_path,
            dtype=torch_dtype,
        )
        self.model_size = model.calc_size()
        return model

    @staticmethod
    def convert_if_required(model_path: Union[str, Path], cache_path: str, config: Optional[dict]) -> Path:
        if not isinstance(model_path, Path):
            model_path = Path(model_path)
        # TODO: add diffusers lora when it stabilizes a bit
        return model_path


class TextualInversionModel(ModelBase):
    #model_size: int

    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
        assert model_type == ModelType.TextualInversion
        super().__init__(model_path, base_model, model_type)
        self.model_size = os.path.getsize(self.model_path)

    def get_size(self, child_type: Optional[SubModelType] = None):
        if child_type is not None:
            raise Exception("There are no child models in a textual inversion")
        return self.model_size

    def get_model(
        self,
        torch_dtype: Optional[torch.dtype],
        child_type: Optional[SubModelType] = None,
    ):
        if child_type is not None:
            raise Exception("There are no child models in a textual inversion")

        # NOTE: as with LoRAModel above, from_checkpoint presumably lives on a
        # separate checkpoint-loader class that shares this name.
        model = TextualInversionModel.from_checkpoint(
            file_path=self.model_path,
            dtype=torch_dtype,
        )
        self.model_size = model.embedding.nelement() * model.embedding.element_size()
        return model

    @staticmethod
    def convert_if_required(model_path: Union[str, Path], cache_path: str, config: Optional[dict]) -> Path:
        if not isinstance(model_path, Path):
            model_path = Path(model_path)
        return model_path
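
# Illustrative note (hypothetical path): the single-file models (LoRA, textual
# inversion) report their on-disk size until the checkpoint is actually
# parsed, after which get_size reflects the loaded tensors.
#
#   ti = TextualInversionModel("/path/to/embedding.pt", BaseModelType.StableDiffusion1_5, ModelType.TextualInversion)
#   print(ti.get_size())  # file size in bytes before get_model() is called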


def calc_model_size_by_fs(
    model_path: str,
    subfolder: Optional[str] = None,
    variant: Optional[str] = None,
):
    if subfolder is not None:
        model_path = os.path.join(model_path, subfolder)

    # this can happen when, for example, the safety checker
    # is not downloaded.
    if not os.path.exists(model_path):
        return 0

    all_files = os.listdir(model_path)
    all_files = [f for f in all_files if os.path.isfile(os.path.join(model_path, f))]

    fp16_files = set([f for f in all_files if ".fp16." in f or ".fp16-" in f])
    bit8_files = set([f for f in all_files if ".8bit." in f or ".8bit-" in f])
    other_files = set(all_files) - fp16_files - bit8_files

    if variant is None:
        files = other_files
    elif variant == "fp16":
        files = fp16_files
    elif variant == "8bit":
        files = bit8_files
    else:
        raise NotImplementedError(f"Unknown variant: {variant}")

    # try to read from the index file if it exists
    index_postfix = ".index.json"
    if variant is not None:
        index_postfix = f".index.{variant}.json"

    for file in files:
        if not file.endswith(index_postfix):
            continue
        try:
            with open(os.path.join(model_path, file), "r") as f:
                index_data = json.loads(f.read())
            return int(index_data["metadata"]["total_size"])
        except Exception:
            pass

    # calculate files size if there is no index file
    formats = [
        (".safetensors",),  # safetensors
        (".bin",),          # torch
        (".onnx", ".pb"),   # onnx
        (".msgpack",),      # flax
        (".ckpt",),         # tf
        (".h5",),           # tf2
    ]

    for file_format in formats:
        model_files = [f for f in files if f.endswith(file_format)]
        if len(model_files) == 0:
            continue

        model_size = 0
        for model_file in model_files:
            file_stats = os.stat(os.path.join(model_path, model_file))
            model_size += file_stats.st_size
        return model_size

    #raise NotImplementedError(f"Unknown model structure! Files: {all_files}")
    return 0  # scheduler/feature_extractor/tokenizer - models without loading to gpu
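
# Illustrative usage (hypothetical path): estimating a submodel's size on disk
# before loading it. The *.index.json "total_size" metadata is preferred when
# present; otherwise the weight files of the first matching format are summed.
#
#   size_bytes = calc_model_size_by_fs("/path/to/model", subfolder="unet", variant="fp16")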


def calc_model_size_by_data(model) -> int:
    if isinstance(model, DiffusionPipeline):
        return _calc_pipeline_by_data(model)
    elif isinstance(model, torch.nn.Module):
        return _calc_model_by_data(model)
    else:
        return 0


def _calc_pipeline_by_data(pipeline) -> int:
    res = 0
    for submodel_key in pipeline.components.keys():
        submodel = getattr(pipeline, submodel_key)
        if submodel is not None and isinstance(submodel, torch.nn.Module):
            res += _calc_model_by_data(submodel)
    return res


def _calc_model_by_data(model) -> int:
    mem_params = sum([param.nelement() * param.element_size() for param in model.parameters()])
    mem_bufs = sum([buf.nelement() * buf.element_size() for buf in model.buffers()])
    mem = mem_params + mem_bufs  # in bytes
    return mem
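
# Illustrative check: for a torch module the size is parameters plus buffers,
# in bytes. A float32 Linear(4, 4) with bias holds 4*4 + 4 = 20 elements,
# i.e. 80 bytes:
#
#   layer = torch.nn.Linear(4, 4)
#   assert calc_model_size_by_data(layer) == 80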


def _convert_ckpt_and_cache(mconfig: DictConfig) -> Path:
    """
    Convert the checkpoint model indicated in mconfig into a diffusers
    model, cache it to disk, and return the Path to the converted model.
    If it is already on disk, just return the Path.
    """
    app_config = InvokeAIAppConfig.get_config()

    weights = app_config.root_dir / mconfig.path
    config_file = app_config.root_dir / mconfig.config
    diffusers_path = app_config.converted_ckpts_dir / weights.stem

    # return cached version if it exists
    if diffusers_path.exists():
        return diffusers_path

    # TODO: it is probably more correct to convert with the embedded VAE,
    # because otherwise a user who deletes the custom VAE will still get the
    # custom VAE baked into the converted model rather than the embedded one
    #vae_ckpt_path, vae_model = self._get_vae_for_conversion(weights, mconfig)
    vae_ckpt_path, vae_model = None, None

    # to avoid circular import errors
    from .convert_ckpt_to_diffusers import convert_ckpt_to_diffusers

    with SilenceWarnings():
        convert_ckpt_to_diffusers(
            weights,
            diffusers_path,
            extract_ema=True,
            original_config_file=config_file,
            vae=vae_model,
            vae_path=str(app_config.root_dir / vae_ckpt_path) if vae_ckpt_path else None,
            scan_needed=True,
        )
    return diffusers_path


def _convert_vae_ckpt_and_cache(mconfig: DictConfig) -> Path:
    """
    Convert the VAE indicated in mconfig into a diffusers AutoencoderKL
    object, cache it to disk, and return the Path to the converted model.
    If it is already on disk, just return the Path.
    """
    app_config = InvokeAIAppConfig.get_config()
    root = app_config.root_dir
    weights_file = root / mconfig.path
    config_file = root / mconfig.config
    diffusers_path = app_config.converted_ckpts_dir / weights_file.stem
    image_size = mconfig.get('width') or mconfig.get('height') or 512

    # return cached version if it exists
    if diffusers_path.exists():
        return diffusers_path

    # this avoids circular import error
    from .convert_ckpt_to_diffusers import convert_ldm_vae_to_diffusers

    if weights_file.suffix == '.safetensors':
        checkpoint = safetensors.torch.load_file(weights_file)
    else:
        checkpoint = torch.load(weights_file, map_location="cpu")

    # sometimes weights are hidden under "state_dict", and sometimes not
    if "state_dict" in checkpoint:
        checkpoint = checkpoint["state_dict"]

    config = OmegaConf.load(config_file)

    vae_model = convert_ldm_vae_to_diffusers(
        checkpoint=checkpoint,
        vae_config=config,
        image_size=image_size,
    )
    vae_model.save_pretrained(
        diffusers_path,
        safe_serialization=is_safetensors_available(),
    )
    return diffusers_path
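
# Illustrative usage (paths hypothetical; the keys match those read above):
# convert a checkpoint VAE once and reuse the cached diffusers folder.
#
#   mconfig = OmegaConf.create(
#       {"path": "models/vae.ckpt", "config": "configs/vae.yaml", "width": 512}
#   )
#   diffusers_dir = _convert_vae_ckpt_and_cache(mconfig)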