add model loading support for SDXL

2024-08-30 20:32:17 +00:00 · 2023-07-09 15:47:06 -04:00 · 2023-07-09 15:47:06 -04:00 · 130249a2dd
commit 130249a2dd
parent 11d78ad75f
5 changed files with 136 additions and 5 deletions
--- a/invokeai/backend/model_management/model_probe.py
+++ b/invokeai/backend/model_management/model_probe.py
@ -38,6 +38,8 @@ class ModelProbe(object):

    CLASS2TYPE = {
        'StableDiffusionPipeline' : ModelType.Main,
+        'StableDiffusionXLPipeline' : ModelType.Main,
+        'StableDiffusionXLImg2ImgPipeline' : ModelType.Main,
        'AutoencoderKL' : ModelType.Vae,
        'ControlNetModel' : ModelType.ControlNet,
    }
@ -99,9 +101,10 @@ class ModelProbe(object):
                upcast_attention = (base_type==BaseModelType.StableDiffusion2 \
                                     and prediction_type==SchedulerPredictionType.VPrediction),
                format = format,
-                image_size = 768 if (base_type==BaseModelType.StableDiffusion2 \
-                                     and prediction_type==SchedulerPredictionType.VPrediction \
-                                     ) else 512,
+                image_size = 1024 if (base_type==BaseModelType.StableDiffusionXL) else \
+                              768 if (base_type==BaseModelType.StableDiffusion2 \
+                                     and prediction_type==SchedulerPredictionType.VPrediction ) else \
+                              512
            )
        except Exception:
            raise
@ -248,6 +251,9 @@ class PipelineCheckpointProbe(CheckpointProbeBase):
            return BaseModelType.StableDiffusion1
        if key_name in state_dict and state_dict[key_name].shape[-1] == 1024:
            return BaseModelType.StableDiffusion2
+        # TODO: Verify that this is correct! Need an XL checkpoint file for this.
+        if key_name in state_dict and state_dict[key_name].shape[-1] == 2048:
+            return BaseModelType.StableDiffusionXL
        raise Exception("Cannot determine base type")

    def get_scheduler_prediction_type(self)->SchedulerPredictionType:
@ -360,6 +366,8 @@ class PipelineFolderProbe(FolderProbeBase):
            return BaseModelType.StableDiffusion1  
        elif unet_conf['cross_attention_dim'] == 1024:
            return BaseModelType.StableDiffusion2
+        elif unet_conf['cross_attention_dim'] in {1280,2048}:
+            return BaseModelType.StableDiffusionXL
        else:
            raise ValueError(f'Unknown base model for {self.folder_path}')

--- a/invokeai/backend/model_management/models/init.py
+++ b/invokeai/backend/model_management/models/init.py
@ -3,7 +3,7 @@ from enum import Enum
 from pydantic import BaseModel
 from typing import Literal, get_origin
 from .base import BaseModelType, ModelType, SubModelType, ModelBase, ModelConfigBase, ModelVariantType, SchedulerPredictionType, ModelError, SilenceWarnings, ModelNotFoundException
-from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model
+from .stable_diffusion import StableDiffusion1Model, StableDiffusion2Model, StableDiffusionXLModel
 from .vae import VaeModel
 from .lora import LoRAModel
 from .controlnet import ControlNetModel # TODO:
@ -24,6 +24,14 @@ MODEL_CLASSES = {
        ModelType.ControlNet: ControlNetModel,
        ModelType.TextualInversion: TextualInversionModel,
    },
+    BaseModelType.StableDiffusionXL: {
+        ModelType.Main: StableDiffusionXLModel,
+        ModelType.Vae: VaeModel,
+        # will not work until support written
+        ModelType.Lora: LoRAModel,
+        ModelType.ControlNet: ControlNetModel,
+        ModelType.TextualInversion: TextualInversionModel,
+    },
    #BaseModelType.Kandinsky2_1: {
    #    ModelType.Main: Kandinsky2_1Model,
    #    ModelType.MoVQ: MoVQModel,
--- a/invokeai/backend/model_management/models/base.py
+++ b/invokeai/backend/model_management/models/base.py
@ -21,6 +21,7 @@ class ModelNotFoundException(Exception):
 class BaseModelType(str, Enum):
    StableDiffusion1 = "sd-1"
    StableDiffusion2 = "sd-2"
+    StableDiffusionXL = "sdxl"
    #Kandinsky2_1 = "kandinsky-2.1"

 class ModelType(str, Enum):
@ -33,7 +34,9 @@ class ModelType(str, Enum):
 class SubModelType(str, Enum):
    UNet = "unet"
    TextEncoder = "text_encoder"
+    TextEncoder2 = "text_encoder_2"
    Tokenizer = "tokenizer"
+    Tokenizer2 = "tokenizer_2"
    Vae = "vae"
    Scheduler = "scheduler"
    SafetyChecker = "safety_checker"
--- a/invokeai/backend/model_management/models/stable_diffusion.py
+++ b/invokeai/backend/model_management/models/stable_diffusion.py
@ -222,6 +222,105 @@ class StableDiffusion2Model(DiffusersModel):
        else:
            return model_path

+class StableDiffusionXLModelFormat(str, Enum):
+    Checkpoint = "checkpoint"
+    Diffusers = "diffusers"
+    
+class StableDiffusionXLModel(DiffusersModel):
+
+    # TODO: check that configs overwriten properly
+    class DiffusersConfig(ModelConfigBase):
+        model_format: Literal[StableDiffusionXLModelFormat.Diffusers]
+        vae: Optional[str] = Field(None)
+        variant: ModelVariantType
+
+    class CheckpointConfig(ModelConfigBase):
+        model_format: Literal[StableDiffusionXLModelFormat.Checkpoint]
+        vae: Optional[str] = Field(None)
+        config: str
+        variant: ModelVariantType
+
+    def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
+        assert base_model == BaseModelType.StableDiffusionXL
+        assert model_type == ModelType.Main
+        super().__init__(
+            model_path=model_path,
+            base_model=BaseModelType.StableDiffusionXL,
+            model_type=ModelType.Main,
+        )
+
+    @classmethod
+    def probe_config(cls, path: str, **kwargs):
+        model_format = cls.detect_format(path)
+        ckpt_config_path = kwargs.get("config", None)
+        if model_format == StableDiffusionXLModelFormat.Checkpoint:
+            if ckpt_config_path:
+                ckpt_config = OmegaConf.load(ckpt_config_path)
+                ckpt_config["model"]["params"]["unet_config"]["params"]["in_channels"]
+
+            else:
+                checkpoint = read_checkpoint_meta(path)
+                checkpoint = checkpoint.get('state_dict', checkpoint)
+                in_channels = checkpoint["model.diffusion_model.input_blocks.0.0.weight"].shape[1]
+
+        elif model_format == StableDiffusionXLModelFormat.Diffusers:
+            unet_config_path = os.path.join(path, "unet", "config.json")
+            if os.path.exists(unet_config_path):
+                with open(unet_config_path, "r") as f:
+                    unet_config = json.loads(f.read())
+                in_channels = unet_config['in_channels']
+
+            else:
+                raise Exception("Not supported stable diffusion diffusers format(possibly onnx?)")
+
+        else:
+            raise NotImplementedError(f"Unknown stable diffusion 2.* format: {model_format}")
+
+        if in_channels == 9:
+            variant = ModelVariantType.Inpaint
+        elif in_channels == 5:
+            variant = ModelVariantType.Depth
+        elif in_channels == 4:
+            variant = ModelVariantType.Normal
+        else:
+            raise Exception("Unkown stable diffusion 2.* model format")
+
+        if ckpt_config_path is None:
+            ckpt_config_path = _select_ckpt_config(BaseModelType.StableDiffusionXL, variant)
+
+        return cls.create_config(
+            path=path,
+            model_format=model_format,
+
+            config=ckpt_config_path,
+            variant=variant,
+        )
+
+    @classproperty
+    def save_to_config(cls) -> bool:
+        return True
+
+    @classmethod
+    def detect_format(cls, model_path: str):
+        if os.path.isdir(model_path):
+            return StableDiffusionXLModelFormat.Diffusers
+        else:
+            return StableDiffusionXLModelFormat.Checkpoint
+
+    @classmethod
+    def convert_if_required(
+        cls,
+        model_path: str,
+        output_path: str,
+        config: ModelConfigBase,
+        base_model: BaseModelType,
+    ) -> str:
+        if isinstance(config, cls.CheckpointConfig):
+            raise NotImplementedError('conversion of SDXL checkpoint models to diffusers format is not yet supported')
+        else:
+            return model_path
+        
+        
 def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
    ckpt_configs = {
        BaseModelType.StableDiffusion1: {
@ -232,6 +331,12 @@ def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):
            ModelVariantType.Normal: "v2-inference-v.yaml", # best guess, as we can't differentiate with base(512)
            ModelVariantType.Inpaint: "v2-inpainting-inference.yaml",
            ModelVariantType.Depth: "v2-midas-inference.yaml",
+        },
+        # note that these .yaml files don't yet exist!
+        BaseModelType.StableDiffusionXL: {
+            ModelVariantType.Normal: "xl-inference-v.yaml",
+            ModelVariantType.Inpaint: "xl-inpainting-inference.yaml",
+            ModelVariantType.Depth: "xl-midas-inference.yaml",
        }
    }

@ -247,9 +352,10 @@ def _select_ckpt_config(version: BaseModelType, variant: ModelVariantType):


 # TODO: rework
+# Note that convert_ckpt_to_diffuses does not currently support conversion of SDXL models
 def _convert_ckpt_and_cache(
    version: BaseModelType,
-    model_config: Union[StableDiffusion1Model.CheckpointConfig, StableDiffusion2Model.CheckpointConfig],
+    model_config: Union[StableDiffusion1Model.CheckpointConfig, StableDiffusion2Model.CheckpointConfig, StableDiffusionXLModel.CheckpointConfig],
    output_path: str,
 ) -> str:
    """
--- a/invokeai/configs/INITIAL_MODELS.yaml
+++ b/invokeai/configs/INITIAL_MODELS.yaml
@ -1,4 +1,10 @@
 # This file predefines a few models that the user may want to install.
+sd-1/main/stable-diffusion-xdl-base:
+   description: Stable Diffusion XL base model - NOT YET RELEASED!! (70 GB)
+   repo_id: stabilityai/stable-diffusion-xl-base
+sd-1/main/stable-diffusion-xdl-refiner:
+   description: Stable Diffusion XL refiner model - NOT YET RELEASED!! (60 GB)
+   repo_id: stabilityai/stable-diffusion-xl-refiner
 sd-1/main/stable-diffusion-v1-5:
   description: Stable Diffusion version 1.5 diffusers model (4.27 GB)
   repo_id: runwayml/stable-diffusion-v1-5