Select dev/schnell based on the state dict, use the correct max seq len and schedule shift for dev/schnell in inference, and separate the FLUX VAE params into their own config

Brandon Rising 2024-08-19 14:41:28 -04:00 committed by Brandon
parent 4bd7fda694
commit a63f842a13
9 changed files with 170 additions and 66 deletions

View File

@@ -1,4 +1,5 @@
 import torch
+from typing import Literal
 from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer

 from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
@@ -23,11 +24,12 @@ class FluxTextEncoderInvocation(BaseInvocation):
         description=FieldDescriptions.clip,
         input=Input.Connection,
     )
-    t5Encoder: T5EncoderField = InputField(
+    t5_encoder: T5EncoderField = InputField(
         title="T5Encoder",
         description=FieldDescriptions.t5Encoder,
         input=Input.Connection,
     )
+    max_seq_len: Literal[256, 512] = InputField(description="Max sequence length for the desired flux model")
     positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.")

     # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not
@@ -43,21 +45,15 @@ class FluxTextEncoderInvocation(BaseInvocation):
         return ConditioningOutput.build(conditioning_name)

     def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]:
-        # TODO: Determine the T5 max sequence length based on the model.
-        # if self.model == "flux-schnell":
-        max_seq_len = 256
-        # # elif self.model == "flux-dev":
-        # #   max_seq_len = 512
-        # else:
-        #     raise ValueError(f"Unknown model: {self.model}")
+        max_seq_len = self.max_seq_len

         # Load CLIP.
         clip_tokenizer_info = context.models.load(self.clip.tokenizer)
         clip_text_encoder_info = context.models.load(self.clip.text_encoder)

         # Load T5.
-        t5_tokenizer_info = context.models.load(self.t5Encoder.tokenizer)
-        t5_text_encoder_info = context.models.load(self.t5Encoder.text_encoder)
+        t5_tokenizer_info = context.models.load(self.t5_encoder.tokenizer)
+        t5_text_encoder_info = context.models.load(self.t5_encoder.text_encoder)

         with (
             clip_text_encoder_info as clip_text_encoder,

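Note: the hardcoded max_seq_len = 256 (wrong for dev, which wants 512) is gone; the value now arrives from the model loader. A minimal sketch of where it typically ends up, assuming the HF-style interface that T5Tokenizer exposes (the variable names below are illustrative, not from the diff):

    t5_inputs = t5_tokenizer(
        self.positive_prompt,
        padding="max_length",
        max_length=max_seq_len,  # 256 for FLUX.1-schnell, 512 for FLUX.1-dev
        truncation=True,
        return_tensors="pt",
    )
    prompt_embeds = t5_text_encoder(t5_inputs.input_ids)[0]
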
View File

@@ -19,6 +19,7 @@ from invokeai.backend.flux.modules.autoencoder import AutoEncoder
 from invokeai.backend.flux.sampling import denoise, get_noise, get_schedule, unpack
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo
 from invokeai.backend.util.devices import TorchDevice
+from invokeai.backend.model_manager.config import CheckpointConfigBase


 @invocation(
@@ -89,7 +90,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
         img, img_ids = self._prepare_latent_img_patches(x)

         # HACK(ryand): Find a better way to determine if this is a schnell model or not.
-        is_schnell = "schnell" in transformer_info.config.path if transformer_info.config else ""
+        is_schnell = "schnell" in transformer_info.config.config_path if transformer_info.config and isinstance(transformer_info.config, CheckpointConfigBase) else ""
         timesteps = get_schedule(
             num_steps=self.num_steps,
             image_seq_len=img.shape[1],

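The is_schnell flag feeds get_schedule, which is where the "shift in inference" part of the commit title lands: dev gets a resolution-dependent timestep shift, while schnell presumably runs the plain linear schedule (shift=not is_schnell). A minimal sketch of that shifting logic, modeled on the upstream FLUX sampling code; the 0.5/1.15 shift bounds and the 256..4096 sequence-length range are the upstream defaults, assumed here rather than taken from this diff:

    import math

    import torch

    def time_shift(mu: float, sigma: float, t: torch.Tensor) -> torch.Tensor:
        # Larger mu pushes timesteps toward 1.0, i.e. more steps spent at high noise.
        return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)

    def get_schedule_sketch(num_steps: int, image_seq_len: int, shift: bool = True) -> list[float]:
        timesteps = torch.linspace(1, 0, num_steps + 1)  # one extra entry for t=0
        if shift:
            # mu grows linearly with the latent sequence length: 256 -> 0.5, 4096 -> 1.15.
            mu = 0.5 + (1.15 - 0.5) * (image_seq_len - 256) / (4096 - 256)
            timesteps = time_shift(mu, 1.0, timesteps)
        return timesteps.tolist()

A schnell run would call this with shift=False and keep the unshifted linear schedule.
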
View File

@@ -1,4 +1,5 @@
 import copy
+import yaml
 from time import sleep
 from typing import Dict, List, Literal, Optional

@@ -16,6 +17,7 @@ from invokeai.app.services.model_records import ModelRecordChanges
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.shared.models import FreeUConfig
 from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType
+from invokeai.backend.model_manager.config import CheckpointConfigBase


 class ModelIdentifierField(BaseModel):
@@ -154,8 +156,9 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
     transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
     clip: CLIPField = OutputField(description=FieldDescriptions.clip, title="CLIP")
-    t5Encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder")
+    t5_encoder: T5EncoderField = OutputField(description=FieldDescriptions.t5Encoder, title="T5 Encoder")
     vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
+    max_seq_len: Literal[256, 512] = OutputField(description=FieldDescriptions.vae, title="Max Seq Length")


 @invocation("flux_model_loader", title="Flux Main Model", tags=["model", "flux"], category="model", version="1.0.3")
@@ -189,12 +192,22 @@ class FluxModelLoaderInvocation(BaseInvocation):
             ModelType.VAE,
             BaseModelType.Flux,
         )

+        transformer_config = context.models.get_config(transformer)
+        assert isinstance(transformer_config, CheckpointConfigBase)
+        legacy_config_path = context.config.get().legacy_conf_path / transformer_config.config_path
+        config_path = legacy_config_path.as_posix()
+        with open(config_path, "r") as stream:
+            try:
+                flux_conf = yaml.safe_load(stream)
+            except:
+                raise
         return FluxModelLoaderOutput(
             transformer=TransformerField(transformer=transformer),
             clip=CLIPField(tokenizer=tokenizer, text_encoder=clip_encoder, loras=[], skipped_layers=0),
-            t5Encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
+            t5_encoder=T5EncoderField(tokenizer=tokenizer2, text_encoder=t5_encoder),
             vae=VAEField(vae=vae),
+            max_seq_len=flux_conf['max_seq_len']
         )

     def _get_model(self, context: InvocationContext, submodel: SubModelType) -> ModelIdentifierField:

View File

@@ -32,7 +32,6 @@ from invokeai.backend.model_manager.config import (
 )
 from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
 from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
-from invokeai.backend.util.devices import TorchDevice
 from invokeai.backend.util.silence_warnings import SilenceWarnings
 from invokeai.backend.quantization.bnb_nf4 import quantize_model_nf4

@@ -60,7 +59,7 @@ class FluxVAELoader(GenericDiffusersLoader):
                     raise

            dataclass_fields = {f.name for f in fields(AutoEncoderParams)}
-            filtered_data = {k: v for k, v in flux_conf["params"]["ae_params"].items() if k in dataclass_fields}
+            filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields}
            params = AutoEncoderParams(**filtered_data)

            with SilenceWarnings():

View File

@@ -324,7 +324,12 @@ class ModelProbe(object):
         if model_type is ModelType.Main:
             if base_type == BaseModelType.Flux:
                 # TODO: Decide between dev/schnell
-                config_file = "flux/flux1-schnell.yaml"
+                checkpoint = ModelProbe._scan_and_load_checkpoint(model_path)
+                state_dict = checkpoint.get("state_dict") or checkpoint
+                if 'guidance_in.out_layer.weight' in state_dict:
+                    config_file = "flux/flux1-dev.yaml"
+                else:
+                    config_file = "flux/flux1-schnell.yaml"
             else:
                 config_file = LEGACY_CONFIGS[base_type][variant_type]
                 if isinstance(config_file, dict):  # need another tier for sd-2.x models
@@ -338,7 +343,7 @@ class ModelProbe(object):
             )
         elif model_type is ModelType.VAE:
             config_file = (
-                "flux/flux1-schnell.yaml"
+                "flux/flux1-vae.yaml"
                 if base_type is BaseModelType.Flux
                 else "stable-diffusion/v1-inference.yaml"
                 if base_type is BaseModelType.StableDiffusion1

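The probe heuristic works because only FLUX.1-dev is guidance-distilled: its config sets guidance_embed: True (see the YAML below), so its state dict carries guidance_in.* MLP weights that schnell lacks. The same check as a standalone sketch; the helper name is hypothetical, not from the diff:

    def classify_flux_checkpoint(state_dict: dict) -> str:
        # FLUX.1-dev ships a guidance-embedding MLP (guidance_in.*); FLUX.1-schnell does not.
        return "dev" if "guidance_in.out_layer.weight" in state_dict else "schnell"
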
View File

@@ -1,6 +1,6 @@
 repo_id: "black-forest-labs/FLUX.1-dev"
 repo_ae: "ae.safetensors"
-max_length: 512
+max_seq_len: 512
 params:
   in_channels: 64
   vec_in_dim: 768
@@ -17,17 +17,3 @@ params:
   theta: 10_000
   qkv_bias: True
   guidance_embed: True
-  ae_params:
-    resolution: 256
-    in_channels: 3
-    ch: 128
-    out_ch: 3
-    ch_mult:
-      - 1
-      - 2
-      - 4
-      - 4
-    num_res_blocks: 2
-    z_channels: 16
-    scale_factor: 0.3611
-    shift_factor: 0.1159

View File

@@ -1,7 +1,6 @@
 repo_id: "black-forest-labs/FLUX.1-schnell"
 repo_ae: "ae.safetensors"
-t5_encoder: "google/t5-v1_1-xxl"
-max_length: 512
+max_seq_len: 256
 params:
   in_channels: 64
   vec_in_dim: 768
@@ -18,17 +17,3 @@ params:
   theta: 10_000
   qkv_bias: True
   guidance_embed: False
-  ae_params:
-    resolution: 256
-    in_channels: 3
-    ch: 128
-    out_ch: 3
-    ch_mult:
-      - 1
-      - 2
-      - 4
-      - 4
-    num_res_blocks: 2
-    z_channels: 16
-    scale_factor: 0.3611
-    shift_factor: 0.1159

View File

@@ -0,0 +1,16 @@
+repo_id: "black-forest-labs/FLUX.1-schnell"
+repo_path: "ae.safetensors"
+params:
+  resolution: 256
+  in_channels: 3
+  ch: 128
+  out_ch: 3
+  ch_mult:
+    - 1
+    - 2
+    - 4
+    - 4
+  num_res_blocks: 2
+  z_channels: 16
+  scale_factor: 0.3611
+  shift_factor: 0.1159

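This new file carries only what the autoencoder needs; the FluxVAELoader hunk above filters flux_conf["params"] against the AutoEncoderParams dataclass fields, so unknown keys are simply dropped. A sketch of the round trip, with a hypothetical local path standing in for the resolved legacy config path:

    from dataclasses import fields

    import yaml

    from invokeai.backend.flux.modules.autoencoder import AutoEncoderParams

    with open("flux/flux1-vae.yaml", "r") as stream:  # path is illustrative
        flux_conf = yaml.safe_load(stream)

    # Keep only keys that AutoEncoderParams declares, mirroring FluxVAELoader above.
    dataclass_fields = {f.name for f in fields(AutoEncoderParams)}
    filtered_data = {k: v for k, v in flux_conf["params"].items() if k in dataclass_fields}
    params = AutoEncoderParams(**filtered_data)
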
View File

@@ -3735,11 +3735,11 @@ export type components = {
       cover_image?: string | null;
       /**
        * Format
+       * @description Format of the provided checkpoint model
        * @default checkpoint
-       * @constant
        * @enum {string}
        */
-      format: "checkpoint";
+      format: "checkpoint" | "bnb_quantized_nf4b";
       /**
        * Config Path
        * @description path to the checkpoint model config file
@@ -5750,7 +5750,7 @@ export type components = {
        * @default null
        * @enum {string}
        */
-      t5_encoder?: "base" | "16b_quantized" | "8b_quantized";
+      t5_encoder?: "base" | "8b_quantized";
       /**
        * type
        * @default flux_model_loader
@@ -5778,12 +5778,18 @@ export type components = {
        * T5 Encoder
        * @description T5 tokenizer and text encoder
        */
-      t5Encoder: components["schemas"]["T5EncoderField"];
+      t5_encoder: components["schemas"]["T5EncoderField"];
       /**
        * VAE
        * @description VAE
        */
       vae: components["schemas"]["VAEField"];
+      /**
+       * Max Seq Length
+       * @description VAE
+       * @enum {integer}
+       */
+      max_seq_len: 256 | 512;
       /**
        * type
        * @default flux_model_loader_output
@@ -5822,7 +5828,14 @@ export type components = {
        * @description T5 tokenizer and text encoder
        * @default null
        */
-      t5Encoder?: components["schemas"]["T5EncoderField"];
+      t5_encoder?: components["schemas"]["T5EncoderField"];
+      /**
+       * Max Seq Len
+       * @description Max sequence length for the desired flux model
+       * @default null
+       * @enum {integer}
+       */
+      max_seq_len?: 256 | 512;
       /**
        * Positive Prompt
        * @description Positive prompt for text-to-image generation.
@@ -9694,6 +9707,96 @@ export type components = {
      * @enum {integer}
      */
    LogLevel: 0 | 10 | 20 | 30 | 40 | 50;
+    /**
+     * MainBnbQuantized4bCheckpointConfig
+     * @description Model config for main checkpoint models.
+     */
+    MainBnbQuantized4bCheckpointConfig: {
+      /**
+       * Key
+       * @description A unique key for this model.
+       */
+      key: string;
+      /**
+       * Hash
+       * @description The hash of the model file(s).
+       */
+      hash: string;
+      /**
+       * Path
+       * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory.
+       */
+      path: string;
+      /**
+       * Name
+       * @description Name of the model.
+       */
+      name: string;
+      /** @description The base model. */
+      base: components["schemas"]["BaseModelType"];
+      /**
+       * Description
+       * @description Model description
+       */
+      description?: string | null;
+      /**
+       * Source
+       * @description The original source of the model (path, URL or repo_id).
+       */
+      source: string;
+      /** @description The type of source */
+      source_type: components["schemas"]["ModelSourceType"];
+      /**
+       * Source Api Response
+       * @description The original API response from the source, as stringified JSON.
+       */
+      source_api_response?: string | null;
+      /**
+       * Cover Image
+       * @description Url for image to preview model
+       */
+      cover_image?: string | null;
+      /**
+       * Type
+       * @default main
+       * @constant
+       * @enum {string}
+       */
+      type: "main";
+      /**
+       * Trigger Phrases
+       * @description Set of trigger phrases for this model
+       */
+      trigger_phrases?: string[] | null;
+      /** @description Default settings for this model */
+      default_settings?: components["schemas"]["MainModelDefaultSettings"] | null;
+      /** @default normal */
+      variant?: components["schemas"]["ModelVariantType"];
+      /**
+       * Format
+       * @description Format of the provided checkpoint model
+       * @default checkpoint
+       * @enum {string}
+       */
+      format: "checkpoint" | "bnb_quantized_nf4b";
+      /**
+       * Config Path
+       * @description path to the checkpoint model config file
+       */
+      config_path: string;
+      /**
+       * Converted At
+       * @description When this model was last converted to diffusers
+       */
+      converted_at?: number | null;
+      /** @default epsilon */
+      prediction_type?: components["schemas"]["SchedulerPredictionType"];
+      /**
+       * Upcast Attention
+       * @default false
+       */
+      upcast_attention?: boolean;
+    };
    /**
     * MainCheckpointConfig
     * @description Model config for main checkpoint models.
@@ -9761,11 +9864,11 @@ export type components = {
       variant?: components["schemas"]["ModelVariantType"];
       /**
        * Format
+       * @description Format of the provided checkpoint model
        * @default checkpoint
-       * @constant
        * @enum {string}
        */
-      format: "checkpoint";
+      format: "checkpoint" | "bnb_quantized_nf4b";
       /**
        * Config Path
        * @description path to the checkpoint model config file
@@ -10670,7 +10773,7 @@ export type components = {
      * @description Storage format of model.
      * @enum {string}
      */
-    ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b";
+    ModelFormat: "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "t5_encoder_8b" | "t5_encoder_4b" | "bnb_quantized_nf4b";
     /** ModelIdentifierField */
     ModelIdentifierField: {
       /**
@@ -10970,7 +11073,7 @@ export type components = {
       /**
        * Config Out
        * @description After successful installation, this will hold the configuration object.
        */
-      config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null;
+      config_out?: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"]) | null;
       /**
        * Inplace
        * @description Leave model in its current location; otherwise install under models directory
@@ -11056,7 +11159,7 @@ export type components = {
       /**
        * Config
        * @description The model's config
        */
-      config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+      config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
       /**
        * @description The submodel type, if any
        * @default null
@@ -11077,7 +11180,7 @@ export type components = {
       /**
        * Config
        * @description The model's config
        */
-      config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+      config: components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
       /**
        * @description The submodel type, if any
        * @default null
@@ -11218,7 +11321,7 @@ export type components = {
      */
    ModelsList: {
      /** Models */
-      models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[];
+      models: (components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"])[];
    };
    /**
     * Multiply Integers
@@ -15087,11 +15190,11 @@ export type components = {
       cover_image?: string | null;
       /**
        * Format
+       * @description Format of the provided checkpoint model
        * @default checkpoint
-       * @constant
        * @enum {string}
        */
-      format: "checkpoint";
+      format: "checkpoint" | "bnb_quantized_nf4b";
       /**
        * Config Path
        * @description path to the checkpoint model config file
@@ -15619,7 +15722,7 @@ export interface operations {
           [name: string]: unknown;
         };
         content: {
-          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
         };
       };
       /** @description Validation Error */
@@ -15651,7 +15754,7 @@ export interface operations {
           [name: string]: unknown;
         };
         content: {
-          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
         };
       };
       /** @description Bad request */
@@ -15748,7 +15851,7 @@ export interface operations {
           [name: string]: unknown;
         };
         content: {
-          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
         };
       };
       /** @description Bad request */
@@ -16248,7 +16351,7 @@ export interface operations {
           [name: string]: unknown;
         };
         content: {
-          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
+          "application/json": components["schemas"]["MainDiffusersConfig"] | components["schemas"]["MainCheckpointConfig"] | components["schemas"]["MainBnbQuantized4bCheckpointConfig"] | components["schemas"]["VAEDiffusersConfig"] | components["schemas"]["VAECheckpointConfig"] | components["schemas"]["ControlNetDiffusersConfig"] | components["schemas"]["ControlNetCheckpointConfig"] | components["schemas"]["LoRALyCORISConfig"] | components["schemas"]["LoRADiffusersConfig"] | components["schemas"]["T5EncoderConfig"] | components["schemas"]["TextualInversionFileConfig"] | components["schemas"]["TextualInversionFolderConfig"] | components["schemas"]["IPAdapterInvokeAIConfig"] | components["schemas"]["IPAdapterCheckpointConfig"] | components["schemas"]["T2IAdapterConfig"] | components["schemas"]["SpandrelImageToImageConfig"] | components["schemas"]["CLIPVisionDiffusersConfig"] | components["schemas"]["CLIPEmbedDiffusersConfig"];
         };
       };
       /** @description Bad request */