Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

Compare commits: lstein/doc...ryan/flux (14 commits)
Commits:
a8a2fc106d, d23ad1818d, 4181ab654b, 1c97360f9f, 74d6fceeb6, 766ddc18dc, e6ff7488a1,
89a652cfcd, b227b9059d, 3599a4a3e4, 5dd619e137, 7d447cbb88, 3bbba7e4b1, b1845019fe
@@ -197,22 +197,6 @@ tips to reduce the problem:
 
 This should be sufficient to generate larger images up to about 1280x1280.
 
-## Checkpoint Models Load Slowly or Use Too Much RAM
-
-The difference between diffusers models (a folder containing multiple
-subfolders) and checkpoint models (a file ending with .safetensors or
-.ckpt) is that InvokeAI is able to load diffusers models into memory
-incrementally, while checkpoint models must be loaded all at
-once. With very large models, or systems with limited RAM, you may
-experience slowdowns and other memory-related issues when loading
-checkpoint models.
-
-To solve this, go to the Model Manager tab (the cube), select the
-checkpoint model that's giving you trouble, and press the "Convert"
-button in the upper right of your browser window. This will convert the
-checkpoint into a diffusers model, after which loading should be
-faster and less memory-intensive.
-
 ## Memory Leak (Linux)
 
 If you notice a memory leak, it could be caused by memory fragmentation as models are loaded and/or moved from CPU to GPU.
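The section removed in this hunk describes converting a single-file checkpoint into the multi-folder diffusers layout so it can be loaded incrementally. Outside the InvokeAI UI, the same conversion can be sketched with the diffusers library; the file names below are placeholders, not values from the diff.

```python
# Minimal sketch of the checkpoint -> diffusers conversion the removed docs describe.
# Paths are hypothetical; adjust to your own files. Assumes `diffusers` is installed.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file("my_model.safetensors")  # load a .safetensors/.ckpt checkpoint
pipe.save_pretrained("my_model_diffusers")  # writes the multi-subfolder diffusers layout
```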
@@ -218,8 +218,9 @@ async def get_image_workflow(
         raise HTTPException(status_code=404)
 
 
-@images_router.get(
+@images_router.api_route(
     "/i/{image_name}/full",
+    methods=["GET", "HEAD"],
     operation_id="get_image_full",
     response_class=Response,
     responses={
@@ -230,18 +231,6 @@ async def get_image_workflow(
         404: {"description": "Image not found"},
     },
 )
-@images_router.head(
-    "/i/{image_name}/full",
-    operation_id="get_image_full_head",
-    response_class=Response,
-    responses={
-        200: {
-            "description": "Return the full-resolution image",
-            "content": {"image/png": {}},
-        },
-        404: {"description": "Image not found"},
-    },
-)
 async def get_image_full(
     image_name: str = Path(description="The name of full-resolution image file to get"),
 ) -> Response:
@@ -253,7 +242,6 @@ async def get_image_full(
             content = f.read()
         response = Response(content, media_type="image/png")
         response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
-        response.headers["Content-Disposition"] = f'inline; filename="{image_name}"'
         return response
     except Exception:
         raise HTTPException(status_code=404)
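The change above folds the separate HEAD route into a single handler by registering it with `api_route` and an explicit method list. A minimal, self-contained FastAPI sketch of that pattern (the route path and payload here are illustrative, not taken from InvokeAI):

```python
# Minimal sketch: one handler registered for both GET and HEAD via api_route.
# The path and response body are illustrative only.
from fastapi import APIRouter, Response

router = APIRouter()


@router.api_route("/ping", methods=["GET", "HEAD"], response_class=Response)
async def ping() -> Response:
    # Starlette strips the body for HEAD responses automatically.
    return Response(content="pong", media_type="text/plain")
```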
@@ -21,8 +21,6 @@ from controlnet_aux import (
 from controlnet_aux.util import HWC3, ade_palette
 from PIL import Image
 from pydantic import BaseModel, Field, field_validator, model_validator
-from transformers import pipeline
-from transformers.pipelines import DepthEstimationPipeline
 
 from invokeai.app.invocations.baseinvocation import (
     BaseInvocation,
@@ -46,12 +44,13 @@ from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
 from invokeai.backend.image_util.canny import get_canny_edges
-from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
+from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
 from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
 from invokeai.backend.image_util.hed import HEDProcessor
 from invokeai.backend.image_util.lineart import LineartProcessor
 from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
 from invokeai.backend.image_util.util import np_to_pil, pil_to_np
+from invokeai.backend.util.devices import TorchDevice
 
 
 class ControlField(BaseModel):
@@ -593,14 +592,7 @@ class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
         return color_map
 
 
-DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
-# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
-DEPTH_ANYTHING_MODELS = {
-    "large": "LiheYoung/depth-anything-large-hf",
-    "base": "LiheYoung/depth-anything-base-hf",
-    "small": "LiheYoung/depth-anything-small-hf",
-    "small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
-}
+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
 
 
 @invocation(
@@ -608,33 +600,28 @@ DEPTH_ANYTHING_MODELS = {
     title="Depth Anything Processor",
     tags=["controlnet", "depth", "depth anything"],
     category="controlnet",
-    version="1.1.3",
+    version="1.1.2",
 )
 class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
     """Generates a depth map based on the Depth Anything algorithm"""
 
     model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
-        default="small_v2", description="The size of the depth model to use"
+        default="small", description="The size of the depth model to use"
     )
     resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
 
     def run_processor(self, image: Image.Image) -> Image.Image:
-        def load_depth_anything(model_path: Path):
-            depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
-            assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
-            return DepthAnythingPipeline(depth_anything_pipeline)
+        def loader(model_path: Path):
+            return DepthAnythingDetector.load_model(
+                model_path, model_size=self.model_size, device=TorchDevice.choose_torch_device()
+            )
 
         with self._context.models.load_remote_model(
-            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
-        ) as depth_anything_detector:
-            assert isinstance(depth_anything_detector, DepthAnythingPipeline)
-            depth_map = depth_anything_detector.generate_depth(image)
-
-            # Resizing to user target specified size
-            new_height = int(image.size[1] * (self.resolution / image.size[0]))
-            depth_map = depth_map.resize((self.resolution, new_height))
-
-            return depth_map
+            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=loader
+        ) as model:
+            depth_anything_detector = DepthAnythingDetector(model, TorchDevice.choose_torch_device())
+            processed_image = depth_anything_detector(image=image, resolution=self.resolution)
+            return processed_image
 
 
 @invocation(
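The lines removed in this hunk relied on the Hugging Face transformers depth-estimation pipeline, which returns a dict containing a PIL depth image. A minimal standalone sketch of that usage; the input image path is a placeholder, and the model id is one of the ids listed in the hunk above:

```python
# Minimal sketch of the transformers depth-estimation pipeline used on one side of this hunk.
# The image file is a placeholder.
from PIL import Image
from transformers import pipeline

depth_estimator = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")
result = depth_estimator(Image.open("input.png"))
depth_map = result["depth"]  # a PIL.Image.Image with the predicted depth
depth_map.save("depth.png")
```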
invokeai/app/invocations/flux_text_to_image.py (new file, 278 lines)
@@ -0,0 +1,278 @@

from pathlib import Path
from typing import Literal

import torch
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from optimum.quanto import qfloat8
from PIL import Image
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers.models.auto import AutoModelForTextEncoding

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel
from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel
from invokeai.backend.util.devices import TorchDevice

TFluxModelKeys = Literal["flux-schnell"]
FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"}


class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel):
    base_class = FluxTransformer2DModel


class QuantizedModelForTextEncoding(FastQuantizedTransformersModel):
    auto_class = AutoModelForTextEncoding


@invocation(
    "flux_text_to_image",
    title="FLUX Text to Image",
    tags=["image"],
    category="image",
    version="1.0.0",
)
class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Text-to-image generation using a FLUX model."""

    model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.")
    use_8bit: bool = InputField(
        default=False, description="Whether to quantize the transformer model to 8-bit precision."
    )
    positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.")
    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
    num_steps: int = InputField(default=4, description="Number of diffusion steps.")
    guidance: float = InputField(
        default=4.0,
        description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images.",
    )
    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model])

        t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path)
        latents = self._run_diffusion(context, model_path, clip_embeddings, t5_embeddings)
        image = self._run_vae_decoding(context, model_path, latents)
        image_dto = context.images.save(image=image)
        return ImageOutput.build(image_dto)

    def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]:
        # Determine the T5 max sequence length based on the model.
        if self.model == "flux-schnell":
            max_seq_len = 256
        # elif self.model == "flux-dev":
        #     max_seq_len = 512
        else:
            raise ValueError(f"Unknown model: {self.model}")

        # Load the CLIP tokenizer.
        clip_tokenizer_path = flux_model_dir / "tokenizer"
        clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True)
        assert isinstance(clip_tokenizer, CLIPTokenizer)

        # Load the T5 tokenizer.
        t5_tokenizer_path = flux_model_dir / "tokenizer_2"
        t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True)
        assert isinstance(t5_tokenizer, T5TokenizerFast)

        clip_text_encoder_path = flux_model_dir / "text_encoder"
        t5_text_encoder_path = flux_model_dir / "text_encoder_2"
        with (
            context.models.load_local_model(
                model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder
            ) as clip_text_encoder,
            context.models.load_local_model(
                model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2
            ) as t5_text_encoder,
        ):
            assert isinstance(clip_text_encoder, CLIPTextModel)
            assert isinstance(t5_text_encoder, T5EncoderModel)
            pipeline = FluxPipeline(
                scheduler=None,
                vae=None,
                text_encoder=clip_text_encoder,
                tokenizer=clip_tokenizer,
                text_encoder_2=t5_text_encoder,
                tokenizer_2=t5_tokenizer,
                transformer=None,
            )

            # prompt_embeds: T5 embeddings
            # pooled_prompt_embeds: CLIP embeddings
            prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
                prompt=self.positive_prompt,
                prompt_2=self.positive_prompt,
                device=TorchDevice.choose_torch_device(),
                max_sequence_length=max_seq_len,
            )

        assert isinstance(prompt_embeds, torch.Tensor)
        assert isinstance(pooled_prompt_embeds, torch.Tensor)
        return prompt_embeds, pooled_prompt_embeds

    def _run_diffusion(
        self,
        context: InvocationContext,
        flux_model_dir: Path,
        clip_embeddings: torch.Tensor,
        t5_embeddings: torch.Tensor,
    ):
        scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(flux_model_dir / "scheduler", local_files_only=True)

        # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from
        # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems
        # if the cache is not empty.
        context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30)

        transformer_path = flux_model_dir / "transformer"
        with context.models.load_local_model(
            model_path=transformer_path, loader=self._load_flux_transformer
        ) as transformer:
            assert isinstance(transformer, FluxTransformer2DModel)

            flux_pipeline_with_transformer = FluxPipeline(
                scheduler=scheduler,
                vae=None,
                text_encoder=None,
                tokenizer=None,
                text_encoder_2=None,
                tokenizer_2=None,
                transformer=transformer,
            )

            t5_embeddings = t5_embeddings.to(dtype=transformer.dtype)
            clip_embeddings = clip_embeddings.to(dtype=transformer.dtype)

            latents = flux_pipeline_with_transformer(
                height=self.height,
                width=self.width,
                num_inference_steps=self.num_steps,
                guidance_scale=self.guidance,
                generator=torch.Generator().manual_seed(self.seed),
                prompt_embeds=t5_embeddings,
                pooled_prompt_embeds=clip_embeddings,
                output_type="latent",
                return_dict=False,
            )[0]

        assert isinstance(latents, torch.Tensor)
        return latents

    def _run_vae_decoding(
        self,
        context: InvocationContext,
        flux_model_dir: Path,
        latents: torch.Tensor,
    ) -> Image.Image:
        vae_path = flux_model_dir / "vae"
        with context.models.load_local_model(model_path=vae_path, loader=self._load_flux_vae) as vae:
            assert isinstance(vae, AutoencoderKL)

            flux_pipeline_with_vae = FluxPipeline(
                scheduler=None,
                vae=vae,
                text_encoder=None,
                tokenizer=None,
                text_encoder_2=None,
                tokenizer_2=None,
                transformer=None,
            )

            latents = flux_pipeline_with_vae._unpack_latents(
                latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor
            )
            latents = (
                latents / flux_pipeline_with_vae.vae.config.scaling_factor
            ) + flux_pipeline_with_vae.vae.config.shift_factor
            latents = latents.to(dtype=vae.dtype)
            image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0]
            image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0]

        assert isinstance(image, Image.Image)
        return image

    @staticmethod
    def _load_flux_text_encoder(path: Path) -> CLIPTextModel:
        model = CLIPTextModel.from_pretrained(path, local_files_only=True)
        assert isinstance(model, CLIPTextModel)
        return model

    def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel:
        if self.use_8bit:
            model_8bit_path = path / "quantized"
            if model_8bit_path.exists():
                # The quantized model exists, load it.
                # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
                # something that we should be able to make much faster.
                q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path)

                # Access the underlying wrapped model.
                # We access the wrapped model, even though it is private, because it simplifies the type checking by
                # always returning a T5EncoderModel from this function.
                model = q_model._wrapped
            else:
                # The quantized model does not exist yet, quantize and save it.
                # TODO(ryand): dtype?
                model = T5EncoderModel.from_pretrained(path, local_files_only=True)
                assert isinstance(model, T5EncoderModel)

                q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8)

                model_8bit_path.mkdir(parents=True, exist_ok=True)
                q_model.save_pretrained(model_8bit_path)

                # (See earlier comment about accessing the wrapped model.)
                model = q_model._wrapped
        else:
            model = T5EncoderModel.from_pretrained(path, local_files_only=True)

        assert isinstance(model, T5EncoderModel)
        return model

    def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel:
        if self.use_8bit:
            model_8bit_path = path / "quantized"
            if model_8bit_path.exists():
                # The quantized model exists, load it.
                # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
                # something that we should be able to make much faster.
                q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path)

                # Access the underlying wrapped model.
                # We access the wrapped model, even though it is private, because it simplifies the type checking by
                # always returning a FluxTransformer2DModel from this function.
                model = q_model._wrapped
            else:
                # The quantized model does not exist yet, quantize and save it.
                # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on
                # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it
                # here.
                model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
                assert isinstance(model, FluxTransformer2DModel)

                q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8)

                model_8bit_path.mkdir(parents=True, exist_ok=True)
                q_model.save_pretrained(model_8bit_path)

                # (See earlier comment about accessing the wrapped model.)
                model = q_model._wrapped
        else:
            model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)

        assert isinstance(model, FluxTransformer2DModel)
        return model

    @staticmethod
    def _load_flux_vae(path: Path) -> AutoencoderKL:
        model = AutoencoderKL.from_pretrained(path, local_files_only=True)
        assert isinstance(model, AutoencoderKL)
        return model
@@ -81,7 +81,7 @@ def get_openapi_func(
     # Add the output map to the schema
     openapi_schema["components"]["schemas"]["InvocationOutputMap"] = {
         "type": "object",
-        "properties": dict(sorted(invocation_output_map_properties.items())),
+        "properties": invocation_output_map_properties,
         "required": invocation_output_map_required,
     }
 
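The one-line change above controls whether the `InvocationOutputMap` properties are emitted in sorted, deterministic order. A small illustration of the difference, using a hypothetical dict of invocation names:

```python
# Illustration of the sorted vs. unsorted property map; the keys here are hypothetical.
props = {"resize": "ImageOutput", "add": "IntegerOutput", "blur": "ImageOutput"}

print(list(props))                        # insertion order: ['resize', 'add', 'blur']
print(list(dict(sorted(props.items()))))  # deterministic order: ['add', 'blur', 'resize']
```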
invokeai/backend/image_util/depth_anything/__init__.py (new file, 90 lines)
@@ -0,0 +1,90 @@

from pathlib import Path
from typing import Literal

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from einops import repeat
from PIL import Image
from torchvision.transforms import Compose

from invokeai.app.services.config.config_default import get_config
from invokeai.backend.image_util.depth_anything.model.dpt import DPT_DINOv2
from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize
from invokeai.backend.util.logging import InvokeAILogger

config = get_config()
logger = InvokeAILogger.get_logger(config=config)

DEPTH_ANYTHING_MODELS = {
    "large": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth?download=true",
    "base": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitb14.pth?download=true",
    "small": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vits14.pth?download=true",
}


transform = Compose(
    [
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
)


class DepthAnythingDetector:
    def __init__(self, model: DPT_DINOv2, device: torch.device) -> None:
        self.model = model
        self.device = device

    @staticmethod
    def load_model(
        model_path: Path, device: torch.device, model_size: Literal["large", "base", "small"] = "small"
    ) -> DPT_DINOv2:
        match model_size:
            case "small":
                model = DPT_DINOv2(encoder="vits", features=64, out_channels=[48, 96, 192, 384])
            case "base":
                model = DPT_DINOv2(encoder="vitb", features=128, out_channels=[96, 192, 384, 768])
            case "large":
                model = DPT_DINOv2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])

        model.load_state_dict(torch.load(model_path.as_posix(), map_location="cpu"))
        model.eval()

        model.to(device)
        return model

    def __call__(self, image: Image.Image, resolution: int = 512) -> Image.Image:
        if not self.model:
            logger.warn("DepthAnything model was not loaded. Returning original image")
            return image

        np_image = np.array(image, dtype=np.uint8)
        np_image = np_image[:, :, ::-1] / 255.0

        image_height, image_width = np_image.shape[:2]
        np_image = transform({"image": np_image})["image"]
        tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            depth = self.model(tensor_image)
            depth = F.interpolate(depth[None], (image_height, image_width), mode="bilinear", align_corners=False)[0, 0]
            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

        depth_map = repeat(depth, "h w -> h w 3").cpu().numpy().astype(np.uint8)
        depth_map = Image.fromarray(depth_map)

        new_height = int(image_height * (resolution / image_width))
        depth_map = depth_map.resize((resolution, new_height))

        return depth_map
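A short usage sketch of the detector defined above; the checkpoint and image paths are placeholders, and the weights file would normally be fetched from one of the DEPTH_ANYTHING_MODELS URLs first:

```python
# Hypothetical usage of DepthAnythingDetector; the checkpoint and image paths are placeholders.
from pathlib import Path

import torch
from PIL import Image

from invokeai.backend.image_util.depth_anything import DepthAnythingDetector

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DepthAnythingDetector.load_model(Path("depth_anything_vits14.pth"), device, model_size="small")
detector = DepthAnythingDetector(model, device)
depth = detector(Image.open("photo.png"), resolution=512)
depth.save("photo_depth.png")
```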
Deleted file (31 lines):
@@ -1,31 +0,0 @@
-from typing import Optional
-
-import torch
-from PIL import Image
-from transformers.pipelines import DepthEstimationPipeline
-
-from invokeai.backend.raw_model import RawModel
-
-
-class DepthAnythingPipeline(RawModel):
-    """Custom wrapper for the Depth Estimation pipeline from transformers adding compatibility
-    for Invoke's Model Management System"""
-
-    def __init__(self, pipeline: DepthEstimationPipeline) -> None:
-        self._pipeline = pipeline
-
-    def generate_depth(self, image: Image.Image) -> Image.Image:
-        depth_map = self._pipeline(image)["depth"]
-        assert isinstance(depth_map, Image.Image)
-        return depth_map
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
-        if device is not None and device.type not in {"cpu", "cuda"}:
-            device = None
-        self._pipeline.model.to(device=device, dtype=dtype)
-        self._pipeline.device = self._pipeline.model.device
-
-    def calc_size(self) -> int:
-        from invokeai.backend.model_manager.load.model_util import calc_module_size
-
-        return calc_module_size(self._pipeline.model)
invokeai/backend/image_util/depth_anything/model/blocks.py (new file, 145 lines)
@@ -0,0 +1,145 @@

import torch.nn as nn


def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    if len(in_shape) >= 4:
        out_shape4 = out_shape

    if expand:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        if len(in_shape) >= 4:
            out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    if len(in_shape) >= 4:
        scratch.layer4_rn = nn.Conv2d(
            in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
        )

    return scratch


class ResidualConvUnit(nn.Module):
    """Residual convolution module."""

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        if self.bn:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """

        out = self.activation(x)
        out = self.conv1(out)
        if self.bn:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn:
            out = self.bn2(out)

        if self.groups > 1:
            out = self.conv_merge(out)

        return self.skip_add.add(out, x)


class FeatureFusionBlock(nn.Module):
    """Feature fusion block."""

    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners

        self.groups = 1

        self.expand = expand
        out_features = features
        if self.expand:
            out_features = features // 2

        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

        self.size = size

    def forward(self, *xs, size=None):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            res = self.resConfUnit1(xs[1])
            output = self.skip_add.add(output, res)

        output = self.resConfUnit2(output)

        if (size is None) and (self.size is None):
            modifier = {"scale_factor": 2}
        elif size is None:
            modifier = {"size": self.size}
        else:
            modifier = {"size": size}

        output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners)

        output = self.out_conv(output)

        return output
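For orientation, a tiny sketch of how one of these fusion blocks behaves on dummy feature maps (the shapes are illustrative): it merges two same-sized feature maps and upsamples the result by 2x.

```python
# Illustrative check of FeatureFusionBlock on random feature maps; shapes are arbitrary.
import torch
import torch.nn as nn

from invokeai.backend.image_util.depth_anything.model.blocks import FeatureFusionBlock

block = FeatureFusionBlock(features=64, activation=nn.ReLU(False))
a = torch.randn(1, 64, 16, 16)
b = torch.randn(1, 64, 16, 16)
out = block(a, b)
print(out.shape)  # torch.Size([1, 64, 32, 32]) - fused and upsampled by 2x
```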
invokeai/backend/image_util/depth_anything/model/dpt.py (new file, 183 lines)
@@ -0,0 +1,183 @@

from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F

from invokeai.backend.image_util.depth_anything.model.blocks import FeatureFusionBlock, _make_scratch

torchhub_path = Path(__file__).parent.parent / "torchhub"


def _make_fusion_block(features, use_bn, size=None):
    return FeatureFusionBlock(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )


class DPTHead(nn.Module):
    def __init__(self, nclass, in_channels, features, out_channels, use_bn=False, use_clstoken=False):
        super(DPTHead, self).__init__()

        self.nclass = nclass
        self.use_clstoken = use_clstoken

        self.projects = nn.ModuleList(
            [
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channel,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
                for out_channel in out_channels
            ]
        )

        self.resize_layers = nn.ModuleList(
            [
                nn.ConvTranspose2d(
                    in_channels=out_channels[0], out_channels=out_channels[0], kernel_size=4, stride=4, padding=0
                ),
                nn.ConvTranspose2d(
                    in_channels=out_channels[1], out_channels=out_channels[1], kernel_size=2, stride=2, padding=0
                ),
                nn.Identity(),
                nn.Conv2d(
                    in_channels=out_channels[3], out_channels=out_channels[3], kernel_size=3, stride=2, padding=1
                ),
            ]
        )

        if use_clstoken:
            self.readout_projects = nn.ModuleList()
            for _ in range(len(self.projects)):
                self.readout_projects.append(nn.Sequential(nn.Linear(2 * in_channels, in_channels), nn.GELU()))

        self.scratch = _make_scratch(
            out_channels,
            features,
            groups=1,
            expand=False,
        )

        self.scratch.stem_transpose = None

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        head_features_1 = features
        head_features_2 = 32

        if nclass > 1:
            self.scratch.output_conv = nn.Sequential(
                nn.Conv2d(head_features_1, head_features_1, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_1, nclass, kernel_size=1, stride=1, padding=0),
            )
        else:
            self.scratch.output_conv1 = nn.Conv2d(
                head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1
            )

            self.scratch.output_conv2 = nn.Sequential(
                nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
                nn.ReLU(True),
                nn.Identity(),
            )

    def forward(self, out_features, patch_h, patch_w):
        out = []
        for i, x in enumerate(out_features):
            if self.use_clstoken:
                x, cls_token = x[0], x[1]
                readout = cls_token.unsqueeze(1).expand_as(x)
                x = self.readout_projects[i](torch.cat((x, readout), -1))
            else:
                x = x[0]

            x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))

            x = self.projects[i](x)
            x = self.resize_layers[i](x)

            out.append(x)

        layer_1, layer_2, layer_3, layer_4 = out

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv1(path_1)
        out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
        out = self.scratch.output_conv2(out)

        return out


class DPT_DINOv2(nn.Module):
    def __init__(
        self,
        features,
        out_channels,
        encoder="vitl",
        use_bn=False,
        use_clstoken=False,
    ):
        super(DPT_DINOv2, self).__init__()

        assert encoder in ["vits", "vitb", "vitl"]

        # # in case the Internet connection is not stable, please load the DINOv2 locally
        # if use_local:
        #     self.pretrained = torch.hub.load(
        #         torchhub_path / "facebookresearch_dinov2_main",
        #         "dinov2_{:}14".format(encoder),
        #         source="local",
        #         pretrained=False,
        #     )
        # else:
        #     self.pretrained = torch.hub.load(
        #         "facebookresearch/dinov2",
        #         "dinov2_{:}14".format(encoder),
        #     )

        self.pretrained = torch.hub.load(
            "facebookresearch/dinov2",
            "dinov2_{:}14".format(encoder),
        )

        dim = self.pretrained.blocks[0].attn.qkv.in_features

        self.depth_head = DPTHead(1, dim, features, out_channels=out_channels, use_bn=use_bn, use_clstoken=use_clstoken)

    def forward(self, x):
        h, w = x.shape[-2:]

        features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)

        patch_h, patch_w = h // 14, w // 14

        depth = self.depth_head(features, patch_h, patch_w)
        depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
        depth = F.relu(depth)

        return depth.squeeze(1)
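Constructing `DPT_DINOv2` pulls the DINOv2 backbone from torch.hub, so a full forward pass needs network access and a sizeable download. As a lighter orientation, the head alone can be exercised with dummy ViT-style tokens; every size below is illustrative, not taken from the file.

```python
# Exercising DPTHead alone with dummy DINOv2-style tokens; all sizes here are illustrative.
# (A full DPT_DINOv2 forward would first download the DINOv2 backbone via torch.hub.)
import torch

from invokeai.backend.image_util.depth_anything.model.dpt import DPTHead

patch_h = patch_w = 16  # a 224x224 input split into 14x14 patches
dim = 384               # token width of a ViT-S style backbone
tokens = [torch.randn(1, patch_h * patch_w, dim) for _ in range(4)]

head = DPTHead(1, dim, features=64, out_channels=[48, 96, 192, 384])
depth = head([(t,) for t in tokens], patch_h, patch_w)  # wrap tokens as (tensor,) since use_clstoken=False
print(depth.shape)  # torch.Size([1, 1, 224, 224])
```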
invokeai/backend/image_util/depth_anything/utilities/util.py (new file, 227 lines)
@@ -0,0 +1,227 @@

import math

import cv2
import numpy as np
import torch
import torch.nn.functional as F


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method)

    sample["disparity"] = cv2.resize(sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST)
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class Resize(object):
    """Resize sample to given size (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller
                    than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as least as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST)

            if "semseg_mask" in sample:
                # sample["semseg_mask"] = cv2.resize(
                #     sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST
                # )
                sample["semseg_mask"] = F.interpolate(
                    torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode="nearest"
                ).numpy()[0, 0]

            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                # sample["mask"] = sample["mask"].astype(bool)

        # print(sample['image'].shape, sample['depth'].shape)
        return sample


class NormalizeImage(object):
    """Normalize image by given mean and std."""

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input."""

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        if "semseg_mask" in sample:
            sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32)
            sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"])

        return sample
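These transforms are composed in depth_anything/__init__.py into the 518-pixel preprocessing chain. A small sketch of feeding a dummy float image through an equivalent composition; the 480x640 input size is arbitrary:

```python
# Feeding a dummy RGB image through the same preprocessing chain that __init__.py builds.
# The 480x640 input size is arbitrary.
import cv2
import numpy as np
from torchvision.transforms import Compose

from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize

transform = Compose(
    [
        Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True,
               ensure_multiple_of=14, resize_method="lower_bound",
               image_interpolation_method=cv2.INTER_CUBIC),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
)

np_image = np.random.rand(480, 640, 3)         # float image in [0, 1], HWC layout
out = transform({"image": np_image})["image"]  # CHW float32, sides rounded to multiples of 14
print(out.shape)                               # e.g. (3, 518, 686)
```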
invokeai/backend/load_flux_model.py (new file, 129 lines)
@@ -0,0 +1,129 @@

import json
import os
import time
from pathlib import Path
from typing import Union

import torch
from diffusers.models.model_loading_utils import load_state_dict
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.utils import (
    CONFIG_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFETENSORS_WEIGHTS_NAME,
    _get_checkpoint_shard_files,
    is_accelerate_available,
)
from optimum.quanto import qfloat8
from optimum.quanto.models import QuantizedDiffusersModel
from optimum.quanto.models.shared_dict import ShardedStateDict

from invokeai.backend.requantize import requantize


class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel):
    base_class = FluxTransformer2DModel

    @classmethod
    def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
        if cls.base_class is None:
            raise ValueError("The `base_class` attribute needs to be configured.")

        if not is_accelerate_available():
            raise ValueError("Reloading a quantized diffusers model requires the accelerate library.")
        from accelerate import init_empty_weights

        if os.path.isdir(model_name_or_path):
            # Look for a quantization map
            qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
            if not os.path.exists(qmap_path):
                raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")

            # Look for original model config file.
            model_config_path = os.path.join(model_name_or_path, CONFIG_NAME)
            if not os.path.exists(model_config_path):
                raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.")

            with open(qmap_path, "r", encoding="utf-8") as f:
                qmap = json.load(f)

            with open(model_config_path, "r", encoding="utf-8") as f:
                original_model_cls_name = json.load(f)["_class_name"]
            configured_cls_name = cls.base_class.__name__
            if configured_cls_name != original_model_cls_name:
                raise ValueError(
                    f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})."
                )

            # Create an empty model
            config = cls.base_class.load_config(model_name_or_path)
            with init_empty_weights():
                model = cls.base_class.from_config(config)

            # Look for the index of a sharded checkpoint
            checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
            if os.path.exists(checkpoint_file):
                # Convert the checkpoint path to a list of shards
                _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
                # Create a mapping for the sharded safetensor files
                state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
            else:
                # Look for a single checkpoint file
                checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME)
                if not os.path.exists(checkpoint_file):
                    raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
                # Get state_dict from model checkpoint
                state_dict = load_state_dict(checkpoint_file)

            # Requantize and load quantized weights from state_dict
            requantize(model, state_dict=state_dict, quantization_map=qmap)
            model.eval()
            return cls(model)
        else:
            raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")


def load_flux_transformer(path: Path) -> FluxTransformer2DModel:
    # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
    model_8bit_path = path / "quantized"
    if model_8bit_path.exists():
        # The quantized model exists, load it.
        # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
        # something that we should be able to make much faster.
        q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path)

        # Access the underlying wrapped model.
        # We access the wrapped model, even though it is private, because it simplifies the type checking by
        # always returning a FluxTransformer2DModel from this function.
        model = q_model._wrapped
    else:
        # The quantized model does not exist yet, quantize and save it.
        # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on
        # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it
        # here.
        model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
        assert isinstance(model, FluxTransformer2DModel)

        q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8)

        model_8bit_path.mkdir(parents=True, exist_ok=True)
        q_model.save_pretrained(model_8bit_path)

        # (See earlier comment about accessing the wrapped model.)
        model = q_model._wrapped

    assert isinstance(model, FluxTransformer2DModel)
    return model


def main():
    start = time.time()
    model = load_flux_transformer(
        Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/")
    )
    print(f"Time to load: {time.time() - start}s")
    print("hi")


if __name__ == "__main__":
    main()
@@ -220,17 +220,11 @@ class LoKRLayer(LoRALayerBase):
         if self.w1 is None:
             self.w1_a = values["lokr_w1_a"]
             self.w1_b = values["lokr_w1_b"]
-        else:
-            self.w1_b = None
-            self.w1_a = None
 
         self.w2 = values.get("lokr_w2", None)
         if self.w2 is None:
             self.w2_a = values["lokr_w2_a"]
             self.w2_b = values["lokr_w2_b"]
-        else:
-            self.w2_a = None
-            self.w2_b = None
 
         self.t2 = values.get("lokr_t2", None)
 
@@ -378,39 +372,7 @@ class IA3Layer(LoRALayerBase):
         self.on_input = self.on_input.to(device=device, dtype=dtype)
 
 
-class NormLayer(LoRALayerBase):
-    # bias handled in LoRALayerBase(calc_size, to)
-    # weight: torch.Tensor
-    # bias: Optional[torch.Tensor]
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        self.weight = values["w_norm"]
-        self.bias = values.get("b_norm", None)
-
-        self.rank = None  # unscaled
-        self.check_keys(values, {"w_norm", "b_norm"})
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        return self.weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += self.weight.nelement() * self.weight.element_size()
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
-
-
-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer]
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
 
 
 class LoRAModelRaw(RawModel):  # (torch.nn.Module):
@@ -551,10 +513,6 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
             elif "on_input" in values:
                 layer = IA3Layer(layer_key, values)
 
-            # norms
-            elif "w_norm" in values:
-                layer = NormLayer(layer_key, values)
-
             else:
                 print(f">> Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
                 raise Exception("Unknown lora format!")
@ -11,7 +11,6 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from transformers import CLIPTokenizer

-from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
 from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
@ -46,7 +45,6 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
             SpandrelImageToImageModel,
             GroundingDinoPipeline,
             SegmentAnythingPipeline,
-            DepthAnythingPipeline,
         ),
     ):
         return model.calc_size()
@ -54,6 +54,7 @@ def filter_files(
                 "lora_weights.safetensors",
                 "weights.pb",
                 "onnx_data",
+                "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`.
             )
         ):
             paths.append(file)
@ -62,7 +63,7 @@ def filter_files(
         # downloading random checkpoints that might also be in the repo. However there is no guarantee
         # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models
         # will adhere to this naming convention, so this is an area to be careful of.
-        elif re.search(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
+        elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
             paths.append(file)

     # limit search to subfolder if requested
@ -97,7 +98,9 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
         if variant == ModelRepoVariant.Flax:
             result.add(path)

-        elif path.suffix in [".json", ".txt"]:
+        # Note: '.model' was added to support:
+        # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model
+        elif path.suffix in [".json", ".txt", ".model"]:
             result.add(path)

         elif variant in [
@ -140,6 +143,23 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
                 continue

     for candidate_list in subfolder_weights.values():
+        # Check if at least one of the files has the explicit fp16 variant.
+        at_least_one_fp16 = False
+        for candidate in candidate_list:
+            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16":
+                at_least_one_fp16 = True
+                break
+
+        if not at_least_one_fp16:
+            # If none of the candidates in this candidate_list have the explicit fp16 variant label, then this
+            # candidate_list probably doesn't adhere to the variant naming convention that we expected. In this case,
+            # we'll simply keep all the candidates. An example of a model that hits this case is
+            # `black-forest-labs/FLUX.1-schnell` (as of commit 012d2fd).
+            for candidate in candidate_list:
+                result.add(candidate.path)
+
+        # The candidate_list seems to have the expected variant naming convention. We'll select the highest scoring
+        # candidate.
         highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score)
         if highest_score_candidate:
             result.add(highest_score_candidate.path)
@ -0,0 +1,77 @@
import json
import os
from typing import Union

from diffusers.models.model_loading_utils import load_state_dict
from diffusers.utils import (
    CONFIG_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFETENSORS_WEIGHTS_NAME,
    _get_checkpoint_shard_files,
    is_accelerate_available,
)
from optimum.quanto.models import QuantizedDiffusersModel
from optimum.quanto.models.shared_dict import ShardedStateDict

from invokeai.backend.requantize import requantize


class FastQuantizedDiffusersModel(QuantizedDiffusersModel):
    @classmethod
    def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
        """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation."""
        if cls.base_class is None:
            raise ValueError("The `base_class` attribute needs to be configured.")

        if not is_accelerate_available():
            raise ValueError("Reloading a quantized diffusers model requires the accelerate library.")
        from accelerate import init_empty_weights

        if os.path.isdir(model_name_or_path):
            # Look for a quantization map
            qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
            if not os.path.exists(qmap_path):
                raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")

            # Look for original model config file.
            model_config_path = os.path.join(model_name_or_path, CONFIG_NAME)
            if not os.path.exists(model_config_path):
                raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.")

            with open(qmap_path, "r", encoding="utf-8") as f:
                qmap = json.load(f)

            with open(model_config_path, "r", encoding="utf-8") as f:
                original_model_cls_name = json.load(f)["_class_name"]
            configured_cls_name = cls.base_class.__name__
            if configured_cls_name != original_model_cls_name:
                raise ValueError(
                    f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})."
                )

            # Create an empty model
            config = cls.base_class.load_config(model_name_or_path)
            with init_empty_weights():
                model = cls.base_class.from_config(config)

            # Look for the index of a sharded checkpoint
            checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
            if os.path.exists(checkpoint_file):
                # Convert the checkpoint path to a list of shards
                _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
                # Create a mapping for the sharded safetensor files
                state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
            else:
                # Look for a single checkpoint file
                checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME)
                if not os.path.exists(checkpoint_file):
                    raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
                # Get state_dict from model checkpoint
                state_dict = load_state_dict(checkpoint_file)

            # Requantize and load quantized weights from state_dict
            requantize(model, state_dict=state_dict, quantization_map=qmap)
            model.eval()
            return cls(model)
        else:
            raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")
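For orientation, a minimal subclass sketch follows: concrete subclasses are expected to set `base_class` to the diffusers model they wrap. The subclass name mirrors the QuantizedFluxTransformer2DModel referenced elsewhere in this branch, but the import path for FluxTransformer2DModel is an assumption about the pinned diffusers build, not something this diff prescribes.

# Hedged sketch: assumes the pinned diffusers build exposes FluxTransformer2DModel at the top level.
from diffusers import FluxTransformer2DModel


class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel):
    # base_class tells the optimum-quanto machinery which diffusers model to rebuild
    # (with empty weights) before requantizing from the saved quantization map.
    base_class = FluxTransformer2DModel


# Loading then goes through the overridden fast path:
# q_model = QuantizedFluxTransformer2DModel.from_pretrained("/path/to/quantized/transformer")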
@ -0,0 +1,61 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from optimum.quanto.models import QuantizedTransformersModel
|
||||||
|
from optimum.quanto.models.shared_dict import ShardedStateDict
|
||||||
|
from transformers import AutoConfig
|
||||||
|
from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict
|
||||||
|
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available
|
||||||
|
|
||||||
|
from invokeai.backend.requantize import requantize
|
||||||
|
|
||||||
|
|
||||||
|
class FastQuantizedTransformersModel(QuantizedTransformersModel):
|
||||||
|
@classmethod
|
||||||
|
def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
|
||||||
|
"""We override the `from_pretrained()` method in order to use our custom `requantize()` implementation."""
|
||||||
|
if cls.auto_class is None:
|
||||||
|
raise ValueError(
|
||||||
|
"Quantized models cannot be reloaded using {cls}: use a specialized quantized class such as QuantizedModelForCausalLM instead."
|
||||||
|
)
|
||||||
|
if not is_accelerate_available():
|
||||||
|
raise ValueError("Reloading a quantized transformers model requires the accelerate library.")
|
||||||
|
from accelerate import init_empty_weights
|
||||||
|
|
||||||
|
if os.path.isdir(model_name_or_path):
|
||||||
|
# Look for a quantization map
|
||||||
|
qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
|
||||||
|
if not os.path.exists(qmap_path):
|
||||||
|
raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")
|
||||||
|
with open(qmap_path, "r", encoding="utf-8") as f:
|
||||||
|
qmap = json.load(f)
|
||||||
|
# Create an empty model
|
||||||
|
config = AutoConfig.from_pretrained(model_name_or_path)
|
||||||
|
with init_empty_weights():
|
||||||
|
model = cls.auto_class.from_config(config)
|
||||||
|
# Look for the index of a sharded checkpoint
|
||||||
|
checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
|
||||||
|
if os.path.exists(checkpoint_file):
|
||||||
|
# Convert the checkpoint path to a list of shards
|
||||||
|
checkpoint_file, sharded_metadata = get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
|
||||||
|
# Create a mapping for the sharded safetensor files
|
||||||
|
state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
|
||||||
|
else:
|
||||||
|
# Look for a single checkpoint file
|
||||||
|
checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_NAME)
|
||||||
|
if not os.path.exists(checkpoint_file):
|
||||||
|
raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
|
||||||
|
# Get state_dict from model checkpoint
|
||||||
|
state_dict = load_state_dict(checkpoint_file)
|
||||||
|
# Requantize and load quantized weights from state_dict
|
||||||
|
requantize(model, state_dict=state_dict, quantization_map=qmap)
|
||||||
|
if getattr(model.config, "tie_word_embeddings", True):
|
||||||
|
# Tie output weight embeddings to input weight embeddings
|
||||||
|
# Note that if they were quantized they would NOT be tied
|
||||||
|
model.tie_weights()
|
||||||
|
# Set model in evaluation mode as it is done in transformers
|
||||||
|
model.eval()
|
||||||
|
return cls(model)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")
|
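The transformers-side counterpart works the same way, except that subclasses pin `auto_class` instead of `base_class`. A minimal sketch for wrapping the FLUX T5 text encoder follows; the subclass name is hypothetical and the choice of AutoModelForTextEncoding is an assumption rather than something this branch prescribes.

# Hypothetical subclass sketch; AutoModelForTextEncoding is an assumed auto-class choice.
from transformers import AutoModelForTextEncoding


class FastQuantizedT5EncoderModel(FastQuantizedTransformersModel):
    # auto_class is used by from_pretrained() above to rebuild the architecture from the
    # saved config (via AutoConfig) before requantizing the weights.
    auto_class = AutoModelForTextEncoding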
invokeai/backend/requantize.py (new file, 53 lines)
@ -0,0 +1,53 @@
from typing import Any, Dict

import torch
from optimum.quanto.quantize import _quantize_submodule

# def custom_freeze(model: torch.nn.Module):
#     for name, m in model.named_modules():
#         if isinstance(m, QModuleMixin):
#             m.weight =
#             m.freeze()


def requantize(
    model: torch.nn.Module,
    state_dict: Dict[str, Any],
    quantization_map: Dict[str, Dict[str, str]],
    device: torch.device = None,
):
    if device is None:
        device = next(model.parameters()).device
        if device.type == "meta":
            device = torch.device("cpu")

    # Quantize the model with parameters from the quantization map
    for name, m in model.named_modules():
        qconfig = quantization_map.get(name, None)
        if qconfig is not None:
            weights = qconfig["weights"]
            if weights == "none":
                weights = None
            activations = qconfig["activations"]
            if activations == "none":
                activations = None
            _quantize_submodule(model, name, m, weights=weights, activations=activations)

    # Move model parameters and buffers to CPU before materializing quantized weights
    for name, m in model.named_modules():

        def move_tensor(t, device):
            if t.device.type == "meta":
                return torch.empty_like(t, device=device)
            return t.to(device)

        for name, param in m.named_parameters(recurse=False):
            setattr(m, name, torch.nn.Parameter(move_tensor(param, "cpu")))
        for name, param in m.named_buffers(recurse=False):
            setattr(m, name, move_tensor(param, "cpu"))

    # Freeze model and move to target device
    # freeze(model)
    # model.to(device)

    # Load the quantized model weights
    model.load_state_dict(state_dict, strict=False)
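To illustrate the round trip that requantize() is designed for, here is a hedged, self-contained sketch. It assumes optimum-quanto's public quantize/freeze/quantization_map helpers and uses a throwaway toy model; it illustrates the save/reload flow that the overridden from_pretrained() methods above rely on, and is not code from this branch.

import torch
from optimum.quanto import freeze, qfloat8, quantization_map, quantize

from invokeai.backend.requantize import requantize


def toy_model() -> torch.nn.Module:
    # Purely illustrative stand-in for a real diffusers/transformers model.
    return torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU(), torch.nn.Linear(8, 2))


model = toy_model()
quantize(model, weights=qfloat8)  # attach qfloat8 weight quantizers in place
freeze(model)                     # materialize the quantized weights
qmap = quantization_map(model)    # records which submodules were quantized, and how
state_dict = model.state_dict()

# Rebuild an identical architecture and restore the quantized weights directly,
# without re-running the slow default requantization path.
restored = toy_model()
requantize(restored, state_dict=state_dict, quantization_map=qmap)
restored.eval()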
@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 import diffusers
 import torch
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.loaders import FromOriginalControlNetMixin
+from diffusers.loaders.single_file_model import FromOriginalModelMixin
 from diffusers.models.attention_processor import AttentionProcessor, AttnProcessor
 from diffusers.models.controlnet import ControlNetConditioningEmbedding, ControlNetOutput, zero_module
 from diffusers.models.embeddings import (
@ -32,7 +32,7 @@ from invokeai.backend.util.logging import InvokeAILogger
 logger = InvokeAILogger.get_logger(__name__)


-class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin):
+class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalModelMixin):
     """
     A ControlNet model.

@ -53,61 +53,61 @@
   },
   "dependencies": {
     "@chakra-ui/react-use-size": "^2.1.0",
-    "@dagrejs/dagre": "^1.1.3",
+    "@dagrejs/dagre": "^1.1.2",
-    "@dagrejs/graphlib": "^2.2.3",
+    "@dagrejs/graphlib": "^2.2.2",
     "@dnd-kit/core": "^6.1.0",
     "@dnd-kit/sortable": "^8.0.0",
     "@dnd-kit/utilities": "^3.2.2",
-    "@fontsource-variable/inter": "^5.0.20",
+    "@fontsource-variable/inter": "^5.0.18",
     "@invoke-ai/ui-library": "^0.0.25",
-    "@nanostores/react": "^0.7.3",
+    "@nanostores/react": "^0.7.2",
     "@reduxjs/toolkit": "2.2.3",
     "@roarr/browser-log-writer": "^1.3.0",
-    "chakra-react-select": "^4.9.1",
+    "chakra-react-select": "^4.7.6",
-    "compare-versions": "^6.1.1",
+    "compare-versions": "^6.1.0",
     "dateformat": "^5.0.3",
-    "fracturedjsonjs": "^4.0.2",
+    "fracturedjsonjs": "^4.0.1",
-    "framer-motion": "^11.3.24",
+    "framer-motion": "^11.1.8",
-    "i18next": "^23.12.2",
+    "i18next": "^23.11.3",
-    "i18next-http-backend": "^2.5.2",
+    "i18next-http-backend": "^2.5.1",
     "idb-keyval": "^6.2.1",
     "jsondiffpatch": "^0.6.0",
-    "konva": "^9.3.14",
+    "konva": "^9.3.6",
     "lodash-es": "^4.17.21",
-    "nanostores": "^0.11.2",
+    "nanostores": "^0.10.3",
     "new-github-issue-url": "^1.0.0",
-    "overlayscrollbars": "^2.10.0",
+    "overlayscrollbars": "^2.7.3",
     "overlayscrollbars-react": "^0.5.6",
-    "query-string": "^9.1.0",
+    "query-string": "^9.0.0",
     "react": "^18.3.1",
     "react-colorful": "^5.6.1",
     "react-dom": "^18.3.1",
     "react-dropzone": "^14.2.3",
     "react-error-boundary": "^4.0.13",
-    "react-hook-form": "^7.52.2",
+    "react-hook-form": "^7.51.4",
     "react-hotkeys-hook": "4.5.0",
-    "react-i18next": "^14.1.3",
+    "react-i18next": "^14.1.1",
-    "react-icons": "^5.2.1",
+    "react-icons": "^5.2.0",
     "react-konva": "^18.2.10",
     "react-redux": "9.1.2",
-    "react-resizable-panels": "^2.0.23",
+    "react-resizable-panels": "^2.0.19",
     "react-select": "5.8.0",
-    "react-use": "^17.5.1",
+    "react-use": "^17.5.0",
-    "react-virtuoso": "^4.9.0",
+    "react-virtuoso": "^4.7.10",
-    "reactflow": "^11.11.4",
+    "reactflow": "^11.11.3",
     "redux-dynamic-middlewares": "^2.2.0",
     "redux-remember": "^5.1.0",
     "redux-undo": "^1.1.0",
-    "rfdc": "^1.4.1",
+    "rfdc": "^1.3.1",
     "roarr": "^7.21.1",
     "serialize-error": "^11.0.3",
     "socket.io-client": "^4.7.5",
-    "use-debounce": "^10.0.2",
+    "use-debounce": "^10.0.0",
     "use-device-pixel-ratio": "^1.1.2",
     "use-image": "^1.1.1",
-    "uuid": "^10.0.0",
+    "uuid": "^9.0.1",
-    "zod": "^3.23.8",
+    "zod": "^3.23.6",
-    "zod-validation-error": "^3.3.1"
+    "zod-validation-error": "^3.2.0"
   },
   "peerDependencies": {
     "@chakra-ui/react": "^2.8.2",
@ -118,38 +118,38 @@
   "devDependencies": {
     "@invoke-ai/eslint-config-react": "^0.0.14",
     "@invoke-ai/prettier-config-react": "^0.0.7",
-    "@storybook/addon-essentials": "^8.2.8",
+    "@storybook/addon-essentials": "^8.0.10",
-    "@storybook/addon-interactions": "^8.2.8",
+    "@storybook/addon-interactions": "^8.0.10",
-    "@storybook/addon-links": "^8.2.8",
+    "@storybook/addon-links": "^8.0.10",
-    "@storybook/addon-storysource": "^8.2.8",
+    "@storybook/addon-storysource": "^8.0.10",
-    "@storybook/manager-api": "^8.2.8",
+    "@storybook/manager-api": "^8.0.10",
-    "@storybook/react": "^8.2.8",
+    "@storybook/react": "^8.0.10",
-    "@storybook/react-vite": "^8.2.8",
+    "@storybook/react-vite": "^8.0.10",
-    "@storybook/theming": "^8.2.8",
+    "@storybook/theming": "^8.0.10",
     "@types/dateformat": "^5.0.2",
     "@types/lodash-es": "^4.17.12",
-    "@types/node": "^20.14.15",
+    "@types/node": "^20.12.10",
-    "@types/react": "^18.3.3",
+    "@types/react": "^18.3.1",
     "@types/react-dom": "^18.3.0",
-    "@types/uuid": "^10.0.0",
+    "@types/uuid": "^9.0.8",
-    "@vitejs/plugin-react-swc": "^3.7.0",
+    "@vitejs/plugin-react-swc": "^3.6.0",
     "@vitest/coverage-v8": "^1.5.0",
     "@vitest/ui": "^1.5.0",
     "concurrently": "^8.2.2",
     "dpdm": "^3.14.0",
     "eslint": "^8.57.0",
-    "eslint-plugin-i18next": "^6.0.9",
+    "eslint-plugin-i18next": "^6.0.3",
     "eslint-plugin-path": "^1.3.0",
-    "knip": "^5.27.2",
+    "knip": "^5.12.3",
     "openapi-types": "^12.1.3",
-    "openapi-typescript": "^7.3.0",
+    "openapi-typescript": "^6.7.5",
-    "prettier": "^3.3.3",
+    "prettier": "^3.2.5",
     "rollup-plugin-visualizer": "^5.12.0",
-    "storybook": "^8.2.8",
+    "storybook": "^8.0.10",
     "ts-toolbelt": "^9.6.0",
-    "tsafe": "^1.7.2",
+    "tsafe": "^1.6.6",
-    "typescript": "^5.5.4",
+    "typescript": "^5.4.5",
-    "vite": "^5.4.0",
+    "vite": "^5.2.11",
     "vite-plugin-css-injected-by-js": "^3.5.1",
     "vite-plugin-dts": "^3.9.1",
     "vite-plugin-eslint": "^1.8.1",
invokeai/frontend/web/pnpm-lock.yaml (generated, 5129 lines): file diff suppressed because it is too large.
@ -200,7 +200,6 @@
     "delete": "Delete",
     "depthAnything": "Depth Anything",
     "depthAnythingDescription": "Depth map generation using the Depth Anything technique",
-    "depthAnythingSmallV2": "Small V2",
     "depthMidas": "Depth (Midas)",
     "depthMidasDescription": "Depth map generation using Midas",
     "depthZoe": "Depth (Zoe)",
@ -1,40 +1,26 @@
 /* eslint-disable no-console */
 import fs from 'node:fs';

-import openapiTS, { astToString } from 'openapi-typescript';
+import openapiTS from 'openapi-typescript';
-import ts from 'typescript';

 const OPENAPI_URL = 'http://127.0.0.1:9090/openapi.json';
 const OUTPUT_FILE = 'src/services/api/schema.ts';

 async function generateTypes(schema) {
   process.stdout.write(`Generating types ${OUTPUT_FILE}...`);

-  // Use https://ts-ast-viewer.com to figure out how to create these AST nodes - define a type and use the bottom-left pane's output
-  // `Blob` type
-  const BLOB = ts.factory.createTypeReferenceNode(ts.factory.createIdentifier('Blob'));
-  // `null` type
-  const NULL = ts.factory.createLiteralTypeNode(ts.factory.createNull());
-  // `Record<string, unknown>` type
-  const RECORD_STRING_UNKNOWN = ts.factory.createTypeReferenceNode(ts.factory.createIdentifier('Record'), [
-    ts.factory.createKeywordTypeNode(ts.SyntaxKind.StringKeyword),
-    ts.factory.createKeywordTypeNode(ts.SyntaxKind.UnknownKeyword),
-  ]);
-
   const types = await openapiTS(schema, {
     exportType: true,
     transform: (schemaObject) => {
       if ('format' in schemaObject && schemaObject.format === 'binary') {
-        return schemaObject.nullable ? ts.factory.createUnionTypeNode([BLOB, NULL]) : BLOB;
+        return schemaObject.nullable ? 'Blob | null' : 'Blob';
       }
       if (schemaObject.title === 'MetadataField') {
         // This is `Record<string, never>` by default, but it actually accepts any a dict of any valid JSON value.
-        return RECORD_STRING_UNKNOWN;
+        return 'Record<string, unknown>';
       }
     },
-    defaultNonNullable: false,
   });
-  fs.writeFileSync(OUTPUT_FILE, astToString(types));
+  fs.writeFileSync(OUTPUT_FILE, types);
   process.stdout.write(`\nOK!\r\n`);
 }

@ -42,7 +42,6 @@ const DepthAnythingProcessor = (props: Props) => {

   const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
     () => [
-      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
       { label: t('controlnet.small'), value: 'small' },
       { label: t('controlnet.base'), value: 'base' },
       { label: t('controlnet.large'), value: 'large' },
@ -94,7 +94,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
     buildDefaults: (baseModel?: BaseModelType) => ({
       id: 'depth_anything_image_processor',
       type: 'depth_anything_image_processor',
-      model_size: 'small_v2',
+      model_size: 'small',
       resolution: baseModel === 'sdxl' ? 1024 : 512,
     }),
   },
@ -84,7 +84,7 @@ export type RequiredDepthAnythingImageProcessorInvocation = O.Required<
   'type' | 'model_size' | 'resolution' | 'offload'
 >;

-const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
+const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
 export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
 export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
   zDepthAnythingModelSize.safeParse(v).success;
@ -24,7 +24,6 @@ export const DepthAnythingProcessor = memo(({ onChange, config }: Props) => {

   const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
     () => [
-      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
       { label: t('controlnet.small'), value: 'small' },
       { label: t('controlnet.base'), value: 'base' },
       { label: t('controlnet.large'), value: 'large' },
@ -36,7 +36,7 @@ const zContentShuffleProcessorConfig = z.object({
 });
 export type ContentShuffleProcessorConfig = z.infer<typeof zContentShuffleProcessorConfig>;

-const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
+const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
 export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
 export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
   zDepthAnythingModelSize.safeParse(v).success;
@ -298,7 +298,7 @@ export const CA_PROCESSOR_DATA: CAProcessorsData = {
     buildDefaults: () => ({
       id: 'depth_anything_image_processor',
       type: 'depth_anything_image_processor',
-      model_size: 'small_v2',
+      model_size: 'small',
     }),
     buildNode: (image, config) => ({
       ...config,
@ -57,11 +57,7 @@ export const UpscaleWarning = () => {
     $installModelsTab.set(3);
   }, [dispatch]);

-  if (modelWarnings.length && !shouldShowButton) {
-    return null;
-  }
-
-  if ((!modelWarnings.length && !otherWarnings.length) || isLoading) {
+  if ((!modelWarnings.length && !otherWarnings.length) || isLoading || !shouldShowButton) {
     return null;
   }

File diff suppressed because one or more lines are too long
@ -33,31 +33,35 @@ classifiers = [
 ]
 dependencies = [
   # Core generation dependencies, pinned for reproducible builds.
-  "accelerate==0.30.1",
+  "accelerate==0.33.0",
   "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
   "compel==2.0.2",
   "controlnet-aux==0.0.7",
-  "diffusers[torch]==0.27.2",
+  # TODO(ryand): Bump this once the next diffusers release is ready.
+  "diffusers[torch] @ git+https://github.com/huggingface/diffusers.git@4c6152c2fb0ade468aadb417102605a07a8635d3",
   "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids
   "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model
   "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal()
   "onnx==1.15.0",
   "onnxruntime==1.16.3",
   "opencv-python==4.9.0.80",
+  "optimum-quanto==0.2.4",
   "pytorch-lightning==2.1.3",
   "safetensors==0.4.3",
+  # sentencepiece is required to load T5TokenizerFast (used by FLUX).
+  "sentencepiece==0.2.0",
   "spandrel==0.3.4",
   "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26
-  "torch==2.2.2",
+  "torch==2.4.0",
   "torchmetrics==0.11.4",
   "torchsde==0.2.6",
-  "torchvision==0.17.2",
+  "torchvision==0.19.0",
   "transformers==4.41.1",

   # Core application dependencies, pinned for reproducible builds.
   "fastapi-events==0.11.1",
   "fastapi==0.111.0",
-  "huggingface-hub==0.23.1",
+  "huggingface-hub==0.24.5",
   "pydantic-settings==2.2.1",
   "pydantic==2.7.2",
   "python-socketio==5.11.1",
@ -326,3 +326,80 @@ def test_select_multiple_weights(
 ) -> None:
     filtered_files = filter_files(sd15_test_files, variant)
     assert set(filtered_files) == {Path(f) for f in expected_files}
+
+
+@pytest.fixture
+def flux_schnell_test_files() -> list[Path]:
+    return [
+        Path(f)
+        for f in [
+            "FLUX.1-schnell/.gitattributes",
+            "FLUX.1-schnell/README.md",
+            "FLUX.1-schnell/ae.safetensors",
+            "FLUX.1-schnell/flux1-schnell.safetensors",
+            "FLUX.1-schnell/model_index.json",
+            "FLUX.1-schnell/scheduler/scheduler_config.json",
+            "FLUX.1-schnell/schnell_grid.jpeg",
+            "FLUX.1-schnell/text_encoder/config.json",
+            "FLUX.1-schnell/text_encoder/model.safetensors",
+            "FLUX.1-schnell/text_encoder_2/config.json",
+            "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
+            "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
+            "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
+            "FLUX.1-schnell/tokenizer/merges.txt",
+            "FLUX.1-schnell/tokenizer/special_tokens_map.json",
+            "FLUX.1-schnell/tokenizer/tokenizer_config.json",
+            "FLUX.1-schnell/tokenizer/vocab.json",
+            "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
+            "FLUX.1-schnell/tokenizer_2/spiece.model",
+            "FLUX.1-schnell/tokenizer_2/tokenizer.json",
+            "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
+            "FLUX.1-schnell/transformer/config.json",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
+            "FLUX.1-schnell/vae/config.json",
+            "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
+        ]
+    ]
+
+
+@pytest.mark.parametrize(
+    ["variant", "expected_files"],
+    [
+        (
+            ModelRepoVariant.Default,
+            [
+                "FLUX.1-schnell/model_index.json",
+                "FLUX.1-schnell/scheduler/scheduler_config.json",
+                "FLUX.1-schnell/text_encoder/config.json",
+                "FLUX.1-schnell/text_encoder/model.safetensors",
+                "FLUX.1-schnell/text_encoder_2/config.json",
+                "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
+                "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
+                "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
+                "FLUX.1-schnell/tokenizer/merges.txt",
+                "FLUX.1-schnell/tokenizer/special_tokens_map.json",
+                "FLUX.1-schnell/tokenizer/tokenizer_config.json",
+                "FLUX.1-schnell/tokenizer/vocab.json",
+                "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
+                "FLUX.1-schnell/tokenizer_2/spiece.model",
+                "FLUX.1-schnell/tokenizer_2/tokenizer.json",
+                "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
+                "FLUX.1-schnell/transformer/config.json",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
+                "FLUX.1-schnell/vae/config.json",
+                "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
+            ],
+        ),
+    ],
+)
+def test_select_flux_schnell_files(
+    flux_schnell_test_files: list[Path], variant: ModelRepoVariant, expected_files: list[str]
+) -> None:
+    filtered_files = filter_files(flux_schnell_test_files, variant)
+    assert set(filtered_files) == {Path(f) for f in expected_files}