Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)

Compare commits: lstein/doc...ryan/flux (14 commits)
Commits:
a8a2fc106d, d23ad1818d, 4181ab654b, 1c97360f9f, 74d6fceeb6, 766ddc18dc, e6ff7488a1,
89a652cfcd, b227b9059d, 3599a4a3e4, 5dd619e137, 7d447cbb88, 3bbba7e4b1, b1845019fe
@@ -197,22 +197,6 @@ tips to reduce the problem:
 
 This should be sufficient to generate larger images up to about 1280x1280.
 
-## Checkpoint Models Load Slowly or Use Too Much RAM
-
-The difference between diffusers models (a folder containing multiple
-subfolders) and checkpoint models (a file ending with .safetensors or
-.ckpt) is that InvokeAI is able to load diffusers models into memory
-incrementally, while checkpoint models must be loaded all at
-once. With very large models, or systems with limited RAM, you may
-experience slowdowns and other memory-related issues when loading
-checkpoint models.
-
-To solve this, go to the Model Manager tab (the cube), select the
-checkpoint model that's giving you trouble, and press the "Convert"
-button in the upper right of your browser window. This will convert the
-checkpoint into a diffusers model, after which loading should be
-faster and less memory-intensive.
-
 ## Memory Leak (Linux)
 
 If you notice a memory leak, it could be caused by memory fragmentation as models are loaded and/or moved from CPU to GPU.
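The section removed in this hunk describes converting a single-file checkpoint into the multi-folder diffusers layout so it can be loaded incrementally. Outside the InvokeAI UI, the same conversion can be sketched with the diffusers library; the file names below are placeholders, not values from the diff.

```python
# Minimal sketch of the checkpoint -> diffusers conversion the removed docs describe.
# Paths are hypothetical; adjust to your own files. Assumes `diffusers` is installed.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file("my_model.safetensors")  # load a .safetensors/.ckpt checkpoint
pipe.save_pretrained("my_model_diffusers")  # writes the multi-subfolder diffusers layout
```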
@@ -218,8 +218,9 @@ async def get_image_workflow(
         raise HTTPException(status_code=404)
 
 
-@images_router.get(
+@images_router.api_route(
     "/i/{image_name}/full",
+    methods=["GET", "HEAD"],
     operation_id="get_image_full",
     response_class=Response,
     responses={
@@ -230,18 +231,6 @@ async def get_image_workflow(
         404: {"description": "Image not found"},
     },
 )
-@images_router.head(
-    "/i/{image_name}/full",
-    operation_id="get_image_full_head",
-    response_class=Response,
-    responses={
-        200: {
-            "description": "Return the full-resolution image",
-            "content": {"image/png": {}},
-        },
-        404: {"description": "Image not found"},
-    },
-)
 async def get_image_full(
     image_name: str = Path(description="The name of full-resolution image file to get"),
 ) -> Response:
@@ -253,7 +242,6 @@ async def get_image_full(
             content = f.read()
         response = Response(content, media_type="image/png")
         response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
-        response.headers["Content-Disposition"] = f'inline; filename="{image_name}"'
         return response
     except Exception:
         raise HTTPException(status_code=404)
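The change above folds the separate HEAD route into a single handler by registering it with `api_route` and an explicit method list. A minimal, self-contained FastAPI sketch of that pattern (the route path and payload here are illustrative, not taken from InvokeAI):

```python
# Minimal sketch: one handler registered for both GET and HEAD via api_route.
# The path and response body are illustrative only.
from fastapi import APIRouter, Response

router = APIRouter()


@router.api_route("/ping", methods=["GET", "HEAD"], response_class=Response)
async def ping() -> Response:
    # Starlette strips the body for HEAD responses automatically.
    return Response(content="pong", media_type="text/plain")
```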
@@ -21,8 +21,6 @@ from controlnet_aux import (
 from controlnet_aux.util import HWC3, ade_palette
 from PIL import Image
 from pydantic import BaseModel, Field, field_validator, model_validator
-from transformers import pipeline
-from transformers.pipelines import DepthEstimationPipeline
 
 from invokeai.app.invocations.baseinvocation import (
     BaseInvocation,
@@ -46,12 +44,13 @@ from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
 from invokeai.backend.image_util.canny import get_canny_edges
-from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
+from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
 from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
 from invokeai.backend.image_util.hed import HEDProcessor
 from invokeai.backend.image_util.lineart import LineartProcessor
 from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
 from invokeai.backend.image_util.util import np_to_pil, pil_to_np
+from invokeai.backend.util.devices import TorchDevice
 
 
 class ControlField(BaseModel):
@@ -593,14 +592,7 @@ class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
         return color_map
 
 
-DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
-# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
-DEPTH_ANYTHING_MODELS = {
-    "large": "LiheYoung/depth-anything-large-hf",
-    "base": "LiheYoung/depth-anything-base-hf",
-    "small": "LiheYoung/depth-anything-small-hf",
-    "small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
-}
+DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
 
 
 @invocation(
@@ -608,33 +600,28 @@ DEPTH_ANYTHING_MODELS = {
     title="Depth Anything Processor",
     tags=["controlnet", "depth", "depth anything"],
     category="controlnet",
-    version="1.1.3",
+    version="1.1.2",
 )
 class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
     """Generates a depth map based on the Depth Anything algorithm"""
 
     model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
-        default="small_v2", description="The size of the depth model to use"
+        default="small", description="The size of the depth model to use"
     )
     resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
 
     def run_processor(self, image: Image.Image) -> Image.Image:
-        def load_depth_anything(model_path: Path):
-            depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
-            assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
-            return DepthAnythingPipeline(depth_anything_pipeline)
+        def loader(model_path: Path):
+            return DepthAnythingDetector.load_model(
+                model_path, model_size=self.model_size, device=TorchDevice.choose_torch_device()
+            )
 
         with self._context.models.load_remote_model(
-            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
-        ) as depth_anything_detector:
-            assert isinstance(depth_anything_detector, DepthAnythingPipeline)
-            depth_map = depth_anything_detector.generate_depth(image)
-
-            # Resizing to user target specified size
-            new_height = int(image.size[1] * (self.resolution / image.size[0]))
-            depth_map = depth_map.resize((self.resolution, new_height))
-
-            return depth_map
+            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=loader
+        ) as model:
+            depth_anything_detector = DepthAnythingDetector(model, TorchDevice.choose_torch_device())
+            processed_image = depth_anything_detector(image=image, resolution=self.resolution)
+            return processed_image
 
 
 @invocation(
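The lines removed in this hunk relied on the Hugging Face transformers depth-estimation pipeline, which returns a dict containing a PIL depth image. A minimal standalone sketch of that usage; the input image path is a placeholder, and the model id is one of the ids listed in the hunk above:

```python
# Minimal sketch of the transformers depth-estimation pipeline used on one side of this hunk.
# The image file is a placeholder.
from PIL import Image
from transformers import pipeline

depth_estimator = pipeline(task="depth-estimation", model="LiheYoung/depth-anything-small-hf")
result = depth_estimator(Image.open("input.png"))
depth_map = result["depth"]  # a PIL.Image.Image with the predicted depth
depth_map.save("depth.png")
```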
invokeai/app/invocations/flux_text_to_image.py (new file, 278 lines)
@@ -0,0 +1,278 @@

from pathlib import Path
from typing import Literal

import torch
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
from optimum.quanto import qfloat8
from PIL import Image
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
from transformers.models.auto import AutoModelForTextEncoding

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import InputField, WithBoard, WithMetadata
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.quantization.fast_quantized_diffusion_model import FastQuantizedDiffusersModel
from invokeai.backend.quantization.fast_quantized_transformers_model import FastQuantizedTransformersModel
from invokeai.backend.util.devices import TorchDevice

TFluxModelKeys = Literal["flux-schnell"]
FLUX_MODELS: dict[TFluxModelKeys, str] = {"flux-schnell": "black-forest-labs/FLUX.1-schnell"}


class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel):
    base_class = FluxTransformer2DModel


class QuantizedModelForTextEncoding(FastQuantizedTransformersModel):
    auto_class = AutoModelForTextEncoding


@invocation(
    "flux_text_to_image",
    title="FLUX Text to Image",
    tags=["image"],
    category="image",
    version="1.0.0",
)
class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Text-to-image generation using a FLUX model."""

    model: TFluxModelKeys = InputField(description="The FLUX model to use for text-to-image generation.")
    use_8bit: bool = InputField(
        default=False, description="Whether to quantize the transformer model to 8-bit precision."
    )
    positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.")
    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
    num_steps: int = InputField(default=4, description="Number of diffusion steps.")
    guidance: float = InputField(
        default=4.0,
        description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images.",
    )
    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        model_path = context.models.download_and_cache_model(FLUX_MODELS[self.model])

        t5_embeddings, clip_embeddings = self._encode_prompt(context, model_path)
        latents = self._run_diffusion(context, model_path, clip_embeddings, t5_embeddings)
        image = self._run_vae_decoding(context, model_path, latents)
        image_dto = context.images.save(image=image)
        return ImageOutput.build(image_dto)

    def _encode_prompt(self, context: InvocationContext, flux_model_dir: Path) -> tuple[torch.Tensor, torch.Tensor]:
        # Determine the T5 max sequence length based on the model.
        if self.model == "flux-schnell":
            max_seq_len = 256
        # elif self.model == "flux-dev":
        #     max_seq_len = 512
        else:
            raise ValueError(f"Unknown model: {self.model}")

        # Load the CLIP tokenizer.
        clip_tokenizer_path = flux_model_dir / "tokenizer"
        clip_tokenizer = CLIPTokenizer.from_pretrained(clip_tokenizer_path, local_files_only=True)
        assert isinstance(clip_tokenizer, CLIPTokenizer)

        # Load the T5 tokenizer.
        t5_tokenizer_path = flux_model_dir / "tokenizer_2"
        t5_tokenizer = T5TokenizerFast.from_pretrained(t5_tokenizer_path, local_files_only=True)
        assert isinstance(t5_tokenizer, T5TokenizerFast)

        clip_text_encoder_path = flux_model_dir / "text_encoder"
        t5_text_encoder_path = flux_model_dir / "text_encoder_2"
        with (
            context.models.load_local_model(
                model_path=clip_text_encoder_path, loader=self._load_flux_text_encoder
            ) as clip_text_encoder,
            context.models.load_local_model(
                model_path=t5_text_encoder_path, loader=self._load_flux_text_encoder_2
            ) as t5_text_encoder,
        ):
            assert isinstance(clip_text_encoder, CLIPTextModel)
            assert isinstance(t5_text_encoder, T5EncoderModel)
            pipeline = FluxPipeline(
                scheduler=None,
                vae=None,
                text_encoder=clip_text_encoder,
                tokenizer=clip_tokenizer,
                text_encoder_2=t5_text_encoder,
                tokenizer_2=t5_tokenizer,
                transformer=None,
            )

            # prompt_embeds: T5 embeddings
            # pooled_prompt_embeds: CLIP embeddings
            prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
                prompt=self.positive_prompt,
                prompt_2=self.positive_prompt,
                device=TorchDevice.choose_torch_device(),
                max_sequence_length=max_seq_len,
            )

        assert isinstance(prompt_embeds, torch.Tensor)
        assert isinstance(pooled_prompt_embeds, torch.Tensor)
        return prompt_embeds, pooled_prompt_embeds

    def _run_diffusion(
        self,
        context: InvocationContext,
        flux_model_dir: Path,
        clip_embeddings: torch.Tensor,
        t5_embeddings: torch.Tensor,
    ):
        scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(flux_model_dir / "scheduler", local_files_only=True)

        # HACK(ryand): Manually empty the cache. Currently we don't check the size of the model before loading it from
        # disk. Since the transformer model is large (24GB), there's a good chance that it will OOM on 32GB RAM systems
        # if the cache is not empty.
        context.models._services.model_manager.load.ram_cache.make_room(24 * 2**30)

        transformer_path = flux_model_dir / "transformer"
        with context.models.load_local_model(
            model_path=transformer_path, loader=self._load_flux_transformer
        ) as transformer:
            assert isinstance(transformer, FluxTransformer2DModel)

            flux_pipeline_with_transformer = FluxPipeline(
                scheduler=scheduler,
                vae=None,
                text_encoder=None,
                tokenizer=None,
                text_encoder_2=None,
                tokenizer_2=None,
                transformer=transformer,
            )

            t5_embeddings = t5_embeddings.to(dtype=transformer.dtype)
            clip_embeddings = clip_embeddings.to(dtype=transformer.dtype)

            latents = flux_pipeline_with_transformer(
                height=self.height,
                width=self.width,
                num_inference_steps=self.num_steps,
                guidance_scale=self.guidance,
                generator=torch.Generator().manual_seed(self.seed),
                prompt_embeds=t5_embeddings,
                pooled_prompt_embeds=clip_embeddings,
                output_type="latent",
                return_dict=False,
            )[0]

        assert isinstance(latents, torch.Tensor)
        return latents

    def _run_vae_decoding(
        self,
        context: InvocationContext,
        flux_model_dir: Path,
        latents: torch.Tensor,
    ) -> Image.Image:
        vae_path = flux_model_dir / "vae"
        with context.models.load_local_model(model_path=vae_path, loader=self._load_flux_vae) as vae:
            assert isinstance(vae, AutoencoderKL)

            flux_pipeline_with_vae = FluxPipeline(
                scheduler=None,
                vae=vae,
                text_encoder=None,
                tokenizer=None,
                text_encoder_2=None,
                tokenizer_2=None,
                transformer=None,
            )

            latents = flux_pipeline_with_vae._unpack_latents(
                latents, self.height, self.width, flux_pipeline_with_vae.vae_scale_factor
            )
            latents = (
                latents / flux_pipeline_with_vae.vae.config.scaling_factor
            ) + flux_pipeline_with_vae.vae.config.shift_factor
            latents = latents.to(dtype=vae.dtype)
            image = flux_pipeline_with_vae.vae.decode(latents, return_dict=False)[0]
            image = flux_pipeline_with_vae.image_processor.postprocess(image, output_type="pil")[0]

        assert isinstance(image, Image.Image)
        return image

    @staticmethod
    def _load_flux_text_encoder(path: Path) -> CLIPTextModel:
        model = CLIPTextModel.from_pretrained(path, local_files_only=True)
        assert isinstance(model, CLIPTextModel)
        return model

    def _load_flux_text_encoder_2(self, path: Path) -> T5EncoderModel:
        if self.use_8bit:
            model_8bit_path = path / "quantized"
            if model_8bit_path.exists():
                # The quantized model exists, load it.
                # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
                # something that we should be able to make much faster.
                q_model = QuantizedModelForTextEncoding.from_pretrained(model_8bit_path)

                # Access the underlying wrapped model.
                # We access the wrapped model, even though it is private, because it simplifies the type checking by
                # always returning a T5EncoderModel from this function.
                model = q_model._wrapped
            else:
                # The quantized model does not exist yet, quantize and save it.
                # TODO(ryand): dtype?
                model = T5EncoderModel.from_pretrained(path, local_files_only=True)
                assert isinstance(model, T5EncoderModel)

                q_model = QuantizedModelForTextEncoding.quantize(model, weights=qfloat8)

                model_8bit_path.mkdir(parents=True, exist_ok=True)
                q_model.save_pretrained(model_8bit_path)

                # (See earlier comment about accessing the wrapped model.)
                model = q_model._wrapped
        else:
            model = T5EncoderModel.from_pretrained(path, local_files_only=True)

        assert isinstance(model, T5EncoderModel)
        return model

    def _load_flux_transformer(self, path: Path) -> FluxTransformer2DModel:
        if self.use_8bit:
            model_8bit_path = path / "quantized"
            if model_8bit_path.exists():
                # The quantized model exists, load it.
                # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
                # something that we should be able to make much faster.
                q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path)

                # Access the underlying wrapped model.
                # We access the wrapped model, even though it is private, because it simplifies the type checking by
                # always returning a FluxTransformer2DModel from this function.
                model = q_model._wrapped
            else:
                # The quantized model does not exist yet, quantize and save it.
                # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on
                # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it
                # here.
                model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
                assert isinstance(model, FluxTransformer2DModel)

                q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8)

                model_8bit_path.mkdir(parents=True, exist_ok=True)
                q_model.save_pretrained(model_8bit_path)

                # (See earlier comment about accessing the wrapped model.)
                model = q_model._wrapped
        else:
            model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)

        assert isinstance(model, FluxTransformer2DModel)
        return model

    @staticmethod
    def _load_flux_vae(path: Path) -> AutoencoderKL:
        model = AutoencoderKL.from_pretrained(path, local_files_only=True)
        assert isinstance(model, AutoencoderKL)
        return model
@@ -81,7 +81,7 @@ def get_openapi_func(
     # Add the output map to the schema
     openapi_schema["components"]["schemas"]["InvocationOutputMap"] = {
         "type": "object",
-        "properties": dict(sorted(invocation_output_map_properties.items())),
+        "properties": invocation_output_map_properties,
         "required": invocation_output_map_required,
     }
 
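The one-line change above controls whether the `InvocationOutputMap` properties are emitted in sorted, deterministic order. A small illustration of the difference, using a hypothetical dict of invocation names:

```python
# Illustration of the sorted vs. unsorted property map; the keys here are hypothetical.
props = {"resize": "ImageOutput", "add": "IntegerOutput", "blur": "ImageOutput"}

print(list(props))                        # insertion order: ['resize', 'add', 'blur']
print(list(dict(sorted(props.items()))))  # deterministic order: ['add', 'blur', 'resize']
```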
invokeai/backend/image_util/depth_anything/__init__.py (new file, 90 lines)
@@ -0,0 +1,90 @@

from pathlib import Path
from typing import Literal

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from einops import repeat
from PIL import Image
from torchvision.transforms import Compose

from invokeai.app.services.config.config_default import get_config
from invokeai.backend.image_util.depth_anything.model.dpt import DPT_DINOv2
from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize
from invokeai.backend.util.logging import InvokeAILogger

config = get_config()
logger = InvokeAILogger.get_logger(config=config)

DEPTH_ANYTHING_MODELS = {
    "large": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth?download=true",
    "base": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitb14.pth?download=true",
    "small": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vits14.pth?download=true",
}


transform = Compose(
    [
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
)


class DepthAnythingDetector:
    def __init__(self, model: DPT_DINOv2, device: torch.device) -> None:
        self.model = model
        self.device = device

    @staticmethod
    def load_model(
        model_path: Path, device: torch.device, model_size: Literal["large", "base", "small"] = "small"
    ) -> DPT_DINOv2:
        match model_size:
            case "small":
                model = DPT_DINOv2(encoder="vits", features=64, out_channels=[48, 96, 192, 384])
            case "base":
                model = DPT_DINOv2(encoder="vitb", features=128, out_channels=[96, 192, 384, 768])
            case "large":
                model = DPT_DINOv2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])

        model.load_state_dict(torch.load(model_path.as_posix(), map_location="cpu"))
        model.eval()

        model.to(device)
        return model

    def __call__(self, image: Image.Image, resolution: int = 512) -> Image.Image:
        if not self.model:
            logger.warn("DepthAnything model was not loaded. Returning original image")
            return image

        np_image = np.array(image, dtype=np.uint8)
        np_image = np_image[:, :, ::-1] / 255.0

        image_height, image_width = np_image.shape[:2]
        np_image = transform({"image": np_image})["image"]
        tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            depth = self.model(tensor_image)
            depth = F.interpolate(depth[None], (image_height, image_width), mode="bilinear", align_corners=False)[0, 0]
            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

        depth_map = repeat(depth, "h w -> h w 3").cpu().numpy().astype(np.uint8)
        depth_map = Image.fromarray(depth_map)

        new_height = int(image_height * (resolution / image_width))
        depth_map = depth_map.resize((resolution, new_height))

        return depth_map
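A short usage sketch of the detector defined above; the checkpoint and image paths are placeholders, and the weights file would normally be fetched from one of the DEPTH_ANYTHING_MODELS URLs first:

```python
# Hypothetical usage of DepthAnythingDetector; the checkpoint and image paths are placeholders.
from pathlib import Path

import torch
from PIL import Image

from invokeai.backend.image_util.depth_anything import DepthAnythingDetector

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DepthAnythingDetector.load_model(Path("depth_anything_vits14.pth"), device, model_size="small")
detector = DepthAnythingDetector(model, device)
depth = detector(Image.open("photo.png"), resolution=512)
depth.save("photo_depth.png")
```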
Deleted file (31 lines):
@@ -1,31 +0,0 @@
-from typing import Optional
-
-import torch
-from PIL import Image
-from transformers.pipelines import DepthEstimationPipeline
-
-from invokeai.backend.raw_model import RawModel
-
-
-class DepthAnythingPipeline(RawModel):
-    """Custom wrapper for the Depth Estimation pipeline from transformers adding compatibility
-    for Invoke's Model Management System"""
-
-    def __init__(self, pipeline: DepthEstimationPipeline) -> None:
-        self._pipeline = pipeline
-
-    def generate_depth(self, image: Image.Image) -> Image.Image:
-        depth_map = self._pipeline(image)["depth"]
-        assert isinstance(depth_map, Image.Image)
-        return depth_map
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
-        if device is not None and device.type not in {"cpu", "cuda"}:
-            device = None
-        self._pipeline.model.to(device=device, dtype=dtype)
-        self._pipeline.device = self._pipeline.model.device
-
-    def calc_size(self) -> int:
-        from invokeai.backend.model_manager.load.model_util import calc_module_size
-
-        return calc_module_size(self._pipeline.model)
invokeai/backend/image_util/depth_anything/model/blocks.py (new file, 145 lines)
@@ -0,0 +1,145 @@

import torch.nn as nn


def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    if len(in_shape) >= 4:
        out_shape4 = out_shape

    if expand:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        if len(in_shape) >= 4:
            out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    if len(in_shape) >= 4:
        scratch.layer4_rn = nn.Conv2d(
            in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
        )

    return scratch


class ResidualConvUnit(nn.Module):
    """Residual convolution module."""

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        if self.bn:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """

        out = self.activation(x)
        out = self.conv1(out)
        if self.bn:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn:
            out = self.bn2(out)

        if self.groups > 1:
            out = self.conv_merge(out)

        return self.skip_add.add(out, x)


class FeatureFusionBlock(nn.Module):
    """Feature fusion block."""

    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners

        self.groups = 1

        self.expand = expand
        out_features = features
        if self.expand:
            out_features = features // 2

        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

        self.size = size

    def forward(self, *xs, size=None):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            res = self.resConfUnit1(xs[1])
            output = self.skip_add.add(output, res)

        output = self.resConfUnit2(output)

        if (size is None) and (self.size is None):
            modifier = {"scale_factor": 2}
        elif size is None:
            modifier = {"size": self.size}
        else:
            modifier = {"size": size}

        output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners)

        output = self.out_conv(output)

        return output
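For orientation, a tiny sketch of how one of these fusion blocks behaves on dummy feature maps (the shapes are illustrative): it merges two same-sized feature maps and upsamples the result by 2x.

```python
# Illustrative check of FeatureFusionBlock on random feature maps; shapes are arbitrary.
import torch
import torch.nn as nn

from invokeai.backend.image_util.depth_anything.model.blocks import FeatureFusionBlock

block = FeatureFusionBlock(features=64, activation=nn.ReLU(False))
a = torch.randn(1, 64, 16, 16)
b = torch.randn(1, 64, 16, 16)
out = block(a, b)
print(out.shape)  # torch.Size([1, 64, 32, 32]) - fused and upsampled by 2x
```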
invokeai/backend/image_util/depth_anything/model/dpt.py (new file, 183 lines)
@@ -0,0 +1,183 @@

from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F

from invokeai.backend.image_util.depth_anything.model.blocks import FeatureFusionBlock, _make_scratch

torchhub_path = Path(__file__).parent.parent / "torchhub"


def _make_fusion_block(features, use_bn, size=None):
    return FeatureFusionBlock(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )


class DPTHead(nn.Module):
    def __init__(self, nclass, in_channels, features, out_channels, use_bn=False, use_clstoken=False):
        super(DPTHead, self).__init__()

        self.nclass = nclass
        self.use_clstoken = use_clstoken

        self.projects = nn.ModuleList(
            [
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channel,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
                for out_channel in out_channels
            ]
        )

        self.resize_layers = nn.ModuleList(
            [
                nn.ConvTranspose2d(
                    in_channels=out_channels[0], out_channels=out_channels[0], kernel_size=4, stride=4, padding=0
                ),
                nn.ConvTranspose2d(
                    in_channels=out_channels[1], out_channels=out_channels[1], kernel_size=2, stride=2, padding=0
                ),
                nn.Identity(),
                nn.Conv2d(
                    in_channels=out_channels[3], out_channels=out_channels[3], kernel_size=3, stride=2, padding=1
                ),
            ]
        )

        if use_clstoken:
            self.readout_projects = nn.ModuleList()
            for _ in range(len(self.projects)):
                self.readout_projects.append(nn.Sequential(nn.Linear(2 * in_channels, in_channels), nn.GELU()))

        self.scratch = _make_scratch(
            out_channels,
            features,
            groups=1,
            expand=False,
        )

        self.scratch.stem_transpose = None

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        head_features_1 = features
        head_features_2 = 32

        if nclass > 1:
            self.scratch.output_conv = nn.Sequential(
                nn.Conv2d(head_features_1, head_features_1, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_1, nclass, kernel_size=1, stride=1, padding=0),
            )
        else:
            self.scratch.output_conv1 = nn.Conv2d(
                head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1
            )

            self.scratch.output_conv2 = nn.Sequential(
                nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
                nn.ReLU(True),
                nn.Identity(),
            )

    def forward(self, out_features, patch_h, patch_w):
        out = []
        for i, x in enumerate(out_features):
            if self.use_clstoken:
                x, cls_token = x[0], x[1]
                readout = cls_token.unsqueeze(1).expand_as(x)
                x = self.readout_projects[i](torch.cat((x, readout), -1))
            else:
                x = x[0]

            x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))

            x = self.projects[i](x)
            x = self.resize_layers[i](x)

            out.append(x)

        layer_1, layer_2, layer_3, layer_4 = out

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv1(path_1)
        out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
        out = self.scratch.output_conv2(out)

        return out


class DPT_DINOv2(nn.Module):
    def __init__(
        self,
        features,
        out_channels,
        encoder="vitl",
        use_bn=False,
        use_clstoken=False,
    ):
        super(DPT_DINOv2, self).__init__()

        assert encoder in ["vits", "vitb", "vitl"]

        # # in case the Internet connection is not stable, please load the DINOv2 locally
        # if use_local:
        #     self.pretrained = torch.hub.load(
        #         torchhub_path / "facebookresearch_dinov2_main",
        #         "dinov2_{:}14".format(encoder),
        #         source="local",
        #         pretrained=False,
        #     )
        # else:
        #     self.pretrained = torch.hub.load(
        #         "facebookresearch/dinov2",
        #         "dinov2_{:}14".format(encoder),
        #     )

        self.pretrained = torch.hub.load(
            "facebookresearch/dinov2",
            "dinov2_{:}14".format(encoder),
        )

        dim = self.pretrained.blocks[0].attn.qkv.in_features

        self.depth_head = DPTHead(1, dim, features, out_channels=out_channels, use_bn=use_bn, use_clstoken=use_clstoken)

    def forward(self, x):
        h, w = x.shape[-2:]

        features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)

        patch_h, patch_w = h // 14, w // 14

        depth = self.depth_head(features, patch_h, patch_w)
        depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
        depth = F.relu(depth)

        return depth.squeeze(1)
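Constructing `DPT_DINOv2` pulls the DINOv2 backbone from torch.hub, so a full forward pass needs network access and a sizeable download. As a lighter orientation, the head alone can be exercised with dummy ViT-style tokens; every size below is illustrative, not taken from the file.

```python
# Exercising DPTHead alone with dummy DINOv2-style tokens; all sizes here are illustrative.
# (A full DPT_DINOv2 forward would first download the DINOv2 backbone via torch.hub.)
import torch

from invokeai.backend.image_util.depth_anything.model.dpt import DPTHead

patch_h = patch_w = 16  # a 224x224 input split into 14x14 patches
dim = 384               # token width of a ViT-S style backbone
tokens = [torch.randn(1, patch_h * patch_w, dim) for _ in range(4)]

head = DPTHead(1, dim, features=64, out_channels=[48, 96, 192, 384])
depth = head([(t,) for t in tokens], patch_h, patch_w)  # wrap tokens as (tensor,) since use_clstoken=False
print(depth.shape)  # torch.Size([1, 1, 224, 224])
```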
invokeai/backend/image_util/depth_anything/utilities/util.py (new file, 227 lines)
@@ -0,0 +1,227 @@

import math

import cv2
import numpy as np
import torch
import torch.nn.functional as F


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method)

    sample["disparity"] = cv2.resize(sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST)
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class Resize(object):
    """Resize sample to given size (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller
                    than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as least as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST)

            if "semseg_mask" in sample:
                # sample["semseg_mask"] = cv2.resize(
                #     sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST
                # )
                sample["semseg_mask"] = F.interpolate(
                    torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode="nearest"
                ).numpy()[0, 0]

            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                # sample["mask"] = sample["mask"].astype(bool)

        # print(sample['image'].shape, sample['depth'].shape)
        return sample


class NormalizeImage(object):
    """Normalize image by given mean and std."""

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input."""

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        if "semseg_mask" in sample:
            sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32)
            sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"])

        return sample
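These transforms are composed in depth_anything/__init__.py into the 518-pixel preprocessing chain. A small sketch of feeding a dummy float image through an equivalent composition; the 480x640 input size is arbitrary:

```python
# Feeding a dummy RGB image through the same preprocessing chain that __init__.py builds.
# The 480x640 input size is arbitrary.
import cv2
import numpy as np
from torchvision.transforms import Compose

from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize

transform = Compose(
    [
        Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True,
               ensure_multiple_of=14, resize_method="lower_bound",
               image_interpolation_method=cv2.INTER_CUBIC),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
)

np_image = np.random.rand(480, 640, 3)         # float image in [0, 1], HWC layout
out = transform({"image": np_image})["image"]  # CHW float32, sides rounded to multiples of 14
print(out.shape)                               # e.g. (3, 518, 686)
```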
invokeai/backend/load_flux_model.py (new file, 129 lines)
@@ -0,0 +1,129 @@

import json
import os
import time
from pathlib import Path
from typing import Union

import torch
from diffusers.models.model_loading_utils import load_state_dict
from diffusers.models.transformers.transformer_flux import FluxTransformer2DModel
from diffusers.utils import (
    CONFIG_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFETENSORS_WEIGHTS_NAME,
    _get_checkpoint_shard_files,
    is_accelerate_available,
)
from optimum.quanto import qfloat8
from optimum.quanto.models import QuantizedDiffusersModel
from optimum.quanto.models.shared_dict import ShardedStateDict

from invokeai.backend.requantize import requantize


class QuantizedFluxTransformer2DModel(QuantizedDiffusersModel):
    base_class = FluxTransformer2DModel

    @classmethod
    def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
        if cls.base_class is None:
            raise ValueError("The `base_class` attribute needs to be configured.")

        if not is_accelerate_available():
            raise ValueError("Reloading a quantized diffusers model requires the accelerate library.")
        from accelerate import init_empty_weights

        if os.path.isdir(model_name_or_path):
            # Look for a quantization map
            qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
            if not os.path.exists(qmap_path):
                raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")

            # Look for original model config file.
            model_config_path = os.path.join(model_name_or_path, CONFIG_NAME)
            if not os.path.exists(model_config_path):
                raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.")

            with open(qmap_path, "r", encoding="utf-8") as f:
                qmap = json.load(f)

            with open(model_config_path, "r", encoding="utf-8") as f:
                original_model_cls_name = json.load(f)["_class_name"]
            configured_cls_name = cls.base_class.__name__
            if configured_cls_name != original_model_cls_name:
                raise ValueError(
                    f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})."
                )

            # Create an empty model
            config = cls.base_class.load_config(model_name_or_path)
            with init_empty_weights():
                model = cls.base_class.from_config(config)

            # Look for the index of a sharded checkpoint
            checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
            if os.path.exists(checkpoint_file):
                # Convert the checkpoint path to a list of shards
                _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
                # Create a mapping for the sharded safetensor files
                state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
            else:
                # Look for a single checkpoint file
                checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME)
                if not os.path.exists(checkpoint_file):
                    raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
                # Get state_dict from model checkpoint
                state_dict = load_state_dict(checkpoint_file)

            # Requantize and load quantized weights from state_dict
            requantize(model, state_dict=state_dict, quantization_map=qmap)
            model.eval()
            return cls(model)
        else:
            raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")


def load_flux_transformer(path: Path) -> FluxTransformer2DModel:
    # model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
    model_8bit_path = path / "quantized"
    if model_8bit_path.exists():
        # The quantized model exists, load it.
        # TODO(ryand): The requantize(...) operation in from_pretrained(...) is very slow. This seems like
        # something that we should be able to make much faster.
        q_model = QuantizedFluxTransformer2DModel.from_pretrained(model_8bit_path)

        # Access the underlying wrapped model.
        # We access the wrapped model, even though it is private, because it simplifies the type checking by
        # always returning a FluxTransformer2DModel from this function.
        model = q_model._wrapped
    else:
        # The quantized model does not exist yet, quantize and save it.
        # TODO(ryand): Loading in float16 and then quantizing seems to result in NaNs. In order to run this on
        # GPUs that don't support bfloat16, we would need to host the quantized model instead of generating it
        # here.
        model = FluxTransformer2DModel.from_pretrained(path, local_files_only=True, torch_dtype=torch.bfloat16)
        assert isinstance(model, FluxTransformer2DModel)

        q_model = QuantizedFluxTransformer2DModel.quantize(model, weights=qfloat8)

        model_8bit_path.mkdir(parents=True, exist_ok=True)
        q_model.save_pretrained(model_8bit_path)

        # (See earlier comment about accessing the wrapped model.)
        model = q_model._wrapped

    assert isinstance(model, FluxTransformer2DModel)
    return model


def main():
    start = time.time()
    model = load_flux_transformer(
        Path("/data/invokeai/models/.download_cache/black-forest-labs_flux.1-schnell/FLUX.1-schnell/transformer/")
    )
    print(f"Time to load: {time.time() - start}s")
    print("hi")


if __name__ == "__main__":
    main()
@@ -220,17 +220,11 @@ class LoKRLayer(LoRALayerBase):
         if self.w1 is None:
             self.w1_a = values["lokr_w1_a"]
             self.w1_b = values["lokr_w1_b"]
-        else:
-            self.w1_b = None
-            self.w1_a = None
 
         self.w2 = values.get("lokr_w2", None)
         if self.w2 is None:
             self.w2_a = values["lokr_w2_a"]
             self.w2_b = values["lokr_w2_b"]
-        else:
-            self.w2_a = None
-            self.w2_b = None
 
         self.t2 = values.get("lokr_t2", None)
 
@@ -378,39 +372,7 @@ class IA3Layer(LoRALayerBase):
         self.on_input = self.on_input.to(device=device, dtype=dtype)
 
 
-class NormLayer(LoRALayerBase):
-    # bias handled in LoRALayerBase(calc_size, to)
-    # weight: torch.Tensor
-    # bias: Optional[torch.Tensor]
-
-    def __init__(
-        self,
-        layer_key: str,
-        values: Dict[str, torch.Tensor],
-    ):
-        super().__init__(layer_key, values)
-
-        self.weight = values["w_norm"]
-        self.bias = values.get("b_norm", None)
-
-        self.rank = None  # unscaled
-        self.check_keys(values, {"w_norm", "b_norm"})
-
-    def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor:
-        return self.weight
-
-    def calc_size(self) -> int:
-        model_size = super().calc_size()
-        model_size += self.weight.nelement() * self.weight.element_size()
-        return model_size
-
-    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
-        super().to(device=device, dtype=dtype)
-
-        self.weight = self.weight.to(device=device, dtype=dtype)
-
-
-AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer, NormLayer]
+AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
 
 
 class LoRAModelRaw(RawModel):  # (torch.nn.Module):
@@ -551,10 +513,6 @@ class LoRAModelRaw(RawModel):  # (torch.nn.Module):
             elif "on_input" in values:
                 layer = IA3Layer(layer_key, values)
 
-            # norms
-            elif "w_norm" in values:
-                layer = NormLayer(layer_key, values)
-
             else:
                 print(f">> Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
                 raise Exception("Unknown lora format!")
@ -11,7 +11,6 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from transformers import CLIPTokenizer

-from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
 from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
 from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
@ -46,7 +45,6 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
             SpandrelImageToImageModel,
             GroundingDinoPipeline,
             SegmentAnythingPipeline,
-            DepthAnythingPipeline,
         ),
     ):
         return model.calc_size()
@ -54,6 +54,7 @@ def filter_files(
                 "lora_weights.safetensors",
                 "weights.pb",
                 "onnx_data",
+                "spiece.model", # Added for `black-forest-labs/FLUX.1-schnell`.
             )
         ):
             paths.append(file)
@ -62,7 +63,7 @@ def filter_files(
         # downloading random checkpoints that might also be in the repo. However there is no guarantee
         # that a checkpoint doesn't contain "model" in its name, and no guarantee that future diffusers models
         # will adhere to this naming convention, so this is an area to be careful of.
-        elif re.search(r"model(\.[^.]+)?\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
+        elif re.search(r"model.*\.(safetensors|bin|onnx|xml|pth|pt|ckpt|msgpack)$", file.name):
             paths.append(file)

     # limit search to subfolder if requested
@ -97,7 +98,9 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
         if variant == ModelRepoVariant.Flax:
             result.add(path)

-        elif path.suffix in [".json", ".txt"]:
+        # Note: '.model' was added to support:
+        # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model
+        elif path.suffix in [".json", ".txt", ".model"]:
             result.add(path)

         elif variant in [
@ -140,6 +143,23 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path
                 continue

     for candidate_list in subfolder_weights.values():
+        # Check if at least one of the files has the explicit fp16 variant.
+        at_least_one_fp16 = False
+        for candidate in candidate_list:
+            if len(candidate.path.suffixes) == 2 and candidate.path.suffixes[0] == ".fp16":
+                at_least_one_fp16 = True
+                break
+
+        if not at_least_one_fp16:
+            # If none of the candidates in this candidate_list have the explicit fp16 variant label, then this
+            # candidate_list probably doesn't adhere to the variant naming convention that we expected. In this case,
+            # we'll simply keep all the candidates. An example of a model that hits this case is
+            # `black-forest-labs/FLUX.1-schnell` (as of commit 012d2fd).
+            for candidate in candidate_list:
+                result.add(candidate.path)
+
+        # The candidate_list seems to have the expected variant naming convention. We'll select the highest scoring
+        # candidate.
         highest_score_candidate = max(candidate_list, key=lambda candidate: candidate.score)
         if highest_score_candidate:
             result.add(highest_score_candidate.path)
@ -0,0 +1,77 @@
import json
import os
from typing import Union

from diffusers.models.model_loading_utils import load_state_dict
from diffusers.utils import (
    CONFIG_NAME,
    SAFE_WEIGHTS_INDEX_NAME,
    SAFETENSORS_WEIGHTS_NAME,
    _get_checkpoint_shard_files,
    is_accelerate_available,
)
from optimum.quanto.models import QuantizedDiffusersModel
from optimum.quanto.models.shared_dict import ShardedStateDict

from invokeai.backend.requantize import requantize


class FastQuantizedDiffusersModel(QuantizedDiffusersModel):
    @classmethod
    def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
        """We override the `from_pretrained()` method in order to use our custom `requantize()` implementation."""
        if cls.base_class is None:
            raise ValueError("The `base_class` attribute needs to be configured.")

        if not is_accelerate_available():
            raise ValueError("Reloading a quantized diffusers model requires the accelerate library.")
        from accelerate import init_empty_weights

        if os.path.isdir(model_name_or_path):
            # Look for a quantization map
            qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
            if not os.path.exists(qmap_path):
                raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")

            # Look for original model config file.
            model_config_path = os.path.join(model_name_or_path, CONFIG_NAME)
            if not os.path.exists(model_config_path):
                raise ValueError(f"{CONFIG_NAME} not found in {model_name_or_path}.")

            with open(qmap_path, "r", encoding="utf-8") as f:
                qmap = json.load(f)

            with open(model_config_path, "r", encoding="utf-8") as f:
                original_model_cls_name = json.load(f)["_class_name"]
            configured_cls_name = cls.base_class.__name__
            if configured_cls_name != original_model_cls_name:
                raise ValueError(
                    f"Configured base class ({configured_cls_name}) differs from what was derived from the provided configuration ({original_model_cls_name})."
                )

            # Create an empty model
            config = cls.base_class.load_config(model_name_or_path)
            with init_empty_weights():
                model = cls.base_class.from_config(config)

            # Look for the index of a sharded checkpoint
            checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
            if os.path.exists(checkpoint_file):
                # Convert the checkpoint path to a list of shards
                _, sharded_metadata = _get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
                # Create a mapping for the sharded safetensor files
                state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
            else:
                # Look for a single checkpoint file
                checkpoint_file = os.path.join(model_name_or_path, SAFETENSORS_WEIGHTS_NAME)
                if not os.path.exists(checkpoint_file):
                    raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
                # Get state_dict from model checkpoint
                state_dict = load_state_dict(checkpoint_file)

            # Requantize and load quantized weights from state_dict
            requantize(model, state_dict=state_dict, quantization_map=qmap)
            model.eval()
            return cls(model)
        else:
            raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")
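For orientation, a minimal subclass sketch follows: concrete subclasses are expected to set `base_class` to the diffusers model they wrap. The subclass name mirrors the QuantizedFluxTransformer2DModel referenced elsewhere in this branch, but the import path for FluxTransformer2DModel is an assumption about the pinned diffusers build, not something this diff prescribes.

# Hedged sketch: assumes the pinned diffusers build exposes FluxTransformer2DModel at the top level.
from diffusers import FluxTransformer2DModel


class QuantizedFluxTransformer2DModel(FastQuantizedDiffusersModel):
    # base_class tells the optimum-quanto machinery which diffusers model to rebuild
    # (with empty weights) before requantizing from the saved quantization map.
    base_class = FluxTransformer2DModel


# Loading then goes through the overridden fast path:
# q_model = QuantizedFluxTransformer2DModel.from_pretrained("/path/to/quantized/transformer")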
@ -0,0 +1,61 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from optimum.quanto.models import QuantizedTransformersModel
|
||||||
|
from optimum.quanto.models.shared_dict import ShardedStateDict
|
||||||
|
from transformers import AutoConfig
|
||||||
|
from transformers.modeling_utils import get_checkpoint_shard_files, load_state_dict
|
||||||
|
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, is_accelerate_available
|
||||||
|
|
||||||
|
from invokeai.backend.requantize import requantize
|
||||||
|
|
||||||
|
|
||||||
|
class FastQuantizedTransformersModel(QuantizedTransformersModel):
|
||||||
|
@classmethod
|
||||||
|
def from_pretrained(cls, model_name_or_path: Union[str, os.PathLike]):
|
||||||
|
"""We override the `from_pretrained()` method in order to use our custom `requantize()` implementation."""
|
||||||
|
if cls.auto_class is None:
|
||||||
|
raise ValueError(
|
||||||
|
"Quantized models cannot be reloaded using {cls}: use a specialized quantized class such as QuantizedModelForCausalLM instead."
|
||||||
|
)
|
||||||
|
if not is_accelerate_available():
|
||||||
|
raise ValueError("Reloading a quantized transformers model requires the accelerate library.")
|
||||||
|
from accelerate import init_empty_weights
|
||||||
|
|
||||||
|
if os.path.isdir(model_name_or_path):
|
||||||
|
# Look for a quantization map
|
||||||
|
qmap_path = os.path.join(model_name_or_path, cls._qmap_name())
|
||||||
|
if not os.path.exists(qmap_path):
|
||||||
|
raise ValueError(f"No quantization map found in {model_name_or_path}: is this a quantized model ?")
|
||||||
|
with open(qmap_path, "r", encoding="utf-8") as f:
|
||||||
|
qmap = json.load(f)
|
||||||
|
# Create an empty model
|
||||||
|
config = AutoConfig.from_pretrained(model_name_or_path)
|
||||||
|
with init_empty_weights():
|
||||||
|
model = cls.auto_class.from_config(config)
|
||||||
|
# Look for the index of a sharded checkpoint
|
||||||
|
checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_INDEX_NAME)
|
||||||
|
if os.path.exists(checkpoint_file):
|
||||||
|
# Convert the checkpoint path to a list of shards
|
||||||
|
checkpoint_file, sharded_metadata = get_checkpoint_shard_files(model_name_or_path, checkpoint_file)
|
||||||
|
# Create a mapping for the sharded safetensor files
|
||||||
|
state_dict = ShardedStateDict(model_name_or_path, sharded_metadata["weight_map"])
|
||||||
|
else:
|
||||||
|
# Look for a single checkpoint file
|
||||||
|
checkpoint_file = os.path.join(model_name_or_path, SAFE_WEIGHTS_NAME)
|
||||||
|
if not os.path.exists(checkpoint_file):
|
||||||
|
raise ValueError(f"No safetensor weights found in {model_name_or_path}.")
|
||||||
|
# Get state_dict from model checkpoint
|
||||||
|
state_dict = load_state_dict(checkpoint_file)
|
||||||
|
# Requantize and load quantized weights from state_dict
|
||||||
|
requantize(model, state_dict=state_dict, quantization_map=qmap)
|
||||||
|
if getattr(model.config, "tie_word_embeddings", True):
|
||||||
|
# Tie output weight embeddings to input weight embeddings
|
||||||
|
# Note that if they were quantized they would NOT be tied
|
||||||
|
model.tie_weights()
|
||||||
|
# Set model in evaluation mode as it is done in transformers
|
||||||
|
model.eval()
|
||||||
|
return cls(model)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError("Reloading quantized models directly from the hub is not supported yet.")
|
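The transformers-side counterpart works the same way, except that subclasses pin `auto_class` instead of `base_class`. A minimal sketch for wrapping the FLUX T5 text encoder follows; the subclass name is hypothetical and the choice of AutoModelForTextEncoding is an assumption rather than something this branch prescribes.

# Hypothetical subclass sketch; AutoModelForTextEncoding is an assumed auto-class choice.
from transformers import AutoModelForTextEncoding


class FastQuantizedT5EncoderModel(FastQuantizedTransformersModel):
    # auto_class is used by from_pretrained() above to rebuild the architecture from the
    # saved config (via AutoConfig) before requantizing the weights.
    auto_class = AutoModelForTextEncoding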
invokeai/backend/requantize.py (new file, 53 lines)
@ -0,0 +1,53 @@
from typing import Any, Dict

import torch
from optimum.quanto.quantize import _quantize_submodule

# def custom_freeze(model: torch.nn.Module):
#     for name, m in model.named_modules():
#         if isinstance(m, QModuleMixin):
#             m.weight =
#             m.freeze()


def requantize(
    model: torch.nn.Module,
    state_dict: Dict[str, Any],
    quantization_map: Dict[str, Dict[str, str]],
    device: torch.device = None,
):
    if device is None:
        device = next(model.parameters()).device
        if device.type == "meta":
            device = torch.device("cpu")

    # Quantize the model with parameters from the quantization map
    for name, m in model.named_modules():
        qconfig = quantization_map.get(name, None)
        if qconfig is not None:
            weights = qconfig["weights"]
            if weights == "none":
                weights = None
            activations = qconfig["activations"]
            if activations == "none":
                activations = None
            _quantize_submodule(model, name, m, weights=weights, activations=activations)

    # Move model parameters and buffers to CPU before materializing quantized weights
    for name, m in model.named_modules():

        def move_tensor(t, device):
            if t.device.type == "meta":
                return torch.empty_like(t, device=device)
            return t.to(device)

        for name, param in m.named_parameters(recurse=False):
            setattr(m, name, torch.nn.Parameter(move_tensor(param, "cpu")))
        for name, param in m.named_buffers(recurse=False):
            setattr(m, name, move_tensor(param, "cpu"))

    # Freeze model and move to target device
    # freeze(model)
    # model.to(device)

    # Load the quantized model weights
    model.load_state_dict(state_dict, strict=False)
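To illustrate the round trip that requantize() is designed for, here is a hedged, self-contained sketch. It assumes optimum-quanto's public quantize/freeze/quantization_map helpers and uses a throwaway toy model; it illustrates the save/reload flow that the overridden from_pretrained() methods above rely on, and is not code from this branch.

import torch
from optimum.quanto import freeze, qfloat8, quantization_map, quantize

from invokeai.backend.requantize import requantize


def toy_model() -> torch.nn.Module:
    # Purely illustrative stand-in for a real diffusers/transformers model.
    return torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU(), torch.nn.Linear(8, 2))


model = toy_model()
quantize(model, weights=qfloat8)  # attach qfloat8 weight quantizers in place
freeze(model)                     # materialize the quantized weights
qmap = quantization_map(model)    # records which submodules were quantized, and how
state_dict = model.state_dict()

# Rebuild an identical architecture and restore the quantized weights directly,
# without re-running the slow default requantization path.
restored = toy_model()
requantize(restored, state_dict=state_dict, quantization_map=qmap)
restored.eval()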
@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 import diffusers
 import torch
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.loaders import FromOriginalControlNetMixin
+from diffusers.loaders.single_file_model import FromOriginalModelMixin
 from diffusers.models.attention_processor import AttentionProcessor, AttnProcessor
 from diffusers.models.controlnet import ControlNetConditioningEmbedding, ControlNetOutput, zero_module
 from diffusers.models.embeddings import (
@ -32,7 +32,7 @@ from invokeai.backend.util.logging import InvokeAILogger
 logger = InvokeAILogger.get_logger(__name__)


-class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlNetMixin):
+class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalModelMixin):
     """
     A ControlNet model.

@ -53,61 +53,61 @@
   },
   "dependencies": {
     "@chakra-ui/react-use-size": "^2.1.0",
-    "@dagrejs/dagre": "^1.1.3",
+    "@dagrejs/dagre": "^1.1.2",
-    "@dagrejs/graphlib": "^2.2.3",
+    "@dagrejs/graphlib": "^2.2.2",
     "@dnd-kit/core": "^6.1.0",
     "@dnd-kit/sortable": "^8.0.0",
     "@dnd-kit/utilities": "^3.2.2",
-    "@fontsource-variable/inter": "^5.0.20",
+    "@fontsource-variable/inter": "^5.0.18",
     "@invoke-ai/ui-library": "^0.0.25",
-    "@nanostores/react": "^0.7.3",
+    "@nanostores/react": "^0.7.2",
     "@reduxjs/toolkit": "2.2.3",
     "@roarr/browser-log-writer": "^1.3.0",
-    "chakra-react-select": "^4.9.1",
+    "chakra-react-select": "^4.7.6",
-    "compare-versions": "^6.1.1",
+    "compare-versions": "^6.1.0",
     "dateformat": "^5.0.3",
-    "fracturedjsonjs": "^4.0.2",
+    "fracturedjsonjs": "^4.0.1",
-    "framer-motion": "^11.3.24",
+    "framer-motion": "^11.1.8",
-    "i18next": "^23.12.2",
+    "i18next": "^23.11.3",
-    "i18next-http-backend": "^2.5.2",
+    "i18next-http-backend": "^2.5.1",
     "idb-keyval": "^6.2.1",
     "jsondiffpatch": "^0.6.0",
-    "konva": "^9.3.14",
+    "konva": "^9.3.6",
     "lodash-es": "^4.17.21",
-    "nanostores": "^0.11.2",
+    "nanostores": "^0.10.3",
     "new-github-issue-url": "^1.0.0",
-    "overlayscrollbars": "^2.10.0",
+    "overlayscrollbars": "^2.7.3",
     "overlayscrollbars-react": "^0.5.6",
-    "query-string": "^9.1.0",
+    "query-string": "^9.0.0",
     "react": "^18.3.1",
     "react-colorful": "^5.6.1",
     "react-dom": "^18.3.1",
     "react-dropzone": "^14.2.3",
     "react-error-boundary": "^4.0.13",
-    "react-hook-form": "^7.52.2",
+    "react-hook-form": "^7.51.4",
     "react-hotkeys-hook": "4.5.0",
-    "react-i18next": "^14.1.3",
+    "react-i18next": "^14.1.1",
-    "react-icons": "^5.2.1",
+    "react-icons": "^5.2.0",
     "react-konva": "^18.2.10",
     "react-redux": "9.1.2",
-    "react-resizable-panels": "^2.0.23",
+    "react-resizable-panels": "^2.0.19",
     "react-select": "5.8.0",
-    "react-use": "^17.5.1",
+    "react-use": "^17.5.0",
-    "react-virtuoso": "^4.9.0",
+    "react-virtuoso": "^4.7.10",
-    "reactflow": "^11.11.4",
+    "reactflow": "^11.11.3",
     "redux-dynamic-middlewares": "^2.2.0",
     "redux-remember": "^5.1.0",
     "redux-undo": "^1.1.0",
-    "rfdc": "^1.4.1",
+    "rfdc": "^1.3.1",
     "roarr": "^7.21.1",
     "serialize-error": "^11.0.3",
     "socket.io-client": "^4.7.5",
-    "use-debounce": "^10.0.2",
+    "use-debounce": "^10.0.0",
     "use-device-pixel-ratio": "^1.1.2",
     "use-image": "^1.1.1",
-    "uuid": "^10.0.0",
+    "uuid": "^9.0.1",
-    "zod": "^3.23.8",
+    "zod": "^3.23.6",
-    "zod-validation-error": "^3.3.1"
+    "zod-validation-error": "^3.2.0"
   },
   "peerDependencies": {
     "@chakra-ui/react": "^2.8.2",
@ -118,38 +118,38 @@
   "devDependencies": {
     "@invoke-ai/eslint-config-react": "^0.0.14",
     "@invoke-ai/prettier-config-react": "^0.0.7",
-    "@storybook/addon-essentials": "^8.2.8",
+    "@storybook/addon-essentials": "^8.0.10",
-    "@storybook/addon-interactions": "^8.2.8",
+    "@storybook/addon-interactions": "^8.0.10",
-    "@storybook/addon-links": "^8.2.8",
+    "@storybook/addon-links": "^8.0.10",
-    "@storybook/addon-storysource": "^8.2.8",
+    "@storybook/addon-storysource": "^8.0.10",
-    "@storybook/manager-api": "^8.2.8",
+    "@storybook/manager-api": "^8.0.10",
-    "@storybook/react": "^8.2.8",
+    "@storybook/react": "^8.0.10",
-    "@storybook/react-vite": "^8.2.8",
+    "@storybook/react-vite": "^8.0.10",
-    "@storybook/theming": "^8.2.8",
+    "@storybook/theming": "^8.0.10",
     "@types/dateformat": "^5.0.2",
     "@types/lodash-es": "^4.17.12",
-    "@types/node": "^20.14.15",
+    "@types/node": "^20.12.10",
-    "@types/react": "^18.3.3",
+    "@types/react": "^18.3.1",
     "@types/react-dom": "^18.3.0",
-    "@types/uuid": "^10.0.0",
+    "@types/uuid": "^9.0.8",
-    "@vitejs/plugin-react-swc": "^3.7.0",
+    "@vitejs/plugin-react-swc": "^3.6.0",
     "@vitest/coverage-v8": "^1.5.0",
     "@vitest/ui": "^1.5.0",
     "concurrently": "^8.2.2",
     "dpdm": "^3.14.0",
     "eslint": "^8.57.0",
-    "eslint-plugin-i18next": "^6.0.9",
+    "eslint-plugin-i18next": "^6.0.3",
     "eslint-plugin-path": "^1.3.0",
-    "knip": "^5.27.2",
+    "knip": "^5.12.3",
     "openapi-types": "^12.1.3",
-    "openapi-typescript": "^7.3.0",
+    "openapi-typescript": "^6.7.5",
-    "prettier": "^3.3.3",
+    "prettier": "^3.2.5",
     "rollup-plugin-visualizer": "^5.12.0",
-    "storybook": "^8.2.8",
+    "storybook": "^8.0.10",
     "ts-toolbelt": "^9.6.0",
-    "tsafe": "^1.7.2",
+    "tsafe": "^1.6.6",
-    "typescript": "^5.5.4",
+    "typescript": "^5.4.5",
-    "vite": "^5.4.0",
+    "vite": "^5.2.11",
     "vite-plugin-css-injected-by-js": "^3.5.1",
     "vite-plugin-dts": "^3.9.1",
     "vite-plugin-eslint": "^1.8.1",
invokeai/frontend/web/pnpm-lock.yaml (generated, 5129 lines): file diff suppressed because it is too large.
@ -200,7 +200,6 @@
     "delete": "Delete",
     "depthAnything": "Depth Anything",
     "depthAnythingDescription": "Depth map generation using the Depth Anything technique",
-    "depthAnythingSmallV2": "Small V2",
     "depthMidas": "Depth (Midas)",
     "depthMidasDescription": "Depth map generation using Midas",
     "depthZoe": "Depth (Zoe)",
@ -1,40 +1,26 @@
 /* eslint-disable no-console */
 import fs from 'node:fs';

-import openapiTS, { astToString } from 'openapi-typescript';
+import openapiTS from 'openapi-typescript';
-import ts from 'typescript';

 const OPENAPI_URL = 'http://127.0.0.1:9090/openapi.json';
 const OUTPUT_FILE = 'src/services/api/schema.ts';

 async function generateTypes(schema) {
   process.stdout.write(`Generating types ${OUTPUT_FILE}...`);

-  // Use https://ts-ast-viewer.com to figure out how to create these AST nodes - define a type and use the bottom-left pane's output
-  // `Blob` type
-  const BLOB = ts.factory.createTypeReferenceNode(ts.factory.createIdentifier('Blob'));
-  // `null` type
-  const NULL = ts.factory.createLiteralTypeNode(ts.factory.createNull());
-  // `Record<string, unknown>` type
-  const RECORD_STRING_UNKNOWN = ts.factory.createTypeReferenceNode(ts.factory.createIdentifier('Record'), [
-    ts.factory.createKeywordTypeNode(ts.SyntaxKind.StringKeyword),
-    ts.factory.createKeywordTypeNode(ts.SyntaxKind.UnknownKeyword),
-  ]);
-
   const types = await openapiTS(schema, {
     exportType: true,
     transform: (schemaObject) => {
       if ('format' in schemaObject && schemaObject.format === 'binary') {
-        return schemaObject.nullable ? ts.factory.createUnionTypeNode([BLOB, NULL]) : BLOB;
+        return schemaObject.nullable ? 'Blob | null' : 'Blob';
       }
       if (schemaObject.title === 'MetadataField') {
         // This is `Record<string, never>` by default, but it actually accepts any a dict of any valid JSON value.
-        return RECORD_STRING_UNKNOWN;
+        return 'Record<string, unknown>';
       }
     },
-    defaultNonNullable: false,
   });
-  fs.writeFileSync(OUTPUT_FILE, astToString(types));
+  fs.writeFileSync(OUTPUT_FILE, types);
   process.stdout.write(`\nOK!\r\n`);
 }

@ -42,7 +42,6 @@ const DepthAnythingProcessor = (props: Props) => {

   const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
     () => [
-      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
       { label: t('controlnet.small'), value: 'small' },
       { label: t('controlnet.base'), value: 'base' },
       { label: t('controlnet.large'), value: 'large' },
@ -94,7 +94,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
     buildDefaults: (baseModel?: BaseModelType) => ({
       id: 'depth_anything_image_processor',
       type: 'depth_anything_image_processor',
-      model_size: 'small_v2',
+      model_size: 'small',
       resolution: baseModel === 'sdxl' ? 1024 : 512,
     }),
   },
@ -84,7 +84,7 @@ export type RequiredDepthAnythingImageProcessorInvocation = O.Required<
   'type' | 'model_size' | 'resolution' | 'offload'
 >;

-const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
+const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
 export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
 export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
   zDepthAnythingModelSize.safeParse(v).success;
@ -24,7 +24,6 @@ export const DepthAnythingProcessor = memo(({ onChange, config }: Props) => {

   const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
     () => [
-      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
       { label: t('controlnet.small'), value: 'small' },
       { label: t('controlnet.base'), value: 'base' },
       { label: t('controlnet.large'), value: 'large' },
@ -36,7 +36,7 @@ const zContentShuffleProcessorConfig = z.object({
 });
 export type ContentShuffleProcessorConfig = z.infer<typeof zContentShuffleProcessorConfig>;

-const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
+const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
 export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
 export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
   zDepthAnythingModelSize.safeParse(v).success;
@ -298,7 +298,7 @@ export const CA_PROCESSOR_DATA: CAProcessorsData = {
     buildDefaults: () => ({
       id: 'depth_anything_image_processor',
       type: 'depth_anything_image_processor',
-      model_size: 'small_v2',
+      model_size: 'small',
     }),
     buildNode: (image, config) => ({
       ...config,
@ -57,11 +57,7 @@ export const UpscaleWarning = () => {
     $installModelsTab.set(3);
   }, [dispatch]);

-  if (modelWarnings.length && !shouldShowButton) {
-    return null;
-  }
-
-  if ((!modelWarnings.length && !otherWarnings.length) || isLoading) {
+  if ((!modelWarnings.length && !otherWarnings.length) || isLoading || !shouldShowButton) {
     return null;
   }

File diff suppressed because one or more lines are too long
@ -33,31 +33,35 @@ classifiers = [
 ]
 dependencies = [
   # Core generation dependencies, pinned for reproducible builds.
-  "accelerate==0.30.1",
+  "accelerate==0.33.0",
   "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
   "compel==2.0.2",
   "controlnet-aux==0.0.7",
-  "diffusers[torch]==0.27.2",
+  # TODO(ryand): Bump this once the next diffusers release is ready.
+  "diffusers[torch] @ git+https://github.com/huggingface/diffusers.git@4c6152c2fb0ade468aadb417102605a07a8635d3",
   "invisible-watermark==0.2.0", # needed to install SDXL base and refiner using their repo_ids
   "mediapipe==0.10.7", # needed for "mediapipeface" controlnet model
   "numpy==1.26.4", # >1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal()
   "onnx==1.15.0",
   "onnxruntime==1.16.3",
   "opencv-python==4.9.0.80",
+  "optimum-quanto==0.2.4",
   "pytorch-lightning==2.1.3",
   "safetensors==0.4.3",
+  # sentencepiece is required to load T5TokenizerFast (used by FLUX).
+  "sentencepiece==0.2.0",
   "spandrel==0.3.4",
   "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26
-  "torch==2.2.2",
+  "torch==2.4.0",
   "torchmetrics==0.11.4",
   "torchsde==0.2.6",
-  "torchvision==0.17.2",
+  "torchvision==0.19.0",
   "transformers==4.41.1",

   # Core application dependencies, pinned for reproducible builds.
   "fastapi-events==0.11.1",
   "fastapi==0.111.0",
-  "huggingface-hub==0.23.1",
+  "huggingface-hub==0.24.5",
   "pydantic-settings==2.2.1",
   "pydantic==2.7.2",
   "python-socketio==5.11.1",
@ -326,3 +326,80 @@ def test_select_multiple_weights(
 ) -> None:
     filtered_files = filter_files(sd15_test_files, variant)
     assert set(filtered_files) == {Path(f) for f in expected_files}
+
+
+@pytest.fixture
+def flux_schnell_test_files() -> list[Path]:
+    return [
+        Path(f)
+        for f in [
+            "FLUX.1-schnell/.gitattributes",
+            "FLUX.1-schnell/README.md",
+            "FLUX.1-schnell/ae.safetensors",
+            "FLUX.1-schnell/flux1-schnell.safetensors",
+            "FLUX.1-schnell/model_index.json",
+            "FLUX.1-schnell/scheduler/scheduler_config.json",
+            "FLUX.1-schnell/schnell_grid.jpeg",
+            "FLUX.1-schnell/text_encoder/config.json",
+            "FLUX.1-schnell/text_encoder/model.safetensors",
+            "FLUX.1-schnell/text_encoder_2/config.json",
+            "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
+            "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
+            "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
+            "FLUX.1-schnell/tokenizer/merges.txt",
+            "FLUX.1-schnell/tokenizer/special_tokens_map.json",
+            "FLUX.1-schnell/tokenizer/tokenizer_config.json",
+            "FLUX.1-schnell/tokenizer/vocab.json",
+            "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
+            "FLUX.1-schnell/tokenizer_2/spiece.model",
+            "FLUX.1-schnell/tokenizer_2/tokenizer.json",
+            "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
+            "FLUX.1-schnell/transformer/config.json",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
+            "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
+            "FLUX.1-schnell/vae/config.json",
+            "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
+        ]
+    ]
+
+
+@pytest.mark.parametrize(
+    ["variant", "expected_files"],
+    [
+        (
+            ModelRepoVariant.Default,
+            [
+                "FLUX.1-schnell/model_index.json",
+                "FLUX.1-schnell/scheduler/scheduler_config.json",
+                "FLUX.1-schnell/text_encoder/config.json",
+                "FLUX.1-schnell/text_encoder/model.safetensors",
+                "FLUX.1-schnell/text_encoder_2/config.json",
+                "FLUX.1-schnell/text_encoder_2/model-00001-of-00002.safetensors",
+                "FLUX.1-schnell/text_encoder_2/model-00002-of-00002.safetensors",
+                "FLUX.1-schnell/text_encoder_2/model.safetensors.index.json",
+                "FLUX.1-schnell/tokenizer/merges.txt",
+                "FLUX.1-schnell/tokenizer/special_tokens_map.json",
+                "FLUX.1-schnell/tokenizer/tokenizer_config.json",
+                "FLUX.1-schnell/tokenizer/vocab.json",
+                "FLUX.1-schnell/tokenizer_2/special_tokens_map.json",
+                "FLUX.1-schnell/tokenizer_2/spiece.model",
+                "FLUX.1-schnell/tokenizer_2/tokenizer.json",
+                "FLUX.1-schnell/tokenizer_2/tokenizer_config.json",
+                "FLUX.1-schnell/transformer/config.json",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00001-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00002-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model-00003-of-00003.safetensors",
+                "FLUX.1-schnell/transformer/diffusion_pytorch_model.safetensors.index.json",
+                "FLUX.1-schnell/vae/config.json",
+                "FLUX.1-schnell/vae/diffusion_pytorch_model.safetensors",
+            ],
+        ),
+    ],
+)
+def test_select_flux_schnell_files(
+    flux_schnell_test_files: list[Path], variant: ModelRepoVariant, expected_files: list[str]
+) -> None:
+    filtered_files = filter_files(flux_schnell_test_files, variant)
+    assert set(filtered_files) == {Path(f) for f in expected_files}