Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
Depth Anything V2 (#6674)
- Updated the previous manual DepthAnything implementation to use the `transformers` implementation instead, so we can get upstream features.
- Plugged the DepthAnything models into Invoke's Model Manager.
- The `small_v2` model uses DepthAnythingV2. It has been added as a new model option and is now also the default in the Linear UI.

![opera_TxRhmbFole](https://github.com/user-attachments/assets/2a25abe3-ba0b-4f97-b75a-2ce5fd6246e6)

# Merge

Review and merge.
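For context, a minimal sketch of the `transformers` depth-estimation pipeline this change switches to. The Hub ID matches the new `small_v2` entry in the diff; loading by Hub ID (rather than through the Model Manager's local path, as the invocation does), the input/output file names, and the 512-pixel target resolution are illustrative assumptions.

```python
from PIL import Image
from transformers import pipeline

# Depth Anything V2 Small through the standard transformers depth-estimation pipeline.
pipe = pipeline(task="depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf")

image = Image.open("input.png").convert("RGB")
depth_map = pipe(image)["depth"]  # a PIL image with the same dimensions as the input

# Resize to the target resolution while preserving aspect ratio, mirroring what the
# invocation does after generating the depth map.
resolution = 512
new_height = int(image.height * (resolution / image.width))
depth_map = depth_map.resize((resolution, new_height))
depth_map.save("depth.png")
```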
This commit is contained in: commit 6cd40965c4
@@ -21,6 +21,8 @@ from controlnet_aux import (
from controlnet_aux.util import HWC3, ade_palette
from PIL import Image
from pydantic import BaseModel, Field, field_validator, model_validator
from transformers import pipeline
from transformers.pipelines import DepthEstimationPipeline

from invokeai.app.invocations.baseinvocation import (
    BaseInvocation,
@@ -44,13 +46,12 @@ from invokeai.app.invocations.util import validate_begin_end_step, validate_weig
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
from invokeai.backend.image_util.canny import get_canny_edges
from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
from invokeai.backend.image_util.hed import HEDProcessor
from invokeai.backend.image_util.lineart import LineartProcessor
from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
from invokeai.backend.image_util.util import np_to_pil, pil_to_np
from invokeai.backend.util.devices import TorchDevice


class ControlField(BaseModel):
@@ -592,7 +593,14 @@ class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
        return color_map


DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
DEPTH_ANYTHING_MODELS = {
    "large": "LiheYoung/depth-anything-large-hf",
    "base": "LiheYoung/depth-anything-base-hf",
    "small": "LiheYoung/depth-anything-small-hf",
    "small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
}


@invocation(
@@ -600,28 +608,33 @@ DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
    title="Depth Anything Processor",
    tags=["controlnet", "depth", "depth anything"],
    category="controlnet",
    version="1.1.2",
    version="1.1.3",
)
class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
    """Generates a depth map based on the Depth Anything algorithm"""

    model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
        default="small", description="The size of the depth model to use"
        default="small_v2", description="The size of the depth model to use"
    )
    resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)

    def run_processor(self, image: Image.Image) -> Image.Image:
        def loader(model_path: Path):
            return DepthAnythingDetector.load_model(
                model_path, model_size=self.model_size, device=TorchDevice.choose_torch_device()
            )
        def load_depth_anything(model_path: Path):
            depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
            assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
            return DepthAnythingPipeline(depth_anything_pipeline)

        with self._context.models.load_remote_model(
            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=loader
        ) as model:
            depth_anything_detector = DepthAnythingDetector(model, TorchDevice.choose_torch_device())
            processed_image = depth_anything_detector(image=image, resolution=self.resolution)
            return processed_image
            source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
        ) as depth_anything_detector:
            assert isinstance(depth_anything_detector, DepthAnythingPipeline)
            depth_map = depth_anything_detector.generate_depth(image)

            # Resizing to user target specified size
            new_height = int(image.size[1] * (self.resolution / image.size[0]))
            depth_map = depth_map.resize((self.resolution, new_height))

            return depth_map


@invocation(
@@ -1,90 +0,0 @@
from pathlib import Path
from typing import Literal

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from einops import repeat
from PIL import Image
from torchvision.transforms import Compose

from invokeai.app.services.config.config_default import get_config
from invokeai.backend.image_util.depth_anything.model.dpt import DPT_DINOv2
from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize
from invokeai.backend.util.logging import InvokeAILogger

config = get_config()
logger = InvokeAILogger.get_logger(config=config)

DEPTH_ANYTHING_MODELS = {
    "large": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitl14.pth?download=true",
    "base": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vitb14.pth?download=true",
    "small": "https://huggingface.co/spaces/LiheYoung/Depth-Anything/resolve/main/checkpoints/depth_anything_vits14.pth?download=true",
}


transform = Compose(
    [
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
)


class DepthAnythingDetector:
    def __init__(self, model: DPT_DINOv2, device: torch.device) -> None:
        self.model = model
        self.device = device

    @staticmethod
    def load_model(
        model_path: Path, device: torch.device, model_size: Literal["large", "base", "small"] = "small"
    ) -> DPT_DINOv2:
        match model_size:
            case "small":
                model = DPT_DINOv2(encoder="vits", features=64, out_channels=[48, 96, 192, 384])
            case "base":
                model = DPT_DINOv2(encoder="vitb", features=128, out_channels=[96, 192, 384, 768])
            case "large":
                model = DPT_DINOv2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])

        model.load_state_dict(torch.load(model_path.as_posix(), map_location="cpu"))
        model.eval()

        model.to(device)
        return model

    def __call__(self, image: Image.Image, resolution: int = 512) -> Image.Image:
        if not self.model:
            logger.warn("DepthAnything model was not loaded. Returning original image")
            return image

        np_image = np.array(image, dtype=np.uint8)
        np_image = np_image[:, :, ::-1] / 255.0

        image_height, image_width = np_image.shape[:2]
        np_image = transform({"image": np_image})["image"]
        tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            depth = self.model(tensor_image)
            depth = F.interpolate(depth[None], (image_height, image_width), mode="bilinear", align_corners=False)[0, 0]
            depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0

        depth_map = repeat(depth, "h w -> h w 3").cpu().numpy().astype(np.uint8)
        depth_map = Image.fromarray(depth_map)

        new_height = int(image_height * (resolution / image_width))
        depth_map = depth_map.resize((resolution, new_height))

        return depth_map
@@ -0,0 +1,31 @@
from typing import Optional

import torch
from PIL import Image
from transformers.pipelines import DepthEstimationPipeline

from invokeai.backend.raw_model import RawModel


class DepthAnythingPipeline(RawModel):
    """Custom wrapper for the Depth Estimation pipeline from transformers adding compatibility
    for Invoke's Model Management System"""

    def __init__(self, pipeline: DepthEstimationPipeline) -> None:
        self._pipeline = pipeline

    def generate_depth(self, image: Image.Image) -> Image.Image:
        depth_map = self._pipeline(image)["depth"]
        assert isinstance(depth_map, Image.Image)
        return depth_map

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
        if device is not None and device.type not in {"cpu", "cuda"}:
            device = None
        self._pipeline.model.to(device=device, dtype=dtype)
        self._pipeline.device = self._pipeline.model.device

    def calc_size(self) -> int:
        from invokeai.backend.model_manager.load.model_util import calc_module_size

        return calc_module_size(self._pipeline.model)
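A quick usage sketch of this new wrapper outside the invocation graph, based only on the code above; the checkpoint directory, device choice, and input file name here are illustrative assumptions, not part of the diff.

```python
from pathlib import Path

import torch
from PIL import Image
from transformers import pipeline
from transformers.pipelines import DepthEstimationPipeline

from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline

# Build the underlying transformers pipeline from a locally downloaded model directory
# (hypothetical path), then wrap it so the Model Manager can size and move it.
model_path = Path("/path/to/Depth-Anything-V2-Small-hf")
hf_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
assert isinstance(hf_pipeline, DepthEstimationPipeline)

depth_anything = DepthAnythingPipeline(hf_pipeline)
depth_anything.to(device=torch.device("cuda" if torch.cuda.is_available() else "cpu"))

depth_map: Image.Image = depth_anything.generate_depth(Image.open("input.png").convert("RGB"))
print(f"Model size in bytes: {depth_anything.calc_size()}")
```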
@@ -1,145 +0,0 @@
import torch.nn as nn


def _make_scratch(in_shape, out_shape, groups=1, expand=False):
    scratch = nn.Module()

    out_shape1 = out_shape
    out_shape2 = out_shape
    out_shape3 = out_shape
    if len(in_shape) >= 4:
        out_shape4 = out_shape

    if expand:
        out_shape1 = out_shape
        out_shape2 = out_shape * 2
        out_shape3 = out_shape * 4
        if len(in_shape) >= 4:
            out_shape4 = out_shape * 8

    scratch.layer1_rn = nn.Conv2d(
        in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer2_rn = nn.Conv2d(
        in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    scratch.layer3_rn = nn.Conv2d(
        in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
    )
    if len(in_shape) >= 4:
        scratch.layer4_rn = nn.Conv2d(
            in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
        )

    return scratch


class ResidualConvUnit(nn.Module):
    """Residual convolution module."""

    def __init__(self, features, activation, bn):
        """Init.

        Args:
            features (int): number of features
        """
        super().__init__()

        self.bn = bn

        self.groups = 1

        self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)

        if self.bn:
            self.bn1 = nn.BatchNorm2d(features)
            self.bn2 = nn.BatchNorm2d(features)

        self.activation = activation

        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input

        Returns:
            tensor: output
        """

        out = self.activation(x)
        out = self.conv1(out)
        if self.bn:
            out = self.bn1(out)

        out = self.activation(out)
        out = self.conv2(out)
        if self.bn:
            out = self.bn2(out)

        if self.groups > 1:
            out = self.conv_merge(out)

        return self.skip_add.add(out, x)


class FeatureFusionBlock(nn.Module):
    """Feature fusion block."""

    def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True, size=None):
        """Init.

        Args:
            features (int): number of features
        """
        super(FeatureFusionBlock, self).__init__()

        self.deconv = deconv
        self.align_corners = align_corners

        self.groups = 1

        self.expand = expand
        out_features = features
        if self.expand:
            out_features = features // 2

        self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)

        self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
        self.resConfUnit2 = ResidualConvUnit(features, activation, bn)

        self.skip_add = nn.quantized.FloatFunctional()

        self.size = size

    def forward(self, *xs, size=None):
        """Forward pass.

        Returns:
            tensor: output
        """
        output = xs[0]

        if len(xs) == 2:
            res = self.resConfUnit1(xs[1])
            output = self.skip_add.add(output, res)

        output = self.resConfUnit2(output)

        if (size is None) and (self.size is None):
            modifier = {"scale_factor": 2}
        elif size is None:
            modifier = {"size": self.size}
        else:
            modifier = {"size": size}

        output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners)

        output = self.out_conv(output)

        return output
@@ -1,183 +0,0 @@
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F

from invokeai.backend.image_util.depth_anything.model.blocks import FeatureFusionBlock, _make_scratch

torchhub_path = Path(__file__).parent.parent / "torchhub"


def _make_fusion_block(features, use_bn, size=None):
    return FeatureFusionBlock(
        features,
        nn.ReLU(False),
        deconv=False,
        bn=use_bn,
        expand=False,
        align_corners=True,
        size=size,
    )


class DPTHead(nn.Module):
    def __init__(self, nclass, in_channels, features, out_channels, use_bn=False, use_clstoken=False):
        super(DPTHead, self).__init__()

        self.nclass = nclass
        self.use_clstoken = use_clstoken

        self.projects = nn.ModuleList(
            [
                nn.Conv2d(
                    in_channels=in_channels,
                    out_channels=out_channel,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
                for out_channel in out_channels
            ]
        )

        self.resize_layers = nn.ModuleList(
            [
                nn.ConvTranspose2d(
                    in_channels=out_channels[0], out_channels=out_channels[0], kernel_size=4, stride=4, padding=0
                ),
                nn.ConvTranspose2d(
                    in_channels=out_channels[1], out_channels=out_channels[1], kernel_size=2, stride=2, padding=0
                ),
                nn.Identity(),
                nn.Conv2d(
                    in_channels=out_channels[3], out_channels=out_channels[3], kernel_size=3, stride=2, padding=1
                ),
            ]
        )

        if use_clstoken:
            self.readout_projects = nn.ModuleList()
            for _ in range(len(self.projects)):
                self.readout_projects.append(nn.Sequential(nn.Linear(2 * in_channels, in_channels), nn.GELU()))

        self.scratch = _make_scratch(
            out_channels,
            features,
            groups=1,
            expand=False,
        )

        self.scratch.stem_transpose = None

        self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
        self.scratch.refinenet4 = _make_fusion_block(features, use_bn)

        head_features_1 = features
        head_features_2 = 32

        if nclass > 1:
            self.scratch.output_conv = nn.Sequential(
                nn.Conv2d(head_features_1, head_features_1, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_1, nclass, kernel_size=1, stride=1, padding=0),
            )
        else:
            self.scratch.output_conv1 = nn.Conv2d(
                head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1
            )

            self.scratch.output_conv2 = nn.Sequential(
                nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
                nn.ReLU(True),
                nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
                nn.ReLU(True),
                nn.Identity(),
            )

    def forward(self, out_features, patch_h, patch_w):
        out = []
        for i, x in enumerate(out_features):
            if self.use_clstoken:
                x, cls_token = x[0], x[1]
                readout = cls_token.unsqueeze(1).expand_as(x)
                x = self.readout_projects[i](torch.cat((x, readout), -1))
            else:
                x = x[0]

            x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))

            x = self.projects[i](x)
            x = self.resize_layers[i](x)

            out.append(x)

        layer_1, layer_2, layer_3, layer_4 = out

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv1(path_1)
        out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
        out = self.scratch.output_conv2(out)

        return out


class DPT_DINOv2(nn.Module):
    def __init__(
        self,
        features,
        out_channels,
        encoder="vitl",
        use_bn=False,
        use_clstoken=False,
    ):
        super(DPT_DINOv2, self).__init__()

        assert encoder in ["vits", "vitb", "vitl"]

        # # in case the Internet connection is not stable, please load the DINOv2 locally
        # if use_local:
        #     self.pretrained = torch.hub.load(
        #         torchhub_path / "facebookresearch_dinov2_main",
        #         "dinov2_{:}14".format(encoder),
        #         source="local",
        #         pretrained=False,
        #     )
        # else:
        #     self.pretrained = torch.hub.load(
        #         "facebookresearch/dinov2",
        #         "dinov2_{:}14".format(encoder),
        #     )

        self.pretrained = torch.hub.load(
            "facebookresearch/dinov2",
            "dinov2_{:}14".format(encoder),
        )

        dim = self.pretrained.blocks[0].attn.qkv.in_features

        self.depth_head = DPTHead(1, dim, features, out_channels=out_channels, use_bn=use_bn, use_clstoken=use_clstoken)

    def forward(self, x):
        h, w = x.shape[-2:]

        features = self.pretrained.get_intermediate_layers(x, 4, return_class_token=True)

        patch_h, patch_w = h // 14, w // 14

        depth = self.depth_head(features, patch_h, patch_w)
        depth = F.interpolate(depth, size=(h, w), mode="bilinear", align_corners=True)
        depth = F.relu(depth)

        return depth.squeeze(1)
@@ -1,227 +0,0 @@
import math

import cv2
import numpy as np
import torch
import torch.nn.functional as F


def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Rezise the sample to ensure the given size. Keeps aspect ratio.

    Args:
        sample (dict): sample
        size (tuple): image size

    Returns:
        tuple: new size
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        return sample

    scale = [0, 0]
    scale[0] = size[0] / shape[0]
    scale[1] = size[1] / shape[1]

    scale = max(scale)

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # resize
    sample["image"] = cv2.resize(sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method)

    sample["disparity"] = cv2.resize(sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST)
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        tuple(shape[::-1]),
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)


class Resize(object):
    """Resize sample to given size (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller
                than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as least as possbile
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST)

            if "semseg_mask" in sample:
                # sample["semseg_mask"] = cv2.resize(
                #     sample["semseg_mask"], (width, height), interpolation=cv2.INTER_NEAREST
                # )
                sample["semseg_mask"] = F.interpolate(
                    torch.from_numpy(sample["semseg_mask"]).float()[None, None, ...], (height, width), mode="nearest"
                ).numpy()[0, 0]

            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                # sample["mask"] = sample["mask"].astype(bool)

        # print(sample['image'].shape, sample['depth'].shape)
        return sample


class NormalizeImage(object):
    """Normlize image by given mean and std."""

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input."""

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        if "semseg_mask" in sample:
            sample["semseg_mask"] = sample["semseg_mask"].astype(np.float32)
            sample["semseg_mask"] = np.ascontiguousarray(sample["semseg_mask"])

        return sample
@@ -11,6 +11,7 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
from diffusers.schedulers.scheduling_utils import SchedulerMixin
from transformers import CLIPTokenizer

from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
@@ -45,6 +46,7 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
            SpandrelImageToImageModel,
            GroundingDinoPipeline,
            SegmentAnythingPipeline,
            DepthAnythingPipeline,
        ),
    ):
        return model.calc_size()
@@ -200,6 +200,7 @@
    "delete": "Delete",
    "depthAnything": "Depth Anything",
    "depthAnythingDescription": "Depth map generation using the Depth Anything technique",
    "depthAnythingSmallV2": "Small V2",
    "depthMidas": "Depth (Midas)",
    "depthMidasDescription": "Depth map generation using Midas",
    "depthZoe": "Depth (Zoe)",
@@ -42,6 +42,7 @@ const DepthAnythingProcessor = (props: Props) => {

  const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
    () => [
      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
      { label: t('controlnet.small'), value: 'small' },
      { label: t('controlnet.base'), value: 'base' },
      { label: t('controlnet.large'), value: 'large' },
@@ -94,7 +94,7 @@ export const CONTROLNET_PROCESSORS: ControlNetProcessorsDict = {
    buildDefaults: (baseModel?: BaseModelType) => ({
      id: 'depth_anything_image_processor',
      type: 'depth_anything_image_processor',
      model_size: 'small',
      model_size: 'small_v2',
      resolution: baseModel === 'sdxl' ? 1024 : 512,
    }),
  },
@@ -84,7 +84,7 @@ export type RequiredDepthAnythingImageProcessorInvocation = O.Required<
  'type' | 'model_size' | 'resolution' | 'offload'
>;

const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
  zDepthAnythingModelSize.safeParse(v).success;
@@ -24,6 +24,7 @@ export const DepthAnythingProcessor = memo(({ onChange, config }: Props) => {

  const options: { label: string; value: DepthAnythingModelSize }[] = useMemo(
    () => [
      { label: t('controlnet.depthAnythingSmallV2'), value: 'small_v2' },
      { label: t('controlnet.small'), value: 'small' },
      { label: t('controlnet.base'), value: 'base' },
      { label: t('controlnet.large'), value: 'large' },
@@ -36,7 +36,7 @@ const zContentShuffleProcessorConfig = z.object({
});
export type ContentShuffleProcessorConfig = z.infer<typeof zContentShuffleProcessorConfig>;

const zDepthAnythingModelSize = z.enum(['large', 'base', 'small']);
const zDepthAnythingModelSize = z.enum(['large', 'base', 'small', 'small_v2']);
export type DepthAnythingModelSize = z.infer<typeof zDepthAnythingModelSize>;
export const isDepthAnythingModelSize = (v: unknown): v is DepthAnythingModelSize =>
  zDepthAnythingModelSize.safeParse(v).success;
@@ -298,7 +298,7 @@ export const CA_PROCESSOR_DATA: CAProcessorsData = {
    buildDefaults: () => ({
      id: 'depth_anything_image_processor',
      type: 'depth_anything_image_processor',
      model_size: 'small',
      model_size: 'small_v2',
    }),
    buildNode: (image, config) => ({
      ...config,
@@ -3679,10 +3679,10 @@ export type components = {
      /**
       * Model Size
       * @description The size of the depth model to use
       * @default small
       * @default small_v2
       * @enum {string}
       */
      model_size?: "large" | "base" | "small";
      model_size?: "large" | "base" | "small" | "small_v2";
      /**
       * Resolution
       * @description Pixel resolution for output image
@@ -7306,147 +7306,147 @@ export type components = {
      project_id: string | null;
    };
    InvocationOutputMap: {
      /* auto-generated mapping of invocation types to their output schemas (e.g. depth_anything_image_processor -> ImageOutput); regenerated and reordered by this commit */
    };
    /**
     * InvocationStartedEvent