Merge branch 'main' into feat/ui/upscale

This commit is contained in:
Lincoln Stein 2023-07-18 19:18:55 -04:00 committed by GitHub
commit 893e199677
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 731 additions and 202 deletions

View File

@ -4,17 +4,12 @@ from typing import Literal
import numpy as np
from pydantic import Field, validator
from invokeai.app.models.image import ImageField
from invokeai.app.models.image import ImageField
from invokeai.app.util.misc import SEED_MAX, get_random_seed
from .baseinvocation import (
BaseInvocation,
InvocationConfig,
InvocationContext,
BaseInvocationOutput,
UIConfig,
)
from .baseinvocation import (BaseInvocation, BaseInvocationOutput,
InvocationConfig, InvocationContext, UIConfig)
class IntCollectionOutput(BaseInvocationOutput):
@ -32,7 +27,8 @@ class FloatCollectionOutput(BaseInvocationOutput):
type: Literal["float_collection"] = "float_collection"
# Outputs
collection: list[float] = Field(default=[], description="The float collection")
collection: list[float] = Field(
default=[], description="The float collection")
class ImageCollectionOutput(BaseInvocationOutput):
@ -41,7 +37,8 @@ class ImageCollectionOutput(BaseInvocationOutput):
type: Literal["image_collection"] = "image_collection"
# Outputs
collection: list[ImageField] = Field(default=[], description="The output images")
collection: list[ImageField] = Field(
default=[], description="The output images")
class Config:
schema_extra = {"required": ["type", "collection"]}
@ -57,6 +54,14 @@ class RangeInvocation(BaseInvocation):
stop: int = Field(default=10, description="The stop of the range")
step: int = Field(default=1, description="The step of the range")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Range",
"tags": ["range", "integer", "collection"]
},
}
@validator("stop")
def stop_gt_start(cls, v, values):
if "start" in values and v <= values["start"]:
@ -79,10 +84,20 @@ class RangeOfSizeInvocation(BaseInvocation):
size: int = Field(default=1, description="The number of values")
step: int = Field(default=1, description="The step of the range")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Sized Range",
"tags": ["range", "integer", "size", "collection"]
},
}
def invoke(self, context: InvocationContext) -> IntCollectionOutput:
return IntCollectionOutput(
collection=list(range(self.start, self.start + self.size, self.step))
)
collection=list(
range(
self.start, self.start + self.size,
self.step)))
class RandomRangeInvocation(BaseInvocation):
@ -103,11 +118,21 @@ class RandomRangeInvocation(BaseInvocation):
default_factory=get_random_seed,
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Random Range",
"tags": ["range", "integer", "random", "collection"]
},
}
def invoke(self, context: InvocationContext) -> IntCollectionOutput:
rng = np.random.default_rng(self.seed)
return IntCollectionOutput(
collection=list(rng.integers(low=self.low, high=self.high, size=self.size))
)
collection=list(
rng.integers(
low=self.low, high=self.high,
size=self.size)))
class ImageCollectionInvocation(BaseInvocation):
@ -121,6 +146,7 @@ class ImageCollectionInvocation(BaseInvocation):
default=[], description="The image collection to load"
)
# fmt: on
def invoke(self, context: InvocationContext) -> ImageCollectionOutput:
return ImageCollectionOutput(collection=self.images)
@ -128,6 +154,7 @@ class ImageCollectionInvocation(BaseInvocation):
schema_extra = {
"ui": {
"type_hints": {
"title": "Image Collection",
"images": "image_collection",
}
},

View File

@ -148,6 +148,8 @@ class CompelInvocation(BaseInvocation):
cross_attention_control_args=options.get(
"cross_attention_control", None),)
c = c.detach().to("cpu")
conditioning_data = ConditioningFieldData(
conditionings=[
BasicConditioningInfo(
@ -230,6 +232,10 @@ class SDXLPromptInvocationBase:
del tokenizer_info
del text_encoder_info
c = c.detach().to("cpu")
if c_pooled is not None:
c_pooled = c_pooled.detach().to("cpu")
return c, c_pooled, None
def run_clip_compel(self, context, clip_field, prompt, get_pooled):
@ -306,6 +312,10 @@ class SDXLPromptInvocationBase:
del tokenizer_info
del text_encoder_info
c = c.detach().to("cpu")
if c_pooled is not None:
c_pooled = c_pooled.detach().to("cpu")
return c, c_pooled, ec
class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
@ -321,8 +331,8 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
crop_left: int = Field(0, description="")
target_width: int = Field(1024, description="")
target_height: int = Field(1024, description="")
clip1: ClipField = Field(None, description="Clip to use")
clip2: ClipField = Field(None, description="Clip to use")
clip: ClipField = Field(None, description="Clip to use")
clip2: ClipField = Field(None, description="Clip2 to use")
# Schema customisation
class Config(InvocationConfig):
@ -338,7 +348,7 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c1, c1_pooled, ec1 = self.run_clip_compel(context, self.clip1, self.prompt, False)
c1, c1_pooled, ec1 = self.run_clip_compel(context, self.clip, self.prompt, False)
if self.style.strip() == "":
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.prompt, True)
else:
@ -441,8 +451,8 @@ class SDXLRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
crop_left: int = Field(0, description="")
target_width: int = Field(1024, description="")
target_height: int = Field(1024, description="")
clip1: ClipField = Field(None, description="Clip to use")
clip2: ClipField = Field(None, description="Clip to use")
clip: ClipField = Field(None, description="Clip to use")
clip2: ClipField = Field(None, description="Clip2 to use")
# Schema customisation
class Config(InvocationConfig):
@ -458,7 +468,7 @@ class SDXLRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c1, c1_pooled, ec1 = self.run_clip_raw(context, self.clip1, self.prompt, False)
c1, c1_pooled, ec1 = self.run_clip_raw(context, self.clip, self.prompt, False)
if self.style.strip() == "":
c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.prompt, True)
else:
@ -561,6 +571,14 @@ class ClipSkipInvocation(BaseInvocation):
clip: ClipField = Field(None, description="Clip to use")
skipped_layers: int = Field(0, description="Number of layers to skip in text_encoder")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "CLIP Skip",
"tags": ["clip", "skip"]
},
}
def invoke(self, context: InvocationContext) -> ClipSkipInvocationOutput:
self.clip.skipped_layers += self.skipped_layers
return ClipSkipInvocationOutput(

View File

@ -1,43 +1,25 @@
# Invocations for ControlNet image preprocessors
# initial implementation by Gregg Helt, 2023
# heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux
from builtins import float, bool
from builtins import bool, float
from typing import Dict, List, Literal, Optional, Union
import cv2
import numpy as np
from typing import Literal, Optional, Union, List, Dict
from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,
LeresDetector, LineartAnimeDetector,
LineartDetector, MediapipeFaceDetector,
MidasDetector, MLSDdetector, NormalBaeDetector,
OpenposeDetector, PidiNetDetector, SamDetector,
ZoeDetector)
from controlnet_aux.util import HWC3, ade_palette
from PIL import Image
from pydantic import BaseModel, Field, validator
from ...backend.model_management import BaseModelType, ModelType
from ..models.image import ImageField, ImageCategory, ResourceOrigin
from .baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
InvocationContext,
InvocationConfig,
)
from controlnet_aux import (
CannyDetector,
HEDdetector,
LineartDetector,
LineartAnimeDetector,
MidasDetector,
MLSDdetector,
NormalBaeDetector,
OpenposeDetector,
PidiNetDetector,
ContentShuffleDetector,
ZoeDetector,
MediapipeFaceDetector,
SamDetector,
LeresDetector,
)
from controlnet_aux.util import HWC3, ade_palette
from ..models.image import ImageCategory, ImageField, ResourceOrigin
from .baseinvocation import (BaseInvocation, BaseInvocationOutput,
InvocationConfig, InvocationContext)
from .image import ImageOutput, PILInvocationConfig
CONTROLNET_DEFAULT_MODELS = [
@ -75,33 +57,34 @@ CONTROLNET_DEFAULT_MODELS = [
"lllyasviel/control_v11e_sd15_ip2p",
"lllyasviel/control_v11f1e_sd15_tile",
#################################################
# thibaud sd v2.1 models (ControlNet v1.0? or v1.1?
##################################################
"thibaud/controlnet-sd21-openpose-diffusers",
"thibaud/controlnet-sd21-canny-diffusers",
"thibaud/controlnet-sd21-depth-diffusers",
"thibaud/controlnet-sd21-scribble-diffusers",
"thibaud/controlnet-sd21-hed-diffusers",
"thibaud/controlnet-sd21-zoedepth-diffusers",
"thibaud/controlnet-sd21-color-diffusers",
"thibaud/controlnet-sd21-openposev2-diffusers",
"thibaud/controlnet-sd21-lineart-diffusers",
"thibaud/controlnet-sd21-normalbae-diffusers",
"thibaud/controlnet-sd21-ade20k-diffusers",
#################################################
# thibaud sd v2.1 models (ControlNet v1.0? or v1.1?
##################################################
"thibaud/controlnet-sd21-openpose-diffusers",
"thibaud/controlnet-sd21-canny-diffusers",
"thibaud/controlnet-sd21-depth-diffusers",
"thibaud/controlnet-sd21-scribble-diffusers",
"thibaud/controlnet-sd21-hed-diffusers",
"thibaud/controlnet-sd21-zoedepth-diffusers",
"thibaud/controlnet-sd21-color-diffusers",
"thibaud/controlnet-sd21-openposev2-diffusers",
"thibaud/controlnet-sd21-lineart-diffusers",
"thibaud/controlnet-sd21-normalbae-diffusers",
"thibaud/controlnet-sd21-ade20k-diffusers",
##############################################
# ControlNetMediaPipeface, ControlNet v1.1
##############################################
# ["CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"], # SD 1.5
# diffusion_sd15 needs to be passed to from_pretrained() as subfolder arg
# hacked t2l to split to model & subfolder if format is "model,subfolder"
"CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15", # SD 1.5
"CrucibleAI/ControlNetMediaPipeFace", # SD 2.1?
##############################################
# ControlNetMediaPipeface, ControlNet v1.1
##############################################
# ["CrucibleAI/ControlNetMediaPipeFace", "diffusion_sd15"], # SD 1.5
# diffusion_sd15 needs to be passed to from_pretrained() as subfolder arg
# hacked t2l to split to model & subfolder if format is "model,subfolder"
"CrucibleAI/ControlNetMediaPipeFace,diffusion_sd15", # SD 1.5
"CrucibleAI/ControlNetMediaPipeFace", # SD 2.1?
]
CONTROLNET_NAME_VALUES = Literal[tuple(CONTROLNET_DEFAULT_MODELS)]
CONTROLNET_MODE_VALUES = Literal[tuple(["balanced", "more_prompt", "more_control", "unbalanced"])]
CONTROLNET_MODE_VALUES = Literal[tuple(
["balanced", "more_prompt", "more_control", "unbalanced"])]
# crop and fill options not ready yet
# CONTROLNET_RESIZE_VALUES = Literal[tuple(["just_resize", "crop_resize", "fill_resize"])]
@ -112,16 +95,22 @@ class ControlNetModelField(BaseModel):
model_name: str = Field(description="Name of the ControlNet model")
base_model: BaseModelType = Field(description="Base model")
class ControlField(BaseModel):
image: ImageField = Field(default=None, description="The control image")
control_model: Optional[ControlNetModelField] = Field(default=None, description="The ControlNet model to use")
control_model: Optional[ControlNetModelField] = Field(
default=None, description="The ControlNet model to use")
# control_weight: Optional[float] = Field(default=1, description="weight given to controlnet")
control_weight: Union[float, List[float]] = Field(default=1, description="The weight given to the ControlNet")
begin_step_percent: float = Field(default=0, ge=0, le=1,
description="When the ControlNet is first applied (% of total steps)")
end_step_percent: float = Field(default=1, ge=0, le=1,
description="When the ControlNet is last applied (% of total steps)")
control_mode: CONTROLNET_MODE_VALUES = Field(default="balanced", description="The control mode to use")
control_weight: Union[float, List[float]] = Field(
default=1, description="The weight given to the ControlNet")
begin_step_percent: float = Field(
default=0, ge=0, le=1,
description="When the ControlNet is first applied (% of total steps)")
end_step_percent: float = Field(
default=1, ge=0, le=1,
description="When the ControlNet is last applied (% of total steps)")
control_mode: CONTROLNET_MODE_VALUES = Field(
default="balanced", description="The control mode to use")
# resize_mode: CONTROLNET_RESIZE_VALUES = Field(default="just_resize", description="The resize mode to use")
@validator("control_weight")
@ -130,11 +119,13 @@ class ControlField(BaseModel):
if isinstance(v, list):
for i in v:
if i < -1 or i > 2:
raise ValueError('Control weights must be within -1 to 2 range')
raise ValueError(
'Control weights must be within -1 to 2 range')
else:
if v < -1 or v > 2:
raise ValueError('Control weights must be within -1 to 2 range')
return v
class Config:
schema_extra = {
"required": ["image", "control_model", "control_weight", "begin_step_percent", "end_step_percent"],
@ -175,13 +166,14 @@ class ControlNetInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["latents"],
"title": "ControlNet",
"tags": ["controlnet", "latents"],
"type_hints": {
"model": "model",
"control": "control",
# "cfg_scale": "float",
"cfg_scale": "number",
"control_weight": "float",
"model": "model",
"control": "control",
# "cfg_scale": "float",
"cfg_scale": "number",
"control_weight": "float",
}
},
}
@ -208,6 +200,13 @@ class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig):
image: ImageField = Field(default=None, description="The image to process")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Image Processor",
"tags": ["image", "processor"]
},
}
def run_processor(self, image):
# superclass just passes through image without processing
@ -239,14 +238,15 @@ class ImageProcessorInvocation(BaseInvocation, PILInvocationConfig):
return ImageOutput(
image=processed_image_field,
# width=processed_image.width,
width = image_dto.width,
width=image_dto.width,
# height=processed_image.height,
height = image_dto.height,
height=image_dto.height,
# mode=processed_image.mode,
)
class CannyImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class CannyImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Canny edge detection for ControlNet"""
# fmt: off
type: Literal["canny_image_processor"] = "canny_image_processor"
@ -255,13 +255,23 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfi
high_threshold: int = Field(default=200, ge=0, le=255, description="The high threshold of the Canny pixel gradient (0-255)")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Canny Processor",
"tags": ["controlnet", "canny", "image", "processor"]
},
}
def run_processor(self, image):
canny_processor = CannyDetector()
processed_image = canny_processor(image, self.low_threshold, self.high_threshold)
processed_image = canny_processor(
image, self.low_threshold, self.high_threshold)
return processed_image
class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class HedImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies HED edge detection to image"""
# fmt: off
type: Literal["hed_image_processor"] = "hed_image_processor"
@ -273,6 +283,14 @@ class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig)
scribble: bool = Field(default=False, description="Whether to use scribble mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Softedge(HED) Processor",
"tags": ["controlnet", "softedge", "hed", "image", "processor"]
},
}
def run_processor(self, image):
hed_processor = HEDdetector.from_pretrained("lllyasviel/Annotators")
processed_image = hed_processor(image,
@ -285,7 +303,8 @@ class HedImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig)
return processed_image
class LineartImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class LineartImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies line art processing to image"""
# fmt: off
type: Literal["lineart_image_processor"] = "lineart_image_processor"
@ -295,16 +314,25 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCon
coarse: bool = Field(default=False, description="Whether to use coarse mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Lineart Processor",
"tags": ["controlnet", "lineart", "image", "processor"]
},
}
def run_processor(self, image):
lineart_processor = LineartDetector.from_pretrained("lllyasviel/Annotators")
processed_image = lineart_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
coarse=self.coarse)
lineart_processor = LineartDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = lineart_processor(
image, detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution, coarse=self.coarse)
return processed_image
class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class LineartAnimeImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies line art anime processing to image"""
# fmt: off
type: Literal["lineart_anime_image_processor"] = "lineart_anime_image_processor"
@ -313,8 +341,17 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocati
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Lineart Anime Processor",
"tags": ["controlnet", "lineart", "anime", "image", "processor"]
},
}
def run_processor(self, image):
processor = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
processor = LineartAnimeDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
@ -322,7 +359,8 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation, PILInvocati
return processed_image
class OpenposeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class OpenposeImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies Openpose processing to image"""
# fmt: off
type: Literal["openpose_image_processor"] = "openpose_image_processor"
@ -332,17 +370,26 @@ class OpenposeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Openpose Processor",
"tags": ["controlnet", "openpose", "image", "processor"]
},
}
def run_processor(self, image):
openpose_processor = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = openpose_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
hand_and_face=self.hand_and_face,
)
openpose_processor = OpenposeDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = openpose_processor(
image, detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
hand_and_face=self.hand_and_face,)
return processed_image
class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class MidasDepthImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies Midas depth processing to image"""
# fmt: off
type: Literal["midas_depth_image_processor"] = "midas_depth_image_processor"
@ -353,6 +400,14 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocation
# depth_and_normal: bool = Field(default=False, description="whether to use depth and normal mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Midas (Depth) Processor",
"tags": ["controlnet", "midas", "depth", "image", "processor"]
},
}
def run_processor(self, image):
midas_processor = MidasDetector.from_pretrained("lllyasviel/Annotators")
processed_image = midas_processor(image,
@ -364,7 +419,8 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocation
return processed_image
class NormalbaeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class NormalbaeImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies NormalBae processing to image"""
# fmt: off
type: Literal["normalbae_image_processor"] = "normalbae_image_processor"
@ -373,15 +429,25 @@ class NormalbaeImageProcessorInvocation(ImageProcessorInvocation, PILInvocationC
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Normal BAE Processor",
"tags": ["controlnet", "normal", "bae", "image", "processor"]
},
}
def run_processor(self, image):
normalbae_processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
processed_image = normalbae_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution)
normalbae_processor = NormalBaeDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = normalbae_processor(
image, detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution)
return processed_image
class MlsdImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class MlsdImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies MLSD processing to image"""
# fmt: off
type: Literal["mlsd_image_processor"] = "mlsd_image_processor"
@ -392,17 +458,25 @@ class MlsdImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
thr_d: float = Field(default=0.1, ge=0, description="MLSD parameter `thr_d`")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "MLSD Processor",
"tags": ["controlnet", "mlsd", "image", "processor"]
},
}
def run_processor(self, image):
mlsd_processor = MLSDdetector.from_pretrained("lllyasviel/Annotators")
processed_image = mlsd_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
thr_v=self.thr_v,
thr_d=self.thr_d)
processed_image = mlsd_processor(
image, detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution, thr_v=self.thr_v,
thr_d=self.thr_d)
return processed_image
class PidiImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class PidiImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies PIDI processing to image"""
# fmt: off
type: Literal["pidi_image_processor"] = "pidi_image_processor"
@ -413,17 +487,26 @@ class PidiImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig
scribble: bool = Field(default=False, description="Whether to use scribble mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "PIDI Processor",
"tags": ["controlnet", "pidi", "image", "processor"]
},
}
def run_processor(self, image):
pidi_processor = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
processed_image = pidi_processor(image,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution,
safe=self.safe,
scribble=self.scribble)
pidi_processor = PidiNetDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = pidi_processor(
image, detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution, safe=self.safe,
scribble=self.scribble)
return processed_image
class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class ContentShuffleImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies content shuffle processing to image"""
# fmt: off
type: Literal["content_shuffle_image_processor"] = "content_shuffle_image_processor"
@ -435,6 +518,14 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvoca
f: Optional[int] = Field(default=256, ge=0, description="Content shuffle `f` parameter")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Content Shuffle Processor",
"tags": ["controlnet", "contentshuffle", "image", "processor"]
},
}
def run_processor(self, image):
content_shuffle_processor = ContentShuffleDetector()
processed_image = content_shuffle_processor(image,
@ -448,19 +539,30 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation, PILInvoca
# should work with controlnet_aux >= 0.0.4 and timm <= 0.6.13
class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class ZoeDepthImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies Zoe depth processing to image"""
# fmt: off
type: Literal["zoe_depth_image_processor"] = "zoe_depth_image_processor"
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Zoe (Depth) Processor",
"tags": ["controlnet", "zoe", "depth", "image", "processor"]
},
}
def run_processor(self, image):
zoe_depth_processor = ZoeDetector.from_pretrained("lllyasviel/Annotators")
zoe_depth_processor = ZoeDetector.from_pretrained(
"lllyasviel/Annotators")
processed_image = zoe_depth_processor(image)
return processed_image
class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class MediapipeFaceProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies mediapipe face processing to image"""
# fmt: off
type: Literal["mediapipe_face_processor"] = "mediapipe_face_processor"
@ -469,16 +571,27 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
min_confidence: float = Field(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Mediapipe Processor",
"tags": ["controlnet", "mediapipe", "image", "processor"]
},
}
def run_processor(self, image):
# MediaPipeFaceDetector throws an error if image has alpha channel
# so convert to RGB if needed
if image.mode == 'RGBA':
image = image.convert('RGB')
mediapipe_face_processor = MediapipeFaceDetector()
processed_image = mediapipe_face_processor(image, max_faces=self.max_faces, min_confidence=self.min_confidence)
processed_image = mediapipe_face_processor(
image, max_faces=self.max_faces, min_confidence=self.min_confidence)
return processed_image
class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class LeresImageProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies leres processing to image"""
# fmt: off
type: Literal["leres_image_processor"] = "leres_image_processor"
@ -490,18 +603,25 @@ class LeresImageProcessorInvocation(ImageProcessorInvocation, PILInvocationConfi
image_resolution: int = Field(default=512, ge=0, description="The pixel resolution for the output image")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Leres (Depth) Processor",
"tags": ["controlnet", "leres", "depth", "image", "processor"]
},
}
def run_processor(self, image):
leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators")
processed_image = leres_processor(image,
thr_a=self.thr_a,
thr_b=self.thr_b,
boost=self.boost,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution)
processed_image = leres_processor(
image, thr_a=self.thr_a, thr_b=self.thr_b, boost=self.boost,
detect_resolution=self.detect_resolution,
image_resolution=self.image_resolution)
return processed_image
class TileResamplerProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class TileResamplerProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
# fmt: off
type: Literal["tile_image_processor"] = "tile_image_processor"
@ -510,6 +630,14 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
down_sampling_rate: float = Field(default=1.0, ge=1.0, le=8.0, description="Down sampling rate")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Tile Resample Processor",
"tags": ["controlnet", "tile", "resample", "image", "processor"]
},
}
# tile_resample copied from sd-webui-controlnet/scripts/processor.py
def tile_resample(self,
np_img: np.ndarray,
@ -528,28 +656,33 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation, PILInvocationCo
def run_processor(self, img):
np_img = np.array(img, dtype=np.uint8)
processed_np_image = self.tile_resample(np_img,
#res=self.tile_size,
# res=self.tile_size,
down_sampling_rate=self.down_sampling_rate
)
processed_image = Image.fromarray(processed_np_image)
return processed_image
class SegmentAnythingProcessorInvocation(ImageProcessorInvocation, PILInvocationConfig):
class SegmentAnythingProcessorInvocation(
ImageProcessorInvocation, PILInvocationConfig):
"""Applies segment anything processing to image"""
# fmt: off
type: Literal["segment_anything_processor"] = "segment_anything_processor"
# fmt: on
class Config(InvocationConfig):
schema_extra = {"ui": {"title": "Segment Anything Processor", "tags": [
"controlnet", "segment", "anything", "sam", "image", "processor"]}, }
def run_processor(self, image):
# segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
segment_anything_processor = SamDetectorReproducibleColors.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
segment_anything_processor = SamDetectorReproducibleColors.from_pretrained(
"ybelkada/segment-anything", subfolder="checkpoints")
np_img = np.array(image, dtype=np.uint8)
processed_image = segment_anything_processor(np_img)
return processed_image
class SamDetectorReproducibleColors(SamDetector):
# overriding SamDetector.show_anns() method to use reproducible colors for segmentation image
@ -561,7 +694,8 @@ class SamDetectorReproducibleColors(SamDetector):
return
sorted_anns = sorted(anns, key=(lambda x: x['area']), reverse=True)
h, w = anns[0]['segmentation'].shape
final_img = Image.fromarray(np.zeros((h, w, 3), dtype=np.uint8), mode="RGB")
final_img = Image.fromarray(
np.zeros((h, w, 3), dtype=np.uint8), mode="RGB")
palette = ade_palette()
for i, ann in enumerate(sorted_anns):
m = ann['segmentation']
@ -569,5 +703,8 @@ class SamDetectorReproducibleColors(SamDetector):
# doing modulo just in case number of annotated regions exceeds number of colors in palette
ann_color = palette[i % len(palette)]
img[:, :] = ann_color
final_img.paste(Image.fromarray(img, mode="RGB"), (0, 0), Image.fromarray(np.uint8(m * 255)))
final_img.paste(
Image.fromarray(img, mode="RGB"),
(0, 0),
Image.fromarray(np.uint8(m * 255)))
return np.array(final_img, dtype=np.uint8)

View File

@ -35,6 +35,14 @@ class CvInpaintInvocation(BaseInvocation, CvInvocationConfig):
mask: ImageField = Field(default=None, description="The mask to use when inpainting")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "OpenCV Inpaint",
"tags": ["opencv", "inpaint"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
mask = context.services.images.get_pil_image(self.mask.image_name)

View File

@ -130,6 +130,7 @@ class InpaintInvocation(BaseInvocation):
schema_extra = {
"ui": {
"tags": ["stable-diffusion", "image"],
"title": "Inpaint"
},
}
@ -146,9 +147,13 @@ class InpaintInvocation(BaseInvocation):
source_node_id=source_node_id,
)
def get_conditioning(self, context):
c, extra_conditioning_info = context.services.latents.get(self.positive_conditioning.conditioning_name)
uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name)
def get_conditioning(self, context, unet):
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
return (uc, c, extra_conditioning_info)
@ -209,7 +214,6 @@ class InpaintInvocation(BaseInvocation):
)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
conditioning = self.get_conditioning(context)
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
@ -217,6 +221,8 @@ class InpaintInvocation(BaseInvocation):
)
with self.load_model_old_way(context, scheduler) as model:
conditioning = self.get_conditioning(context, model.context.model.unet)
outputs = Inpaint(model).generate(
conditioning=conditioning,
scheduler=scheduler,

View File

@ -71,6 +71,15 @@ class LoadImageInvocation(BaseInvocation):
default=None, description="The image to load"
)
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Load Image",
"tags": ["image", "load"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -91,6 +100,14 @@ class ShowImageInvocation(BaseInvocation):
default=None, description="The image to show"
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Show Image",
"tags": ["image", "show"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
if image:
@ -119,6 +136,14 @@ class ImageCropInvocation(BaseInvocation, PILInvocationConfig):
height: int = Field(default=512, gt=0, description="The height of the crop rectangle")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Crop Image",
"tags": ["image", "crop"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -157,6 +182,14 @@ class ImagePasteInvocation(BaseInvocation, PILInvocationConfig):
y: int = Field(default=0, description="The top y coordinate at which to paste the image")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Paste Image",
"tags": ["image", "paste"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
base_image = context.services.images.get_pil_image(self.base_image.image_name)
image = context.services.images.get_pil_image(self.image.image_name)
@ -207,6 +240,14 @@ class MaskFromAlphaInvocation(BaseInvocation, PILInvocationConfig):
invert: bool = Field(default=False, description="Whether or not to invert the mask")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Mask From Alpha",
"tags": ["image", "mask", "alpha"]
},
}
def invoke(self, context: InvocationContext) -> MaskOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -241,6 +282,14 @@ class ImageMultiplyInvocation(BaseInvocation, PILInvocationConfig):
image2: Optional[ImageField] = Field(default=None, description="The second image to multiply")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Multiply Images",
"tags": ["image", "multiply"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image1 = context.services.images.get_pil_image(self.image1.image_name)
image2 = context.services.images.get_pil_image(self.image2.image_name)
@ -277,6 +326,14 @@ class ImageChannelInvocation(BaseInvocation, PILInvocationConfig):
channel: IMAGE_CHANNELS = Field(default="A", description="The channel to get")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Image Channel",
"tags": ["image", "channel"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -312,6 +369,14 @@ class ImageConvertInvocation(BaseInvocation, PILInvocationConfig):
mode: IMAGE_MODES = Field(default="L", description="The mode to convert to")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Convert Image",
"tags": ["image", "convert"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -345,6 +410,14 @@ class ImageBlurInvocation(BaseInvocation, PILInvocationConfig):
blur_type: Literal["gaussian", "box"] = Field(default="gaussian", description="The type of blur")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Blur Image",
"tags": ["image", "blur"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -404,6 +477,14 @@ class ImageResizeInvocation(BaseInvocation, PILInvocationConfig):
resample_mode: PIL_RESAMPLING_MODES = Field(default="bicubic", description="The resampling mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Resize Image",
"tags": ["image", "resize"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -442,6 +523,14 @@ class ImageScaleInvocation(BaseInvocation, PILInvocationConfig):
resample_mode: PIL_RESAMPLING_MODES = Field(default="bicubic", description="The resampling mode")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Scale Image",
"tags": ["image", "scale"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -482,6 +571,14 @@ class ImageLerpInvocation(BaseInvocation, PILInvocationConfig):
max: int = Field(default=255, ge=0, le=255, description="The maximum output value")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Image Linear Interpolation",
"tags": ["image", "linear", "interpolation", "lerp"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -518,6 +615,14 @@ class ImageInverseLerpInvocation(BaseInvocation, PILInvocationConfig):
max: int = Field(default=255, ge=0, le=255, description="The maximum input value")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Image Inverse Linear Interpolation",
"tags": ["image", "linear", "interpolation", "inverse"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)

View File

@ -14,6 +14,7 @@ from invokeai.backend.image_util.patchmatch import PatchMatch
from ..models.image import ColorField, ImageCategory, ImageField, ResourceOrigin
from .baseinvocation import (
BaseInvocation,
InvocationConfig,
InvocationContext,
)
@ -133,6 +134,14 @@ class InfillColorInvocation(BaseInvocation):
description="The color to use to infill",
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Color Infill",
"tags": ["image", "inpaint", "color", "infill"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -173,6 +182,14 @@ class InfillTileInvocation(BaseInvocation):
default_factory=get_random_seed,
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Tile Infill",
"tags": ["image", "inpaint", "tile", "infill"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@ -206,6 +223,14 @@ class InfillPatchMatchInvocation(BaseInvocation):
default=None, description="The image to infill"
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Patch Match Infill",
"tags": ["image", "inpaint", "patchmatch", "infill"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)

View File

@ -139,6 +139,7 @@ class TextToLatentsInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Text To Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
@ -167,13 +168,14 @@ class TextToLatentsInvocation(BaseInvocation):
self,
context: InvocationContext,
scheduler,
unet,
) -> ConditioningData:
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
c = positive_cond_data.conditionings[0].embeds
c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
uc = negative_cond_data.conditionings[0].embeds
uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
conditioning_data = ConditioningData(
unconditioned_embeddings=uc,
@ -195,7 +197,7 @@ class TextToLatentsInvocation(BaseInvocation):
eta=0.0, # ddim_eta
# for ancestral and sde schedulers
generator=torch.Generator(device=uc.device).manual_seed(0),
generator=torch.Generator(device=unet.device).manual_seed(0),
)
return conditioning_data
@ -334,6 +336,8 @@ class TextToLatentsInvocation(BaseInvocation):
ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
unet_info as unet:
noise = noise.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
@ -341,7 +345,7 @@ class TextToLatentsInvocation(BaseInvocation):
)
pipeline = self.create_pipeline(unet, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler, unet)
control_data = self.prep_control_data(
model=pipeline, context=context, control_input=self.control,
@ -362,6 +366,7 @@ class TextToLatentsInvocation(BaseInvocation):
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
result_latents = result_latents.to("cpu")
torch.cuda.empty_cache()
name = f'{context.graph_execution_state_id}__{self.id}'
@ -385,6 +390,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Latent To Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
@ -424,6 +430,9 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()),\
unet_info as unet:
noise = noise.to(device=unet.device, dtype=unet.dtype)
latent = latent.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
@ -431,7 +440,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
)
pipeline = self.create_pipeline(unet, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler, unet)
control_data = self.prep_control_data(
model=pipeline, context=context, control_input=self.control,
@ -463,6 +472,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
result_latents = result_latents.to("cpu")
torch.cuda.empty_cache()
name = f'{context.graph_execution_state_id}__{self.id}'
@ -490,6 +500,7 @@ class LatentsToImageInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Latents To Image",
"tags": ["latents", "image"],
},
}
@ -503,6 +514,7 @@ class LatentsToImageInvocation(BaseInvocation):
)
with vae_info as vae:
latents = latents.to(vae.device)
if self.fp32:
vae.to(dtype=torch.float32)
@ -586,17 +598,29 @@ class ResizeLatentsInvocation(BaseInvocation):
antialias: bool = Field(
default=False,
description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Resize Latents",
"tags": ["latents", "resize"]
},
}
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.services.latents.get(self.latents.latents_name)
# TODO:
device=choose_torch_device()
resized_latents = torch.nn.functional.interpolate(
latents, size=(self.height // 8, self.width // 8),
latents.to(device), size=(self.height // 8, self.width // 8),
mode=self.mode, antialias=self.antialias
if self.mode in ["bilinear", "bicubic"] else False,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
resized_latents = resized_latents.to("cpu")
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
@ -620,18 +644,30 @@ class ScaleLatentsInvocation(BaseInvocation):
antialias: bool = Field(
default=False,
description="Whether or not to antialias (applied in bilinear and bicubic modes only)")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Scale Latents",
"tags": ["latents", "scale"]
},
}
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.services.latents.get(self.latents.latents_name)
# TODO:
device=choose_torch_device()
# resizing
resized_latents = torch.nn.functional.interpolate(
latents, scale_factor=self.scale_factor, mode=self.mode,
latents.to(device), scale_factor=self.scale_factor, mode=self.mode,
antialias=self.antialias
if self.mode in ["bilinear", "bicubic"] else False,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
resized_latents = resized_latents.to("cpu")
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
@ -658,7 +694,8 @@ class ImageToLatentsInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["latents", "image"],
"title": "Image To Latents",
"tags": ["latents", "image"]
},
}
@ -722,6 +759,6 @@ class ImageToLatentsInvocation(BaseInvocation):
latents = latents.to(dtype=orig_dtype)
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, latents)
latents = latents.to("cpu")
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=latents)

View File

@ -52,6 +52,14 @@ class AddInvocation(BaseInvocation, MathInvocationConfig):
b: int = Field(default=0, description="The second number")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Add",
"tags": ["math", "add"]
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=self.a + self.b)
@ -65,6 +73,14 @@ class SubtractInvocation(BaseInvocation, MathInvocationConfig):
b: int = Field(default=0, description="The second number")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Subtract",
"tags": ["math", "subtract"]
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=self.a - self.b)
@ -78,6 +94,14 @@ class MultiplyInvocation(BaseInvocation, MathInvocationConfig):
b: int = Field(default=0, description="The second number")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Multiply",
"tags": ["math", "multiply"]
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=self.a * self.b)
@ -91,6 +115,14 @@ class DivideInvocation(BaseInvocation, MathInvocationConfig):
b: int = Field(default=0, description="The second number")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Divide",
"tags": ["math", "divide"]
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=int(self.a / self.b))
@ -105,5 +137,14 @@ class RandomIntInvocation(BaseInvocation):
default=np.iinfo(np.int32).max, description="The exclusive high value"
)
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Random Integer",
"tags": ["math", "random", "integer"]
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=np.random.randint(self.low, self.high))

View File

@ -3,7 +3,7 @@ from typing import Literal, Optional, Union
from pydantic import BaseModel, Field
from invokeai.app.invocations.baseinvocation import (BaseInvocation,
BaseInvocationOutput,
BaseInvocationOutput, InvocationConfig,
InvocationContext)
from invokeai.app.invocations.controlnet_image_processors import ControlField
from invokeai.app.invocations.model import (LoRAModelField, MainModelField,
@ -97,6 +97,14 @@ class MetadataAccumulatorInvocation(BaseInvocation):
description="The VAE used for decoding, if the main model's default was not used",
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Metadata Accumulator",
"tags": ["image", "metadata", "generation"]
},
}
def invoke(self, context: InvocationContext) -> MetadataAccumulatorOutput:
"""Collects and outputs a CoreMetadata object"""

View File

@ -48,7 +48,7 @@ def get_noise(
dtype=torch_dtype(device),
device=noise_device_type,
generator=generator,
).to(device)
).to("cpu")
return noise_tensor
@ -112,6 +112,7 @@ class NoiseInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Noise",
"tags": ["latents", "noise"],
},
}

View File

@ -43,6 +43,14 @@ class FloatLinearRangeInvocation(BaseInvocation):
stop: float = Field(default=10, description="The last value of the range")
steps: int = Field(default=30, description="number of values to interpolate over (including start and stop)")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Linear Range (Float)",
"tags": ["math", "float", "linear", "range"]
},
}
def invoke(self, context: InvocationContext) -> FloatCollectionOutput:
param_list = list(np.linspace(self.start, self.stop, self.steps))
return FloatCollectionOutput(
@ -113,6 +121,14 @@ class StepParamEasingInvocation(BaseInvocation):
show_easing_plot: bool = Field(default=False, description="show easing plot")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Param Easing By Step",
"tags": ["param", "step", "easing"]
},
}
def invoke(self, context: InvocationContext) -> FloatCollectionOutput:
log_diagnostics = False

View File

@ -1,9 +1,12 @@
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
from typing import Literal
from pydantic import Field
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext
from .math import IntOutput, FloatOutput
from .baseinvocation import (BaseInvocation, BaseInvocationOutput,
InvocationConfig, InvocationContext)
from .math import FloatOutput, IntOutput
# Pass-through parameter nodes - used by subgraphs
@ -14,6 +17,14 @@ class ParamIntInvocation(BaseInvocation):
a: int = Field(default=0, description="The integer value")
#fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["param", "integer"],
"title": "Integer Parameter"
},
}
def invoke(self, context: InvocationContext) -> IntOutput:
return IntOutput(a=self.a)
@ -24,5 +35,36 @@ class ParamFloatInvocation(BaseInvocation):
param: float = Field(default=0.0, description="The float value")
#fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["param", "float"],
"title": "Float Parameter"
},
}
def invoke(self, context: InvocationContext) -> FloatOutput:
return FloatOutput(param=self.param)
class StringOutput(BaseInvocationOutput):
"""A string output"""
type: Literal["string_output"] = "string_output"
text: str = Field(default=None, description="The output string")
class ParamStringInvocation(BaseInvocation):
"""A string parameter"""
type: Literal['param_string'] = 'param_string'
text: str = Field(default='', description='The string value')
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["param", "string"],
"title": "String Parameter"
},
}
def invoke(self, context: InvocationContext) -> StringOutput:
return StringOutput(text=self.text)

View File

@ -4,7 +4,7 @@ from typing import Literal, Optional
import numpy as np
from pydantic import Field, validator
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from dynamicprompts.generators import RandomPromptGenerator, CombinatorialPromptGenerator
class PromptOutput(BaseInvocationOutput):
@ -48,6 +48,14 @@ class DynamicPromptInvocation(BaseInvocation):
default=False, description="Whether to use the combinatorial generator"
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Dynamic Prompt",
"tags": ["prompt", "dynamic"]
},
}
def invoke(self, context: InvocationContext) -> PromptCollectionOutput:
if self.combinatorial:
generator = CombinatorialPromptGenerator()
@ -72,6 +80,14 @@ class PromptsFromFileInvocation(BaseInvocation):
max_prompts: int = Field(default=1, ge=0, description="Max lines to read from file (0=all)")
#fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Prompts From File",
"tags": ["prompt", "file"]
},
}
@validator("file_path")
def file_path_exists(cls, v):
if not exists(v):

View File

@ -233,6 +233,7 @@ class SDXLTextToLatentsInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Text To Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
@ -305,7 +306,7 @@ class SDXLTextToLatentsInvocation(BaseInvocation):
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=self.steps) as progress_bar:
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
@ -351,7 +352,7 @@ class SDXLTextToLatentsInvocation(BaseInvocation):
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=self.steps) as progress_bar:
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
#latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
@ -415,6 +416,7 @@ class SDXLTextToLatentsInvocation(BaseInvocation):
#################
latents = latents.to("cpu")
torch.cuda.empty_cache()
name = f'{context.graph_execution_state_id}__{self.id}'
@ -461,6 +463,7 @@ class SDXLLatentsToLatentsInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Latents to Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
@ -651,6 +654,7 @@ class SDXLLatentsToLatentsInvocation(BaseInvocation):
#################
latents = latents.to("cpu")
torch.cuda.empty_cache()
name = f'{context.graph_execution_state_id}__{self.id}'

View File

@ -11,7 +11,7 @@ from realesrgan import RealESRGANer
from invokeai.app.models.image import ImageCategory, ImageField, ResourceOrigin
from .baseinvocation import BaseInvocation, InvocationContext
from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext
from .image import ImageOutput
# TODO: Populate this from disk?
@ -33,6 +33,14 @@ class ESRGANInvocation(BaseInvocation):
default="RealESRGAN_x4plus.pth", description="The Real-ESRGAN model to use"
)
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Upscale (RealESRGAN)",
"tags": ["image", "upscale", "realesrgan"]
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
models_path = context.services.configuration.models_path

View File

@ -104,7 +104,8 @@ class ModelCache(object):
:param sha_chunksize: Chunksize to use when calculating sha256 model hash
'''
self.model_infos: Dict[str, ModelBase] = dict()
self.lazy_offloading = lazy_offloading
# allow lazy offloading only when vram cache enabled
self.lazy_offloading = lazy_offloading and max_vram_cache_size > 0
self.precision: torch.dtype=precision
self.max_cache_size: float=max_cache_size
self.max_vram_cache_size: float=max_vram_cache_size
@ -327,6 +328,25 @@ class ModelCache(object):
refs = sys.getrefcount(cache_entry.model)
# manualy clear local variable references of just finished function calls
# for some reason python don't want to collect it even by gc.collect() immidiately
if refs > 2:
while True:
cleared = False
for referrer in gc.get_referrers(cache_entry.model):
if type(referrer).__name__ == "frame":
# RuntimeError: cannot clear an executing frame
with suppress(RuntimeError):
referrer.clear()
cleared = True
#break
# repeat if referrers changes(due to frame clear), else exit loop
if cleared:
gc.collect()
else:
break
device = cache_entry.model.device if hasattr(cache_entry.model, "device") else None
self.logger.debug(f"Model: {model_key}, locks: {cache_entry._locks}, device: {device}, loaded: {cache_entry.loaded}, refs: {refs}")
@ -362,6 +382,9 @@ class ModelCache(object):
self.logger.debug(f'GPU VRAM freed: {(mem.vram_used/GIG):.2f} GB')
vram_in_use += mem.vram_used # note vram_used is negative
self.logger.debug(f'{(vram_in_use/GIG):.2f}GB VRAM used for models; max allowed={(reserved/GIG):.2f}GB')
gc.collect()
torch.cuda.empty_cache()
def _local_model_hash(self, model_path: Union[str, Path]) -> str:
sha = hashlib.sha256()

View File

@ -106,16 +106,16 @@ providing information about a model defined in models.yaml. For example:
>>> models = mgr.list_models()
>>> json.dumps(models[0])
{"path": "/home/lstein/invokeai-main/models/sd-1/controlnet/canny",
"model_format": "diffusers",
"name": "canny",
"base_model": "sd-1",
{"path": "/home/lstein/invokeai-main/models/sd-1/controlnet/canny",
"model_format": "diffusers",
"name": "canny",
"base_model": "sd-1",
"type": "controlnet"
}
You can filter by model type and base model as shown here:
controlnets = mgr.list_models(model_type=ModelType.ControlNet,
base_model=BaseModelType.StableDiffusion1)
for c in controlnets:
@ -140,14 +140,14 @@ Layout of the `models` directory:
models
sd-1
   controlnet
   lora
   main
   embedding
controlnet
lora
main
embedding
sd-2
   controlnet
   lora
   main
controlnet
lora
main
embedding
core
face_reconstruction
@ -195,7 +195,7 @@ name, base model, type and a dict of model attributes. See
`invokeai/backend/model_management/models` for the attributes required
by each model type.
A model can be deleted using `del_model()`, providing the same
A model can be deleted using `del_model()`, providing the same
identifying information as `get_model()`
The `heuristic_import()` method will take a set of strings
@ -304,7 +304,7 @@ class ModelManager(object):
logger: types.ModuleType = logger,
):
"""
Initialize with the path to the models.yaml config file.
Initialize with the path to the models.yaml config file.
Optional parameters are the torch device type, precision, max_models,
and sequential_offload boolean. Note that the default device
type and precision are set up for a CUDA system running at half precision.
@ -323,7 +323,7 @@ class ModelManager(object):
self.config_meta = ConfigMeta(**config.pop("__metadata__"))
# TODO: metadata not found
# TODO: version check
self.app_config = InvokeAIAppConfig.get_config()
self.logger = logger
self.cache = ModelCache(
@ -431,7 +431,7 @@ class ModelManager(object):
:param model_name: symbolic name of the model in models.yaml
:param model_type: ModelType enum indicating the type of model to return
:param base_model: BaseModelType enum indicating the base model used by this model
:param submode_typel: an ModelType enum indicating the portion of
:param submode_typel: an ModelType enum indicating the portion of
the model to retrieve (e.g. ModelType.Vae)
"""
model_class = MODEL_CLASSES[base_model][model_type]
@ -456,7 +456,7 @@ class ModelManager(object):
raise ModelNotFoundException(f"Model not found - {model_key}")
# vae/movq override
# TODO:
# TODO:
if submodel_type is not None and hasattr(model_config, submodel_type):
override_path = getattr(model_config, submodel_type)
if override_path:
@ -489,7 +489,7 @@ class ModelManager(object):
self.cache_keys[model_key].add(model_context.key)
model_hash = "<NO_HASH>" # TODO:
return ModelInfo(
context = model_context,
name = model_name,
@ -518,7 +518,7 @@ class ModelManager(object):
def model_names(self) -> List[Tuple[str, BaseModelType, ModelType]]:
"""
Return a list of (str, BaseModelType, ModelType) corresponding to all models
Return a list of (str, BaseModelType, ModelType) corresponding to all models
known to the configuration.
"""
return [(self.parse_key(x)) for x in self.models.keys()]
@ -692,12 +692,12 @@ class ModelManager(object):
if new_name is None and new_base is None:
self.logger.error("rename_model() called with neither a new_name nor a new_base. {model_name} unchanged.")
return
model_key = self.create_key(model_name, base_model, model_type)
model_cfg = self.models.get(model_key, None)
if not model_cfg:
raise ModelNotFoundException(f"Unknown model: {model_key}")
old_path = self.app_config.root_path / model_cfg.path
new_name = new_name or model_name
new_base = new_base or base_model
@ -726,7 +726,7 @@ class ModelManager(object):
self.models.pop(model_key, None) # delete
self.models[new_key] = model_cfg
self.commit()
def convert_model (
self,
model_name: str,
@ -776,12 +776,12 @@ class ModelManager(object):
# something went wrong, so don't leave dangling diffusers model in directory or it will cause a duplicate model error!
rmtree(new_diffusers_path)
raise
if checkpoint_path.exists() and checkpoint_path.is_relative_to(self.app_config.models_path):
checkpoint_path.unlink()
return result
def search_models(self, search_folder):
self.logger.info(f"Finding Models In: {search_folder}")
models_folder_ckpt = Path(search_folder).glob("**/*.ckpt")
@ -824,10 +824,14 @@ class ModelManager(object):
assert config_file_path is not None,'no config file path to write to'
config_file_path = self.app_config.root_path / config_file_path
tmpfile = os.path.join(os.path.dirname(config_file_path), "new_config.tmp")
with open(tmpfile, "w", encoding="utf-8") as outfile:
outfile.write(self.preamble())
outfile.write(yaml_str)
os.replace(tmpfile, config_file_path)
try:
with open(tmpfile, "w", encoding="utf-8") as outfile:
outfile.write(self.preamble())
outfile.write(yaml_str)
os.replace(tmpfile, config_file_path)
except OSError as err:
self.logger.warning(f"Could not modify the config file at {config_file_path}")
self.logger.warning(err)
def preamble(self) -> str:
"""
@ -977,13 +981,12 @@ class ModelManager(object):
# avoid circular import here
from invokeai.backend.install.model_install_backend import ModelInstall
successfully_installed = dict()
installer = ModelInstall(config = self.app_config,
prediction_type_helper = prediction_type_helper,
model_manager = self)
for thing in items_to_import:
installed = installer.heuristic_import(thing)
successfully_installed.update(installed)
self.commit()
self.commit()
return successfully_installed

View File

@ -5,7 +5,11 @@ import { useRef } from 'react';
import { useHoverDirty } from 'react-use';
import { useGetAppVersionQuery } from 'services/api/endpoints/appInfo';
const InvokeAILogoComponent = () => {
interface Props {
showVersion?: boolean;
}
const InvokeAILogoComponent = ({ showVersion = true }: Props) => {
const { data: appVersion } = useGetAppVersionQuery();
const ref = useRef(null);
const isHovered = useHoverDirty(ref);
@ -28,7 +32,7 @@ const InvokeAILogoComponent = () => {
invoke <strong>ai</strong>
</Text>
<AnimatePresence>
{isHovered && appVersion && (
{showVersion && isHovered && appVersion && (
<motion.div
key="statusText"
initial={{