fix: Inpaint Fixes (#4301)

## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

## Have you updated all relevant documentation?
- [ ] Yes
- [x] No


## Description
Fixes masked generation with inpainting models. Mask preparation moves into a new `create_denoise_mask` node, whose output (the mask plus the VAE-encoded masked image latents) is passed to `denoise_latents` as a `denoise_mask`.
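
For context: an inpainting UNet (e.g. `sd-1.5-inpainting`) takes extra input channels beyond the 4 noisy-latent channels, so the mask and the VAE-encoded masked image must reach the model itself instead of only being used to blend latents between steps. A rough sketch of the tensor layout (shapes are illustrative, not taken from this PR):

```python
import torch

# Illustrative shapes: batch=1, 4 latent channels, 64x64 latent grid.
latents = torch.randn(1, 4, 64, 64)         # noisy latents being denoised
mask = torch.rand(1, 1, 64, 64)             # single-channel mask on the latent grid
masked_latents = torch.randn(1, 4, 64, 64)  # VAE-encoded init image, repaint region blanked

# A 9-channel inpainting UNet expects 4 + 1 + 4 input channels:
unet_input = torch.cat([latents, mask, masked_latents], dim=1)
assert unet_input.shape[1] == 9
```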

## Related Tickets & Documents
- Closes #4295 

## Added/updated tests?

- [ ] Yes
- [x] No
Commit 502570e083 by Kent Keirsey, 2023-08-28 00:11:11 -04:00 (committed by GitHub)
22 changed files with 826 additions and 277 deletions

View File

@@ -375,6 +375,9 @@ class ImageResizeInvocation(BaseInvocation):
width: int = InputField(default=512, ge=64, multiple_of=8, description="The width to resize to (px)")
height: int = InputField(default=512, ge=64, multiple_of=8, description="The height to resize to (px)")
resample_mode: PIL_RESAMPLING_MODES = InputField(default="bicubic", description="The resampling mode")
metadata: Optional[CoreMetadata] = InputField(
default=None, description=FieldDescriptions.core_metadata, ui_hidden=True
)
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@@ -393,6 +396,7 @@ class ImageResizeInvocation(BaseInvocation):
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
metadata=self.metadata.dict() if self.metadata else None,
)
return ImageOutput(

View File

@@ -21,6 +21,8 @@ from torchvision.transforms.functional import resize as tv_resize
from invokeai.app.invocations.metadata import CoreMetadata
from invokeai.app.invocations.primitives import (
DenoiseMaskField,
DenoiseMaskOutput,
ImageField,
ImageOutput,
LatentsField,
@@ -31,8 +33,8 @@ from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from invokeai.backend.model_management.models import ModelType, SilenceWarnings
from ...backend.model_management.models import BaseModelType
from ...backend.model_management.lora import ModelPatcher
from ...backend.model_management.models import BaseModelType
from ...backend.stable_diffusion import PipelineIntermediateState
from ...backend.stable_diffusion.diffusers_pipeline import (
ConditioningData,
@@ -44,16 +46,7 @@ from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import Post
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_precision, choose_torch_device
from ..models.image import ImageCategory, ResourceOrigin
from .baseinvocation import (
BaseInvocation,
FieldDescriptions,
Input,
InputField,
InvocationContext,
UIType,
tags,
title,
)
from .baseinvocation import BaseInvocation, FieldDescriptions, Input, InputField, InvocationContext, UIType, tags, title
from .compel import ConditioningField
from .controlnet_image_processors import ControlField
from .model import ModelInfo, UNetField, VaeField
@@ -64,6 +57,72 @@ DEFAULT_PRECISION = choose_precision(choose_torch_device())
SAMPLER_NAME_VALUES = Literal[tuple(list(SCHEDULER_MAP.keys()))]
@title("Create Denoise Mask")
@tags("mask", "denoise")
class CreateDenoiseMaskInvocation(BaseInvocation):
"""Creates mask for denoising model run."""
# Metadata
type: Literal["create_denoise_mask"] = "create_denoise_mask"
# Inputs
vae: VaeField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0)
image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1)
mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2)
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3)
fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32, ui_order=4)
def prep_mask_tensor(self, mask_image):
if mask_image.mode != "L":
mask_image = mask_image.convert("L")
mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
if mask_tensor.dim() == 3:
mask_tensor = mask_tensor.unsqueeze(0)
# if shape is not None:
# mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR)
return mask_tensor
@torch.no_grad()
def invoke(self, context: InvocationContext) -> DenoiseMaskOutput:
if self.image is not None:
image = context.services.images.get_pil_image(self.image.image_name)
image = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image.dim() == 3:
image = image.unsqueeze(0)
else:
image = None
mask = self.prep_mask_tensor(
context.services.images.get_pil_image(self.mask.image_name),
)
if image is not None:
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
img_mask = tv_resize(mask, image.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
masked_image = image * torch.where(img_mask < 0.5, 0.0, 1.0)
# TODO:
masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone())
masked_latents_name = f"{context.graph_execution_state_id}__{self.id}_masked_latents"
context.services.latents.save(masked_latents_name, masked_latents)
else:
masked_latents_name = None
mask_name = f"{context.graph_execution_state_id}__{self.id}_mask"
context.services.latents.save(mask_name, mask)
return DenoiseMaskOutput(
denoise_mask=DenoiseMaskField(
mask_name=mask_name,
masked_latents_name=masked_latents_name,
),
)
def get_scheduler(
context: InvocationContext,
scheduler_info: ModelInfo,
@@ -126,10 +185,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
control: Union[ControlField, list[ControlField]] = InputField(
default=None, description=FieldDescriptions.control, input=Input.Connection, ui_order=5
)
latents: Optional[LatentsField] = InputField(
description=FieldDescriptions.latents, input=Input.Connection, ui_order=4
)
mask: Optional[ImageField] = InputField(
latents: Optional[LatentsField] = InputField(description=FieldDescriptions.latents, input=Input.Connection)
denoise_mask: Optional[DenoiseMaskField] = InputField(
default=None,
description=FieldDescriptions.mask,
)
@@ -342,19 +399,18 @@ class DenoiseLatentsInvocation(BaseInvocation):
return num_inference_steps, timesteps, init_timestep
def prep_mask_tensor(self, mask, context, lantents):
if mask is None:
return None
def prep_inpaint_mask(self, context, latents):
if self.denoise_mask is None:
return None, None
mask_image = context.services.images.get_pil_image(mask.image_name)
if mask_image.mode != "L":
# FIXME: why do we get passed an RGB image here? We can only use single-channel.
mask_image = mask_image.convert("L")
mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
if mask_tensor.dim() == 3:
mask_tensor = mask_tensor.unsqueeze(0)
mask_tensor = tv_resize(mask_tensor, lantents.shape[-2:], T.InterpolationMode.BILINEAR)
return 1 - mask_tensor
mask = context.services.latents.get(self.denoise_mask.mask_name)
mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
if self.denoise_mask.masked_latents_name is not None:
masked_latents = context.services.latents.get(self.denoise_mask.masked_latents_name)
else:
masked_latents = None
return 1 - mask, masked_latents
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
@@ -375,7 +431,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
if seed is None:
seed = 0
mask = self.prep_mask_tensor(self.mask, context, latents)
mask, masked_latents = self.prep_inpaint_mask(context, latents)
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
@@ -406,6 +462,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
noise = noise.to(device=unet.device, dtype=unet.dtype)
if mask is not None:
mask = mask.to(device=unet.device, dtype=unet.dtype)
if masked_latents is not None:
masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
@@ -442,6 +500,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
noise=noise,
seed=seed,
mask=mask,
masked_latents=masked_latents,
num_inference_steps=num_inference_steps,
conditioning_data=conditioning_data,
control_data=control_data, # list[ControlNetData]
@@ -663,26 +722,11 @@ class ImageToLatentsInvocation(BaseInvocation):
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
# image = context.services.images.get(
# self.image.image_type, self.image.image_name
# )
image = context.services.images.get_pil_image(self.image.image_name)
# vae_info = context.services.model_manager.get_model(**self.vae.vae.dict())
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
@staticmethod
def vae_encode(vae_info, upcast, tiled, image_tensor):
with vae_info as vae:
orig_dtype = vae.dtype
if self.fp32:
if upcast:
vae.to(dtype=torch.float32)
use_torch_2_0_or_xformers = isinstance(
@@ -707,7 +751,7 @@
vae.to(dtype=torch.float16)
# latents = latents.half()
if self.tiled:
if tiled:
vae.enable_tiling()
else:
vae.disable_tiling()
@@ -721,6 +765,23 @@
latents = vae.config.scaling_factor * latents
latents = latents.to(dtype=orig_dtype)
return latents
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
image = context.services.images.get_pil_image(self.image.image_name)
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)
name = f"{context.graph_execution_state_id}__{self.id}"
latents = latents.to("cpu")
context.services.latents.save(name, latents)
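
Note on the refactor above: `vae_encode` moves out of `invoke()` into a `@staticmethod` so `CreateDenoiseMaskInvocation` can encode the masked init image through the same code path (tiling, fp32 upcast handling) without constructing an `ImageToLatentsInvocation`. A minimal sketch of the shared usage, assuming this repo's module layout and a mask already resized to the image's spatial size:

```python
import torch

from invokeai.app.invocations.latents import ImageToLatentsInvocation

def encode_image_and_masked(vae_info, image_tensor: torch.Tensor, mask: torch.Tensor,
                            fp32: bool = False, tiled: bool = False):
    """Encode an init image and its masked variant with identical VAE settings."""
    # Blank out the region to be repainted before encoding, as CreateDenoiseMaskInvocation does.
    masked_image = image_tensor * torch.where(mask < 0.5, 0.0, 1.0)
    latents = ImageToLatentsInvocation.vae_encode(vae_info, fp32, tiled, image_tensor)
    masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, fp32, tiled, masked_image)
    return latents, masked_latents
```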

View File

@@ -294,6 +294,25 @@ class ImageCollectionInvocation(BaseInvocation):
return ImageCollectionOutput(collection=self.collection)
# endregion
# region DenoiseMask
class DenoiseMaskField(BaseModel):
"""An inpaint mask field"""
mask_name: str = Field(description="The name of the mask image")
masked_latents_name: Optional[str] = Field(description="The name of the masked image latents")
class DenoiseMaskOutput(BaseInvocationOutput):
"""Base class for nodes that output a single image"""
type: Literal["denoise_mask_output"] = "denoise_mask_output"
denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
# endregion
# region Latents

View File

@@ -144,7 +144,7 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool = Tr
w, h = trim_to_multiple_of(*image.size, multiple_of=multiple_of)
transformation = T.Compose(
[
T.Resize((h, w), T.InterpolationMode.LANCZOS),
T.Resize((h, w), T.InterpolationMode.LANCZOS, antialias=True),
T.ToTensor(),
]
)
@@ -358,6 +358,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
callback: Callable[[PipelineIntermediateState], None] = None,
control_data: List[ControlNetData] = None,
mask: Optional[torch.Tensor] = None,
masked_latents: Optional[torch.Tensor] = None,
seed: Optional[int] = None,
) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
if init_timestep.shape[0] == 0:
@@ -376,28 +377,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
latents = self.scheduler.add_noise(latents, noise, batched_t)
if mask is not None:
# if no noise is provided, noisify the unmasked area based on the seed (or 0 as a fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
if is_inpainting_model(self.unet):
# You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
# (that's why there's a mask!) but it seems to really want that blanked out.
# masked_latents = latents * torch.where(mask < 0.5, 1, 0) TODO: inpaint/outpaint/infill
if masked_latents is None:
raise Exception("Source image required for inpaint mask when inpaint model used!")
# TODO: we should probably pass this in so we don't have to try/finally around setting it.
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(self._unet_forward, mask, orig_latents)
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
self._unet_forward, mask, masked_latents
)
else:
# if no noise is provided, noisify the unmasked area based on the seed (or 0 as a fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise))
try:
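
To summarize the branch above: with an inpainting UNet, `AddsMaskLatents` concatenates the mask and the masked-image latents into the model input on every step, which is why `masked_latents` is now a hard requirement on that path. For ordinary models, `AddsMaskGuidance` instead re-blends the original latents back in each step; that blend is a masked lerp, as this self-contained sketch shows:

```python
import torch

def blend_step(orig_latents: torch.Tensor, new_latents: torch.Tensor,
               mask: torch.Tensor) -> torch.Tensor:
    # torch.lerp(a, b, w) == a + w * (b - a), so the result equals new_latents
    # where mask == 1 and orig_latents where mask == 0, matching the
    # torch.lerp(orig_latents, latents, mask) call in the hunk above.
    return torch.lerp(orig_latents, new_latents.to(orig_latents.dtype),
                      mask.to(orig_latents.dtype))
```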

View File

@@ -10,6 +10,7 @@ import ColorInputField from './inputs/ColorInputField';
import ConditioningInputField from './inputs/ConditioningInputField';
import ControlInputField from './inputs/ControlInputField';
import ControlNetModelInputField from './inputs/ControlNetModelInputField';
import DenoiseMaskInputField from './inputs/DenoiseMaskInputField';
import EnumInputField from './inputs/EnumInputField';
import ImageCollectionInputField from './inputs/ImageCollectionInputField';
import ImageInputField from './inputs/ImageInputField';
@@ -105,6 +106,19 @@ const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => {
);
}
if (
field?.type === 'DenoiseMaskField' &&
fieldTemplate?.type === 'DenoiseMaskField'
) {
return (
<DenoiseMaskInputField
nodeId={nodeId}
field={field}
fieldTemplate={fieldTemplate}
/>
);
}
if (
field?.type === 'ConditioningField' &&
fieldTemplate?.type === 'ConditioningField'

View File

@@ -0,0 +1,17 @@
import {
DenoiseMaskInputFieldTemplate,
DenoiseMaskInputFieldValue,
FieldComponentProps,
} from 'features/nodes/types/types';
import { memo } from 'react';
const DenoiseMaskInputFieldComponent = (
_props: FieldComponentProps<
DenoiseMaskInputFieldValue,
DenoiseMaskInputFieldTemplate
>
) => {
return null;
};
export default memo(DenoiseMaskInputFieldComponent);

View File

@@ -59,6 +59,11 @@ export const FIELDS: Record<FieldType, FieldUIConfig> = {
description: 'Images may be passed between nodes.',
color: 'purple.500',
},
DenoiseMaskField: {
title: 'Denoise Mask',
description: 'Denoise masks may be passed between nodes.',
color: 'red.700',
},
LatentsField: {
title: 'Latents',
description: 'Latents may be passed between nodes.',

View File

@@ -64,6 +64,7 @@ export const zFieldType = z.enum([
'string',
'array',
'ImageField',
'DenoiseMaskField',
'LatentsField',
'ConditioningField',
'ControlField',
@@ -120,6 +121,7 @@ export type InputFieldTemplate =
| StringInputFieldTemplate
| BooleanInputFieldTemplate
| ImageInputFieldTemplate
| DenoiseMaskInputFieldTemplate
| LatentsInputFieldTemplate
| ConditioningInputFieldTemplate
| UNetInputFieldTemplate
@@ -205,6 +207,12 @@ export const zConditioningField = z.object({
});
export type ConditioningField = z.infer<typeof zConditioningField>;
export const zDenoiseMaskField = z.object({
mask_name: z.string().trim().min(1),
masked_latents_name: z.string().trim().min(1).optional(),
});
export type DenoiseMaskFieldValue = z.infer<typeof zDenoiseMaskField>;
export const zIntegerInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('integer'),
value: z.number().optional(),
@@ -241,6 +249,14 @@ export const zLatentsInputFieldValue = zInputFieldValueBase.extend({
});
export type LatentsInputFieldValue = z.infer<typeof zLatentsInputFieldValue>;
export const zDenoiseMaskInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('DenoiseMaskField'),
value: zDenoiseMaskField.optional(),
});
export type DenoiseMaskInputFieldValue = z.infer<
typeof zDenoiseMaskInputFieldValue
>;
export const zConditioningInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('ConditioningField'),
value: zConditioningField.optional(),
@@ -459,6 +475,7 @@ export const zInputFieldValue = z.discriminatedUnion('type', [
zBooleanInputFieldValue,
zImageInputFieldValue,
zLatentsInputFieldValue,
zDenoiseMaskInputFieldValue,
zConditioningInputFieldValue,
zUNetInputFieldValue,
zClipInputFieldValue,
@@ -532,6 +549,11 @@ export type ImageCollectionInputFieldTemplate = InputFieldTemplateBase & {
type: 'ImageCollection';
};
export type DenoiseMaskInputFieldTemplate = InputFieldTemplateBase & {
default: undefined;
type: 'DenoiseMaskField';
};
export type LatentsInputFieldTemplate = InputFieldTemplateBase & {
default: string;
type: 'LatentsField';

View File

@@ -8,6 +8,7 @@ import {
ConditioningInputFieldTemplate,
ControlInputFieldTemplate,
ControlNetModelInputFieldTemplate,
DenoiseMaskInputFieldTemplate,
EnumInputFieldTemplate,
FieldType,
FloatInputFieldTemplate,
@@ -263,6 +264,19 @@ const buildImageCollectionInputFieldTemplate = ({
return template;
};
const buildDenoiseMaskInputFieldTemplate = ({
schemaObject,
baseField,
}: BuildInputFieldArg): DenoiseMaskInputFieldTemplate => {
const template: DenoiseMaskInputFieldTemplate = {
...baseField,
type: 'DenoiseMaskField',
default: schemaObject.default ?? undefined,
};
return template;
};
const buildLatentsInputFieldTemplate = ({
schemaObject,
baseField,
@@ -498,6 +512,12 @@ export const buildInputFieldTemplate = (
baseField,
});
}
if (fieldType === 'DenoiseMaskField') {
return buildDenoiseMaskInputFieldTemplate({
schemaObject: fieldSchema,
baseField,
});
}
if (fieldType === 'LatentsField') {
return buildLatentsInputFieldTemplate({
schemaObject: fieldSchema,

View File

@@ -49,6 +49,10 @@ export const buildInputFieldValue = (
fieldValue.value = [];
}
if (template.type === 'DenoiseMaskField') {
fieldValue.value = undefined;
}
if (template.type === 'LatentsField') {
fieldValue.value = undefined;
}

View File

@@ -9,6 +9,7 @@ import {
CANVAS_TEXT_TO_IMAGE_GRAPH,
IMAGE_TO_IMAGE_GRAPH,
IMAGE_TO_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
@@ -30,6 +31,11 @@ export const addVAEToGraph = (
modelLoaderNodeId: string = MAIN_MODEL_LOADER
): void => {
const { vae } = state.generation;
const { boundingBoxScaleMethod } = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
const isAutoVae = !vae;
const metadataAccumulator = graph.nodes[METADATA_ACCUMULATOR] as
@@ -76,7 +82,7 @@
field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: isUsingScaledDimensions ? LATENTS_TO_IMAGE : CANVAS_OUTPUT,
field: 'vae',
},
});
@@ -117,6 +123,16 @@
field: 'vae',
},
},
{
source: {
node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER,
field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'vae',
},
},
{
source: {
node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER,

View File

@@ -2,11 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import { initialGenerationState } from 'features/parameters/store/generationSlice';
import {
ImageDTO,
ImageResizeInvocation,
ImageToLatentsInvocation,
} from 'services/api/types';
import { ImageDTO, ImageToLatentsInvocation } from 'services/api/types';
import { addControlNetToLinearGraph } from './addControlNetToLinearGraph';
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
import { addLoRAsToGraph } from './addLoRAsToGraph';
@@ -19,12 +15,13 @@ import {
CLIP_SKIP,
DENOISE_LATENTS,
IMAGE_TO_LATENTS,
IMG2IMG_RESIZE,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
POSITIVE_CONDITIONING,
RESIZE,
} from './constants';
/**
@@ -43,6 +40,7 @@ export const buildCanvasImageToImageGraph = (
scheduler,
steps,
img2imgStrength: strength,
vaePrecision,
clipSkip,
shouldUseCpuNoise,
shouldUseNoiseSettings,
@@ -51,7 +49,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -104,15 +110,17 @@
id: NOISE,
is_intermediate: true,
use_cpu,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
},
[IMAGE_TO_LATENTS]: {
type: 'i2l',
id: IMAGE_TO_LATENTS,
is_intermediate: true,
// must be set manually later, bc `fit` parameter may require a resize node inserted
// image: {
// image_name: initialImage.image_name,
// },
},
[DENOISE_LATENTS]: {
type: 'denoise_latents',
@@ -214,82 +222,84 @@
field: 'latents',
},
},
// Decode the denoised latents to an image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[IMG2IMG_RESIZE] = {
id: IMG2IMG_RESIZE,
type: 'img_resize',
is_intermediate: true,
image: initialImage,
width: scaledBoundingBoxDimensions.width,
height: scaledBoundingBoxDimensions.height,
};
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: IMG2IMG_RESIZE,
field: 'image',
},
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
},
{
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
// handle `fit`
if (initialImage.width !== width || initialImage.height !== height) {
// The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS`
// Create a resize node, explicitly setting its image
const resizeNode: ImageResizeInvocation = {
id: RESIZE,
type: 'img_resize',
image: {
image_name: initialImage.image_name,
},
is_intermediate: true,
width,
height,
};
graph.nodes[RESIZE] = resizeNode;
// The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS`
graph.edges.push({
source: { node_id: RESIZE, field: 'image' },
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
});
// The `RESIZE` node also passes its width and height to `NOISE`
graph.edges.push({
source: { node_id: RESIZE, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
},
});
graph.edges.push({
source: { node_id: RESIZE, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
},
});
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
// We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = {
image_name: initialImage.image_name,
graph.nodes[CANVAS_OUTPUT] = {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
// Pass the image's dimensions to the `NOISE` node
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image =
initialImage;
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
});
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
@@ -300,8 +310,10 @@
type: 'metadata_accumulator',
generation_mode: 'img2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,
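
The scaled-bounding-box handling added here (and repeated in the other canvas builders below) follows one pattern: when `boundingBoxScaleMethod` is 'auto' or 'manual', noise and latents are produced at the scaled dimensions and a final `img_resize` node brings the decoded image back to the bounding-box size. The dimension selection reduces to this sketch (not the project's code):

```python
def generation_dimensions(width: int, height: int,
                          scaled: tuple[int, int], scale_method: str) -> tuple[int, int]:
    # Mirrors the isUsingScaledDimensions checks in the canvas graph builders.
    if scale_method in ("auto", "manual"):
        return scaled  # generate at the scaled size; resize the output afterwards
    return (width, height)
```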

View File

@@ -2,6 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import {
CreateDenoiseMaskInvocation,
ImageBlurInvocation,
ImageDTO,
ImageToLatentsInvocation,
@@ -15,13 +16,14 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_INPAINT_GRAPH,
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_INPAINT_GRAPH,
CANVAS_OUTPUT,
CLIP_SKIP,
DENOISE_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -127,6 +129,12 @@
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[NOISE]: {
type: 'noise',
id: NOISE,
@@ -276,16 +284,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -459,6 +478,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_IMAGE_RESIZE_UP,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {
@@ -516,6 +545,10 @@
...(graph.nodes[MASK_BLUR] as ImageBlurInvocation),
image: canvasMaskImage,
};
graph.nodes[INPAINT_CREATE_MASK] = {
...(graph.nodes[INPAINT_CREATE_MASK] as CreateDenoiseMaskInvocation),
image: canvasInitImage,
};
graph.edges.push(
// Color Correct The Inpainted Result
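
The net effect of this builder's changes, reduced to the new wiring, is sketched below as the graph JSON the frontend submits. Only `inpaint_create_mask` is a constant value shown in this diff (`constants.ts`); the other node IDs are illustrative:

```python
# New edges around the create_denoise_mask node (illustrative node IDs).
inpaint_mask_edges = [
    # The blurred canvas mask now feeds the mask-creation node, not denoise_latents directly.
    {"source": {"node_id": "mask_blur", "field": "image"},
     "destination": {"node_id": "inpaint_create_mask", "field": "mask"}},
    # The (resized) init image is encoded so inpainting UNets get masked latents.
    {"source": {"node_id": "inpaint_image_resize_up", "field": "image"},
     "destination": {"node_id": "inpaint_create_mask", "field": "image"}},
    # The combined result replaces the old raw-image mask input on the denoise node.
    {"source": {"node_id": "inpaint_create_mask", "field": "denoise_mask"},
     "destination": {"node_id": "denoise_latents", "field": "denoise_mask"}},
]
```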

View File

@@ -17,13 +14,14 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPAINT_GRAPH,
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPAINT_GRAPH,
CANVAS_OUTPUT,
CLIP_SKIP,
DENOISE_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -153,6 +154,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[DENOISE_LATENTS]: {
type: 'denoise_latents',
id: DENOISE_LATENTS,
@@ -317,16 +324,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -522,6 +540,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Take combined mask and resize and then blur
{
source: {
@@ -640,6 +668,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {

View File

@@ -2,11 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import { initialGenerationState } from 'features/parameters/store/generationSlice';
import {
ImageDTO,
ImageResizeInvocation,
ImageToLatentsInvocation,
} from 'services/api/types';
import { ImageDTO, ImageToLatentsInvocation } from 'services/api/types';
import { addControlNetToLinearGraph } from './addControlNetToLinearGraph';
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
@@ -17,11 +13,12 @@ import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
IMAGE_TO_LATENTS,
IMG2IMG_RESIZE,
LATENTS_TO_IMAGE,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
POSITIVE_CONDITIONING,
RESIZE,
SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH,
SDXL_DENOISE_LATENTS,
SDXL_MODEL_LOADER,
@@ -59,7 +56,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -109,16 +114,18 @@
id: NOISE,
is_intermediate: true,
use_cpu,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
},
[IMAGE_TO_LATENTS]: {
type: 'i2l',
id: IMAGE_TO_LATENTS,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
// must be set manually later, bc `fit` parameter may require a resize node inserted
// image: {
// image_name: initialImage.image_name,
// },
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
@@ -132,12 +139,6 @@
: 1 - strength,
denoising_end: shouldUseSDXLRefiner ? refinerStart : 1,
},
[CANVAS_OUTPUT]: {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
},
},
edges: [
// Connect Model Loader To UNet & CLIP
@@ -232,82 +233,84 @@
field: 'latents',
},
},
// Decode denoised latents to an image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[IMG2IMG_RESIZE] = {
id: IMG2IMG_RESIZE,
type: 'img_resize',
is_intermediate: true,
image: initialImage,
width: scaledBoundingBoxDimensions.width,
height: scaledBoundingBoxDimensions.height,
};
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: IMG2IMG_RESIZE,
field: 'image',
},
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
},
{
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
// handle `fit`
if (initialImage.width !== width || initialImage.height !== height) {
// The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS`
// Create a resize node, explicitly setting its image
const resizeNode: ImageResizeInvocation = {
id: RESIZE,
type: 'img_resize',
image: {
image_name: initialImage.image_name,
},
is_intermediate: true,
width,
height,
};
graph.nodes[RESIZE] = resizeNode;
// The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS`
graph.edges.push({
source: { node_id: RESIZE, field: 'image' },
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
});
// The `RESIZE` node also passes its width and height to `NOISE`
graph.edges.push({
source: { node_id: RESIZE, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
},
});
graph.edges.push({
source: { node_id: RESIZE, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
},
});
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
// We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = {
image_name: initialImage.image_name,
graph.nodes[CANVAS_OUTPUT] = {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
// Pass the image's dimensions to the `NOISE` node
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image =
initialImage;
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
});
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
@@ -318,8 +321,10 @@
type: 'metadata_accumulator',
generation_mode: 'img2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -2,6 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import {
CreateDenoiseMaskInvocation,
ImageBlurInvocation,
ImageDTO,
ImageToLatentsInvocation,
@@ -16,10 +17,11 @@ import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPUT,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -136,6 +138,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
id: SDXL_DENOISE_LATENTS,
@@ -290,16 +298,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -473,6 +492,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_IMAGE_RESIZE_UP,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {
@@ -530,6 +559,10 @@
...(graph.nodes[MASK_BLUR] as ImageBlurInvocation),
image: canvasMaskImage,
};
graph.nodes[INPAINT_CREATE_MASK] = {
...(graph.nodes[INPAINT_CREATE_MASK] as CreateDenoiseMaskInvocation),
image: canvasInitImage,
};
graph.edges.push(
// Color Correct The Inpainted Result

View File

@@ -18,10 +18,11 @@ import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPUT,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -156,6 +157,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
id: SDXL_DENOISE_LATENTS,
@@ -331,16 +338,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -537,6 +555,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Take combined mask and resize and then blur
{
source: {
@@ -655,6 +683,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {

View File

@@ -15,6 +15,7 @@ import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
LATENTS_TO_IMAGE,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
@@ -49,7 +50,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
const { shouldUseSDXLRefiner, refinerStart, shouldConcatSDXLStylePrompt } =
state.sdxl;
@@ -136,17 +145,15 @@
type: 'noise',
id: NOISE,
is_intermediate: true,
width,
height,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
use_cpu,
},
[t2lNode.id]: t2lNode,
[CANVAS_OUTPUT]: {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
},
},
edges: [
// Connect Model Loader to UNet and CLIP
@@ -231,19 +238,67 @@
field: 'noise',
},
},
// Decode Denoised Latents To Image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
graph.nodes[CANVAS_OUTPUT] = {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.edges.push({
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
// add metadata accumulator, which is only mostly populated - some fields are added later
graph.nodes[METADATA_ACCUMULATOR] = {
@@ -251,8 +306,10 @@
type: 'metadata_accumulator',
generation_mode: 'txt2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -17,6 +17,7 @@ import {
CANVAS_TEXT_TO_IMAGE_GRAPH,
CLIP_SKIP,
DENOISE_LATENTS,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
@@ -39,6 +40,7 @@
cfgScale: cfg_scale,
scheduler,
steps,
vaePrecision,
clipSkip,
shouldUseCpuNoise,
shouldUseNoiseSettings,
@@ -47,7 +49,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -131,16 +141,15 @@
type: 'noise',
id: NOISE,
is_intermediate: true,
width,
height,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
use_cpu,
},
[t2lNode.id]: t2lNode,
[CANVAS_OUTPUT]: {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
},
},
edges: [
// Connect Model Loader to UNet & CLIP Skip
@@ -216,19 +225,67 @@
field: 'noise',
},
},
// Decode denoised latents to image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
graph.nodes[CANVAS_OUTPUT] = {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.edges.push({
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
// add metadata accumulator, which is only mostly populated - some fields are added later
graph.nodes[METADATA_ACCUMULATOR] = {
@@ -236,8 +293,10 @@
type: 'metadata_accumulator',
generation_mode: 'txt2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -17,6 +17,7 @@ export const CLIP_SKIP = 'clip_skip';
export const IMAGE_TO_LATENTS = 'image_to_latents';
export const LATENTS_TO_LATENTS = 'latents_to_latents';
export const RESIZE = 'resize_image';
export const IMG2IMG_RESIZE = 'img2img_resize';
export const CANVAS_OUTPUT = 'canvas_output';
export const INPAINT_IMAGE = 'inpaint_image';
export const SCALED_INPAINT_IMAGE = 'scaled_inpaint_image';
@@ -25,6 +26,7 @@ export const INPAINT_IMAGE_RESIZE_DOWN = 'inpaint_image_resize_down';
export const INPAINT_INFILL = 'inpaint_infill';
export const INPAINT_INFILL_RESIZE_DOWN = 'inpaint_infill_resize_down';
export const INPAINT_FINAL_IMAGE = 'inpaint_final_image';
export const INPAINT_CREATE_MASK = 'inpaint_create_mask';
export const CANVAS_COHERENCE_DENOISE_LATENTS =
'canvas_coherence_denoise_latents';
export const CANVAS_COHERENCE_NOISE = 'canvas_coherence_noise';

File diff suppressed because one or more lines are too long

View File

@@ -111,6 +111,7 @@ export type ImageBlurInvocation = s['ImageBlurInvocation'];
export type ImageScaleInvocation = s['ImageScaleInvocation'];
export type InfillPatchMatchInvocation = s['InfillPatchMatchInvocation'];
export type InfillTileInvocation = s['InfillTileInvocation'];
export type CreateDenoiseMaskInvocation = s['CreateDenoiseMaskInvocation'];
export type RandomIntInvocation = s['RandomIntInvocation'];
export type CompelInvocation = s['CompelInvocation'];
export type DynamicPromptInvocation = s['DynamicPromptInvocation'];