fix: Inpaint Fixes (#4301)

## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

## Have you updated all relevant documentation?
- [ ] Yes
- [x] No


## Description
Fixes masked generation with inpainting models. Mask preparation moves into a new `create_denoise_mask` node, whose output (the mask plus the VAE-encoded masked image latents) is passed to `denoise_latents` as a `denoise_mask`.
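
For context: an inpainting UNet (e.g. `sd-1.5-inpainting`) takes extra input channels beyond the 4 noisy-latent channels, so the mask and the VAE-encoded masked image must reach the model itself instead of only being used to blend latents between steps. A rough sketch of the tensor layout (shapes are illustrative, not taken from this PR):

```python
import torch

# Illustrative shapes: batch=1, 4 latent channels, 64x64 latent grid.
latents = torch.randn(1, 4, 64, 64)         # noisy latents being denoised
mask = torch.rand(1, 1, 64, 64)             # single-channel mask on the latent grid
masked_latents = torch.randn(1, 4, 64, 64)  # VAE-encoded init image, repaint region blanked

# A 9-channel inpainting UNet expects 4 + 1 + 4 input channels:
unet_input = torch.cat([latents, mask, masked_latents], dim=1)
assert unet_input.shape[1] == 9
```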

## Related Tickets & Documents
- Closes #4295 

## Added/updated tests?

- [ ] Yes
- [x] No
Commit 502570e083 by Kent Keirsey, 2023-08-28 00:11:11 -04:00 (committed by GitHub)
22 changed files with 826 additions and 277 deletions

View File

@@ -375,6 +375,9 @@ class ImageResizeInvocation(BaseInvocation):
width: int = InputField(default=512, ge=64, multiple_of=8, description="The width to resize to (px)")
height: int = InputField(default=512, ge=64, multiple_of=8, description="The height to resize to (px)")
resample_mode: PIL_RESAMPLING_MODES = InputField(default="bicubic", description="The resampling mode")
metadata: Optional[CoreMetadata] = InputField(
default=None, description=FieldDescriptions.core_metadata, ui_hidden=True
)
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.services.images.get_pil_image(self.image.image_name)
@@ -393,6 +396,7 @@ class ImageResizeInvocation(BaseInvocation):
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
metadata=self.metadata.dict() if self.metadata else None,
)
return ImageOutput(

View File

@@ -21,6 +21,8 @@ from torchvision.transforms.functional import resize as tv_resize
from invokeai.app.invocations.metadata import CoreMetadata
from invokeai.app.invocations.primitives import (
DenoiseMaskField,
DenoiseMaskOutput,
ImageField,
ImageOutput,
LatentsField,
@@ -31,8 +33,8 @@ from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from invokeai.backend.model_management.models import ModelType, SilenceWarnings
from ...backend.model_management.models import BaseModelType
from ...backend.model_management.lora import ModelPatcher
from ...backend.model_management.models import BaseModelType
from ...backend.stable_diffusion import PipelineIntermediateState
from ...backend.stable_diffusion.diffusers_pipeline import (
ConditioningData,
@@ -44,16 +46,7 @@ from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import Post
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_precision, choose_torch_device
from ..models.image import ImageCategory, ResourceOrigin
from .baseinvocation import (
BaseInvocation,
FieldDescriptions,
Input,
InputField,
InvocationContext,
UIType,
tags,
title,
)
from .baseinvocation import BaseInvocation, FieldDescriptions, Input, InputField, InvocationContext, UIType, tags, title
from .compel import ConditioningField
from .controlnet_image_processors import ControlField
from .model import ModelInfo, UNetField, VaeField
@@ -64,6 +57,72 @@ DEFAULT_PRECISION = choose_precision(choose_torch_device())
SAMPLER_NAME_VALUES = Literal[tuple(list(SCHEDULER_MAP.keys()))]
@title("Create Denoise Mask")
@tags("mask", "denoise")
class CreateDenoiseMaskInvocation(BaseInvocation):
"""Creates mask for denoising model run."""
# Metadata
type: Literal["create_denoise_mask"] = "create_denoise_mask"
# Inputs
vae: VaeField = InputField(description=FieldDescriptions.vae, input=Input.Connection, ui_order=0)
image: Optional[ImageField] = InputField(default=None, description="Image which will be masked", ui_order=1)
mask: ImageField = InputField(description="The mask to use when pasting", ui_order=2)
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=3)
fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32, ui_order=4)
def prep_mask_tensor(self, mask_image):
if mask_image.mode != "L":
mask_image = mask_image.convert("L")
mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
if mask_tensor.dim() == 3:
mask_tensor = mask_tensor.unsqueeze(0)
# if shape is not None:
# mask_tensor = tv_resize(mask_tensor, shape, T.InterpolationMode.BILINEAR)
return mask_tensor
@torch.no_grad()
def invoke(self, context: InvocationContext) -> DenoiseMaskOutput:
if self.image is not None:
image = context.services.images.get_pil_image(self.image.image_name)
image = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image.dim() == 3:
image = image.unsqueeze(0)
else:
image = None
mask = self.prep_mask_tensor(
context.services.images.get_pil_image(self.mask.image_name),
)
if image is not None:
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
img_mask = tv_resize(mask, image.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
masked_image = image * torch.where(img_mask < 0.5, 0.0, 1.0)
# TODO:
masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, self.fp32, self.tiled, masked_image.clone())
masked_latents_name = f"{context.graph_execution_state_id}__{self.id}_masked_latents"
context.services.latents.save(masked_latents_name, masked_latents)
else:
masked_latents_name = None
mask_name = f"{context.graph_execution_state_id}__{self.id}_mask"
context.services.latents.save(mask_name, mask)
return DenoiseMaskOutput(
denoise_mask=DenoiseMaskField(
mask_name=mask_name,
masked_latents_name=masked_latents_name,
),
)
def get_scheduler(
context: InvocationContext,
scheduler_info: ModelInfo,
@@ -126,10 +185,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
control: Union[ControlField, list[ControlField]] = InputField(
default=None, description=FieldDescriptions.control, input=Input.Connection, ui_order=5
)
latents: Optional[LatentsField] = InputField(
description=FieldDescriptions.latents, input=Input.Connection, ui_order=4
)
mask: Optional[ImageField] = InputField(
latents: Optional[LatentsField] = InputField(description=FieldDescriptions.latents, input=Input.Connection)
denoise_mask: Optional[DenoiseMaskField] = InputField(
default=None,
description=FieldDescriptions.mask,
)
@@ -342,19 +399,18 @@ class DenoiseLatentsInvocation(BaseInvocation):
return num_inference_steps, timesteps, init_timestep
def prep_mask_tensor(self, mask, context, lantents):
if mask is None:
return None
def prep_inpaint_mask(self, context, latents):
if self.denoise_mask is None:
return None, None
mask_image = context.services.images.get_pil_image(mask.image_name)
if mask_image.mode != "L":
# FIXME: why do we get passed an RGB image here? We can only use single-channel.
mask_image = mask_image.convert("L")
mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
if mask_tensor.dim() == 3:
mask_tensor = mask_tensor.unsqueeze(0)
mask_tensor = tv_resize(mask_tensor, lantents.shape[-2:], T.InterpolationMode.BILINEAR)
return 1 - mask_tensor
mask = context.services.latents.get(self.denoise_mask.mask_name)
mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
if self.denoise_mask.masked_latents_name is not None:
masked_latents = context.services.latents.get(self.denoise_mask.masked_latents_name)
else:
masked_latents = None
return 1 - mask, masked_latents
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
@@ -375,7 +431,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
if seed is None:
seed = 0
mask = self.prep_mask_tensor(self.mask, context, latents)
mask, masked_latents = self.prep_inpaint_mask(context, latents)
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
@@ -406,6 +462,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
noise = noise.to(device=unet.device, dtype=unet.dtype)
if mask is not None:
mask = mask.to(device=unet.device, dtype=unet.dtype)
if masked_latents is not None:
masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
@@ -442,6 +500,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
noise=noise,
seed=seed,
mask=mask,
masked_latents=masked_latents,
num_inference_steps=num_inference_steps,
conditioning_data=conditioning_data,
control_data=control_data, # list[ControlNetData]
@@ -663,26 +722,11 @@ class ImageToLatentsInvocation(BaseInvocation):
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled)
fp32: bool = InputField(default=DEFAULT_PRECISION == "float32", description=FieldDescriptions.fp32)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
# image = context.services.images.get(
# self.image.image_type, self.image.image_name
# )
image = context.services.images.get_pil_image(self.image.image_name)
# vae_info = context.services.model_manager.get_model(**self.vae.vae.dict())
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
@staticmethod
def vae_encode(vae_info, upcast, tiled, image_tensor):
with vae_info as vae:
orig_dtype = vae.dtype
if self.fp32:
if upcast:
vae.to(dtype=torch.float32)
use_torch_2_0_or_xformers = isinstance(
@@ -707,7 +751,7 @@
vae.to(dtype=torch.float16)
# latents = latents.half()
if self.tiled:
if tiled:
vae.enable_tiling()
else:
vae.disable_tiling()
@@ -721,6 +765,23 @@
latents = vae.config.scaling_factor * latents
latents = latents.to(dtype=orig_dtype)
return latents
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
image = context.services.images.get_pil_image(self.image.image_name)
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
latents = self.vae_encode(vae_info, self.fp32, self.tiled, image_tensor)
name = f"{context.graph_execution_state_id}__{self.id}"
latents = latents.to("cpu")
context.services.latents.save(name, latents)
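
Note on the refactor above: `vae_encode` moves out of `invoke()` into a `@staticmethod` so `CreateDenoiseMaskInvocation` can encode the masked init image through the same code path (tiling, fp32 upcast handling) without constructing an `ImageToLatentsInvocation`. A minimal sketch of the shared usage, assuming this repo's module layout and a mask already resized to the image's spatial size:

```python
import torch

from invokeai.app.invocations.latents import ImageToLatentsInvocation

def encode_image_and_masked(vae_info, image_tensor: torch.Tensor, mask: torch.Tensor,
                            fp32: bool = False, tiled: bool = False):
    """Encode an init image and its masked variant with identical VAE settings."""
    # Blank out the region to be repainted before encoding, as CreateDenoiseMaskInvocation does.
    masked_image = image_tensor * torch.where(mask < 0.5, 0.0, 1.0)
    latents = ImageToLatentsInvocation.vae_encode(vae_info, fp32, tiled, image_tensor)
    masked_latents = ImageToLatentsInvocation.vae_encode(vae_info, fp32, tiled, masked_image)
    return latents, masked_latents
```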

View File

@@ -294,6 +294,25 @@ class ImageCollectionInvocation(BaseInvocation):
return ImageCollectionOutput(collection=self.collection)
# endregion
# region DenoiseMask
class DenoiseMaskField(BaseModel):
"""An inpaint mask field"""
mask_name: str = Field(description="The name of the mask image")
masked_latents_name: Optional[str] = Field(description="The name of the masked image latents")
class DenoiseMaskOutput(BaseInvocationOutput):
"""Base class for nodes that output a single image"""
type: Literal["denoise_mask_output"] = "denoise_mask_output"
denoise_mask: DenoiseMaskField = OutputField(description="Mask for denoise model run")
# endregion
# region Latents

View File

@@ -144,7 +144,7 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool = Tr
w, h = trim_to_multiple_of(*image.size, multiple_of=multiple_of)
transformation = T.Compose(
[
T.Resize((h, w), T.InterpolationMode.LANCZOS),
T.Resize((h, w), T.InterpolationMode.LANCZOS, antialias=True),
T.ToTensor(),
]
)
@@ -358,6 +358,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
callback: Callable[[PipelineIntermediateState], None] = None,
control_data: List[ControlNetData] = None,
mask: Optional[torch.Tensor] = None,
masked_latents: Optional[torch.Tensor] = None,
seed: Optional[int] = None,
) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
if init_timestep.shape[0] == 0:
@@ -376,28 +377,28 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
latents = self.scheduler.add_noise(latents, noise, batched_t)
if mask is not None:
# if no noise is provided, noisify the unmasked area based on the seed (or 0 as a fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
if is_inpainting_model(self.unet):
# You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
# (that's why there's a mask!) but it seems to really want that blanked out.
# masked_latents = latents * torch.where(mask < 0.5, 1, 0) TODO: inpaint/outpaint/infill
if masked_latents is None:
raise Exception("Source image required for inpaint mask when inpaint model used!")
# TODO: we should probably pass this in so we don't have to try/finally around setting it.
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(self._unet_forward, mask, orig_latents)
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
self._unet_forward, mask, masked_latents
)
else:
# if no noise is provided, noisify the unmasked area based on the seed (or 0 as a fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise))
try:
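
To summarize the branch above: with an inpainting UNet, `AddsMaskLatents` concatenates the mask and the masked-image latents into the model input on every step, which is why `masked_latents` is now a hard requirement on that path. For ordinary models, `AddsMaskGuidance` instead re-blends the original latents back in each step; that blend is a masked lerp, as this self-contained sketch shows:

```python
import torch

def blend_step(orig_latents: torch.Tensor, new_latents: torch.Tensor,
               mask: torch.Tensor) -> torch.Tensor:
    # torch.lerp(a, b, w) == a + w * (b - a), so the result equals new_latents
    # where mask == 1 and orig_latents where mask == 0, matching the
    # torch.lerp(orig_latents, latents, mask) call in the hunk above.
    return torch.lerp(orig_latents, new_latents.to(orig_latents.dtype),
                      mask.to(orig_latents.dtype))
```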

View File

@@ -10,6 +10,7 @@ import ColorInputField from './inputs/ColorInputField';
import ConditioningInputField from './inputs/ConditioningInputField';
import ControlInputField from './inputs/ControlInputField';
import ControlNetModelInputField from './inputs/ControlNetModelInputField';
import DenoiseMaskInputField from './inputs/DenoiseMaskInputField';
import EnumInputField from './inputs/EnumInputField';
import ImageCollectionInputField from './inputs/ImageCollectionInputField';
import ImageInputField from './inputs/ImageInputField';
@@ -105,6 +106,19 @@ const InputFieldRenderer = ({ nodeId, fieldName }: InputFieldProps) => {
);
}
if (
field?.type === 'DenoiseMaskField' &&
fieldTemplate?.type === 'DenoiseMaskField'
) {
return (
<DenoiseMaskInputField
nodeId={nodeId}
field={field}
fieldTemplate={fieldTemplate}
/>
);
}
if (
field?.type === 'ConditioningField' &&
fieldTemplate?.type === 'ConditioningField'

View File

@@ -0,0 +1,17 @@
import {
DenoiseMaskInputFieldTemplate,
DenoiseMaskInputFieldValue,
FieldComponentProps,
} from 'features/nodes/types/types';
import { memo } from 'react';
const DenoiseMaskInputFieldComponent = (
_props: FieldComponentProps<
DenoiseMaskInputFieldValue,
DenoiseMaskInputFieldTemplate
>
) => {
return null;
};
export default memo(DenoiseMaskInputFieldComponent);

View File

@@ -59,6 +59,11 @@ export const FIELDS: Record<FieldType, FieldUIConfig> = {
description: 'Images may be passed between nodes.',
color: 'purple.500',
},
DenoiseMaskField: {
title: 'Denoise Mask',
description: 'Denoise masks may be passed between nodes.',
color: 'red.700',
},
LatentsField: {
title: 'Latents',
description: 'Latents may be passed between nodes.',

View File

@@ -64,6 +64,7 @@ export const zFieldType = z.enum([
'string',
'array',
'ImageField',
'DenoiseMaskField',
'LatentsField',
'ConditioningField',
'ControlField',
@@ -120,6 +121,7 @@ export type InputFieldTemplate =
| StringInputFieldTemplate
| BooleanInputFieldTemplate
| ImageInputFieldTemplate
| DenoiseMaskInputFieldTemplate
| LatentsInputFieldTemplate
| ConditioningInputFieldTemplate
| UNetInputFieldTemplate
@@ -205,6 +207,12 @@ export const zConditioningField = z.object({
});
export type ConditioningField = z.infer<typeof zConditioningField>;
export const zDenoiseMaskField = z.object({
mask_name: z.string().trim().min(1),
masked_latents_name: z.string().trim().min(1).optional(),
});
export type DenoiseMaskFieldValue = z.infer<typeof zDenoiseMaskField>;
export const zIntegerInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('integer'),
value: z.number().optional(),
@@ -241,6 +249,14 @@ export const zLatentsInputFieldValue = zInputFieldValueBase.extend({
});
export type LatentsInputFieldValue = z.infer<typeof zLatentsInputFieldValue>;
export const zDenoiseMaskInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('DenoiseMaskField'),
value: zDenoiseMaskField.optional(),
});
export type DenoiseMaskInputFieldValue = z.infer<
typeof zDenoiseMaskInputFieldValue
>;
export const zConditioningInputFieldValue = zInputFieldValueBase.extend({
type: z.literal('ConditioningField'),
value: zConditioningField.optional(),
@@ -459,6 +475,7 @@ export const zInputFieldValue = z.discriminatedUnion('type', [
zBooleanInputFieldValue,
zImageInputFieldValue,
zLatentsInputFieldValue,
zDenoiseMaskInputFieldValue,
zConditioningInputFieldValue,
zUNetInputFieldValue,
zClipInputFieldValue,
@@ -532,6 +549,11 @@ export type ImageCollectionInputFieldTemplate = InputFieldTemplateBase & {
type: 'ImageCollection';
};
export type DenoiseMaskInputFieldTemplate = InputFieldTemplateBase & {
default: undefined;
type: 'DenoiseMaskField';
};
export type LatentsInputFieldTemplate = InputFieldTemplateBase & {
default: string;
type: 'LatentsField';

View File

@@ -8,6 +8,7 @@ import {
ConditioningInputFieldTemplate,
ControlInputFieldTemplate,
ControlNetModelInputFieldTemplate,
DenoiseMaskInputFieldTemplate,
EnumInputFieldTemplate,
FieldType,
FloatInputFieldTemplate,
@@ -263,6 +264,19 @@ const buildImageCollectionInputFieldTemplate = ({
return template;
};
const buildDenoiseMaskInputFieldTemplate = ({
schemaObject,
baseField,
}: BuildInputFieldArg): DenoiseMaskInputFieldTemplate => {
const template: DenoiseMaskInputFieldTemplate = {
...baseField,
type: 'DenoiseMaskField',
default: schemaObject.default ?? undefined,
};
return template;
};
const buildLatentsInputFieldTemplate = ({
schemaObject,
baseField,
@@ -498,6 +512,12 @@ export const buildInputFieldTemplate = (
baseField,
});
}
if (fieldType === 'DenoiseMaskField') {
return buildDenoiseMaskInputFieldTemplate({
schemaObject: fieldSchema,
baseField,
});
}
if (fieldType === 'LatentsField') {
return buildLatentsInputFieldTemplate({
schemaObject: fieldSchema,

View File

@@ -49,6 +49,10 @@ export const buildInputFieldValue = (
fieldValue.value = [];
}
if (template.type === 'DenoiseMaskField') {
fieldValue.value = undefined;
}
if (template.type === 'LatentsField') {
fieldValue.value = undefined;
}

View File

@@ -9,6 +9,7 @@ import {
CANVAS_TEXT_TO_IMAGE_GRAPH,
IMAGE_TO_IMAGE_GRAPH,
IMAGE_TO_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
@@ -30,6 +31,11 @@ export const addVAEToGraph = (
modelLoaderNodeId: string = MAIN_MODEL_LOADER
): void => {
const { vae } = state.generation;
const { boundingBoxScaleMethod } = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
const isAutoVae = !vae;
const metadataAccumulator = graph.nodes[METADATA_ACCUMULATOR] as
@@ -76,7 +82,7 @@
field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: isUsingScaledDimensions ? LATENTS_TO_IMAGE : CANVAS_OUTPUT,
field: 'vae',
},
});
@@ -117,6 +123,16 @@
field: 'vae',
},
},
{
source: {
node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER,
field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'vae',
},
},
{
source: {
node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER,

View File

@@ -2,11 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import { initialGenerationState } from 'features/parameters/store/generationSlice';
import {
ImageDTO,
ImageResizeInvocation,
ImageToLatentsInvocation,
} from 'services/api/types';
import { ImageDTO, ImageToLatentsInvocation } from 'services/api/types';
import { addControlNetToLinearGraph } from './addControlNetToLinearGraph';
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
import { addLoRAsToGraph } from './addLoRAsToGraph';
@@ -19,12 +15,13 @@ import {
CLIP_SKIP,
DENOISE_LATENTS,
IMAGE_TO_LATENTS,
IMG2IMG_RESIZE,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
POSITIVE_CONDITIONING,
RESIZE,
} from './constants';
/**
@@ -43,6 +40,7 @@ export const buildCanvasImageToImageGraph = (
scheduler,
steps,
img2imgStrength: strength,
vaePrecision,
clipSkip,
shouldUseCpuNoise,
shouldUseNoiseSettings,
@@ -51,7 +49,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -104,15 +110,17 @@
id: NOISE,
is_intermediate: true,
use_cpu,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
},
[IMAGE_TO_LATENTS]: {
type: 'i2l',
id: IMAGE_TO_LATENTS,
is_intermediate: true,
// must be set manually later, bc `fit` parameter may require a resize node inserted
// image: {
// image_name: initialImage.image_name,
// },
},
[DENOISE_LATENTS]: {
type: 'denoise_latents',
@@ -214,82 +222,84 @@
field: 'latents',
},
},
// Decode the denoised latents to an image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[IMG2IMG_RESIZE] = {
id: IMG2IMG_RESIZE,
type: 'img_resize',
is_intermediate: true,
image: initialImage,
width: scaledBoundingBoxDimensions.width,
height: scaledBoundingBoxDimensions.height,
};
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: IMG2IMG_RESIZE,
field: 'image',
},
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
},
{
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
// handle `fit`
if (initialImage.width !== width || initialImage.height !== height) {
// The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS`
// Create a resize node, explicitly setting its image
const resizeNode: ImageResizeInvocation = {
id: RESIZE,
type: 'img_resize',
image: {
image_name: initialImage.image_name,
},
is_intermediate: true,
width,
height,
};
graph.nodes[RESIZE] = resizeNode;
// The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS`
graph.edges.push({
source: { node_id: RESIZE, field: 'image' },
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
});
// The `RESIZE` node also passes its width and height to `NOISE`
graph.edges.push({
source: { node_id: RESIZE, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
},
});
graph.edges.push({
source: { node_id: RESIZE, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
},
});
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
// We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = {
image_name: initialImage.image_name,
graph.nodes[CANVAS_OUTPUT] = {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
// Pass the image's dimensions to the `NOISE` node
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image =
initialImage;
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
});
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
@@ -300,8 +310,10 @@
type: 'metadata_accumulator',
generation_mode: 'img2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,
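
The scaled-bounding-box handling added here (and repeated in the other canvas builders below) follows one pattern: when `boundingBoxScaleMethod` is 'auto' or 'manual', noise and latents are produced at the scaled dimensions and a final `img_resize` node brings the decoded image back to the bounding-box size. The dimension selection reduces to this sketch (not the project's code):

```python
def generation_dimensions(width: int, height: int,
                          scaled: tuple[int, int], scale_method: str) -> tuple[int, int]:
    # Mirrors the isUsingScaledDimensions checks in the canvas graph builders.
    if scale_method in ("auto", "manual"):
        return scaled  # generate at the scaled size; resize the output afterwards
    return (width, height)
```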

View File

@@ -2,6 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import {
CreateDenoiseMaskInvocation,
ImageBlurInvocation,
ImageDTO,
ImageToLatentsInvocation,
@@ -15,13 +16,14 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_INPAINT_GRAPH,
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_INPAINT_GRAPH,
CANVAS_OUTPUT,
CLIP_SKIP,
DENOISE_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -127,6 +129,12 @@
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[NOISE]: {
type: 'noise',
id: NOISE,
@@ -276,16 +284,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -459,6 +478,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_IMAGE_RESIZE_UP,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {
@@ -516,6 +545,10 @@
...(graph.nodes[MASK_BLUR] as ImageBlurInvocation),
image: canvasMaskImage,
};
graph.nodes[INPAINT_CREATE_MASK] = {
...(graph.nodes[INPAINT_CREATE_MASK] as CreateDenoiseMaskInvocation),
image: canvasInitImage,
};
graph.edges.push(
// Color Correct The Inpainted Result
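
The net effect of this builder's changes, reduced to the new wiring, is sketched below as the graph JSON the frontend submits. Only `inpaint_create_mask` is a constant value shown in this diff (`constants.ts`); the other node IDs are illustrative:

```python
# New edges around the create_denoise_mask node (illustrative node IDs).
inpaint_mask_edges = [
    # The blurred canvas mask now feeds the mask-creation node, not denoise_latents directly.
    {"source": {"node_id": "mask_blur", "field": "image"},
     "destination": {"node_id": "inpaint_create_mask", "field": "mask"}},
    # The (resized) init image is encoded so inpainting UNets get masked latents.
    {"source": {"node_id": "inpaint_image_resize_up", "field": "image"},
     "destination": {"node_id": "inpaint_create_mask", "field": "image"}},
    # The combined result replaces the old raw-image mask input on the denoise node.
    {"source": {"node_id": "inpaint_create_mask", "field": "denoise_mask"},
     "destination": {"node_id": "denoise_latents", "field": "denoise_mask"}},
]
```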

View File

@@ -17,13 +14,14 @@ import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPAINT_GRAPH,
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPAINT_GRAPH,
CANVAS_OUTPUT,
CLIP_SKIP,
DENOISE_LATENTS,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -153,6 +154,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[DENOISE_LATENTS]: {
type: 'denoise_latents',
id: DENOISE_LATENTS,
@@ -317,16 +324,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -522,6 +540,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Take combined mask and resize and then blur
{
source: {
@@ -640,6 +668,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {

View File

@@ -2,11 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import { initialGenerationState } from 'features/parameters/store/generationSlice';
import {
ImageDTO,
ImageResizeInvocation,
ImageToLatentsInvocation,
} from 'services/api/types';
import { ImageDTO, ImageToLatentsInvocation } from 'services/api/types';
import { addControlNetToLinearGraph } from './addControlNetToLinearGraph';
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
import { addNSFWCheckerToGraph } from './addNSFWCheckerToGraph';
@@ -17,11 +13,12 @@ import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
IMAGE_TO_LATENTS,
IMG2IMG_RESIZE,
LATENTS_TO_IMAGE,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
POSITIVE_CONDITIONING,
RESIZE,
SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH,
SDXL_DENOISE_LATENTS,
SDXL_MODEL_LOADER,
@@ -59,7 +56,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -109,16 +114,18 @@
id: NOISE,
is_intermediate: true,
use_cpu,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
},
[IMAGE_TO_LATENTS]: {
type: 'i2l',
id: IMAGE_TO_LATENTS,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
// must be set manually later, bc `fit` parameter may require a resize node inserted
// image: {
// image_name: initialImage.image_name,
// },
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
@@ -132,12 +139,6 @@
: 1 - strength,
denoising_end: shouldUseSDXLRefiner ? refinerStart : 1,
},
[CANVAS_OUTPUT]: {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
},
},
edges: [
// Connect Model Loader To UNet & CLIP
@@ -232,82 +233,84 @@
field: 'latents',
},
},
// Decode denoised latents to an image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[IMG2IMG_RESIZE] = {
id: IMG2IMG_RESIZE,
type: 'img_resize',
is_intermediate: true,
image: initialImage,
width: scaledBoundingBoxDimensions.width,
height: scaledBoundingBoxDimensions.height,
};
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: IMG2IMG_RESIZE,
field: 'image',
},
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
},
{
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
// handle `fit`
if (initialImage.width !== width || initialImage.height !== height) {
// The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS`
// Create a resize node, explicitly setting its image
const resizeNode: ImageResizeInvocation = {
id: RESIZE,
type: 'img_resize',
image: {
image_name: initialImage.image_name,
},
is_intermediate: true,
width,
height,
};
graph.nodes[RESIZE] = resizeNode;
// The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS`
graph.edges.push({
source: { node_id: RESIZE, field: 'image' },
destination: {
node_id: IMAGE_TO_LATENTS,
field: 'image',
},
});
// The `RESIZE` node also passes its width and height to `NOISE`
graph.edges.push({
source: { node_id: RESIZE, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
},
});
graph.edges.push({
source: { node_id: RESIZE, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
},
});
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
// We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = {
image_name: initialImage.image_name,
graph.nodes[CANVAS_OUTPUT] = {
type: 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
// Pass the image's dimensions to the `NOISE` node
(graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image =
initialImage;
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'width' },
destination: {
node_id: NOISE,
field: 'width',
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
});
graph.edges.push({
source: { node_id: IMAGE_TO_LATENTS, field: 'height' },
destination: {
node_id: NOISE,
field: 'height',
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
@@ -318,8 +321,10 @@
type: 'metadata_accumulator',
generation_mode: 'img2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -2,6 +2,7 @@ import { logger } from 'app/logging/logger';
import { RootState } from 'app/store/store';
import { NonNullableGraph } from 'features/nodes/types/types';
import {
CreateDenoiseMaskInvocation,
ImageBlurInvocation,
ImageDTO,
ImageToLatentsInvocation,
@@ -16,10 +17,11 @@ import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPUT,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -136,6 +138,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
id: SDXL_DENOISE_LATENTS,
@@ -290,16 +298,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -473,6 +492,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_IMAGE_RESIZE_UP,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {
@@ -530,6 +559,10 @@
...(graph.nodes[MASK_BLUR] as ImageBlurInvocation),
image: canvasMaskImage,
};
graph.nodes[INPAINT_CREATE_MASK] = {
...(graph.nodes[INPAINT_CREATE_MASK] as CreateDenoiseMaskInvocation),
image: canvasInitImage,
};
graph.edges.push(
// Color Correct The Inpainted Result

View File

@@ -18,10 +18,11 @@ import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
CANVAS_COHERENCE_DENOISE_LATENTS,
CANVAS_COHERENCE_NOISE,
CANVAS_COHERENCE_NOISE_INCREMENT,
CANVAS_OUTPUT,
INPAINT_CREATE_MASK,
INPAINT_IMAGE,
INPAINT_IMAGE_RESIZE_DOWN,
INPAINT_IMAGE_RESIZE_UP,
@@ -156,6 +157,12 @@
use_cpu,
is_intermediate: true,
},
[INPAINT_CREATE_MASK]: {
type: 'create_denoise_mask',
id: INPAINT_CREATE_MASK,
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
},
[SDXL_DENOISE_LATENTS]: {
type: 'denoise_latents',
id: SDXL_DENOISE_LATENTS,
@@ -331,16 +338,27 @@
field: 'latents',
},
},
// Create Inpaint Mask
{
source: {
node_id: MASK_BLUR,
field: 'image',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
node_id: INPAINT_CREATE_MASK,
field: 'mask',
},
},
{
source: {
node_id: INPAINT_CREATE_MASK,
field: 'denoise_mask',
},
destination: {
node_id: SDXL_DENOISE_LATENTS,
field: 'denoise_mask',
},
},
// Iterate
{
source: {
@@ -537,6 +555,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Take combined mask and resize and then blur
{
source: {
@@ -655,6 +683,16 @@
field: 'image',
},
},
{
source: {
node_id: INPAINT_INFILL,
field: 'image',
},
destination: {
node_id: INPAINT_CREATE_MASK,
field: 'image',
},
},
// Color Correct The Inpainted Result
{
source: {

View File

@@ -15,6 +15,7 @@ import { addVAEToGraph } from './addVAEToGraph';
import { addWatermarkerToGraph } from './addWatermarkerToGraph';
import {
CANVAS_OUTPUT,
LATENTS_TO_IMAGE,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
NOISE,
@@ -49,7 +50,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
const { shouldUseSDXLRefiner, refinerStart, shouldConcatSDXLStylePrompt } =
state.sdxl;
@@ -136,17 +145,15 @@
type: 'noise',
id: NOISE,
is_intermediate: true,
width,
height,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
use_cpu,
},
[t2lNode.id]: t2lNode,
[CANVAS_OUTPUT]: {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
},
},
edges: [
// Connect Model Loader to UNet and CLIP
@@ -231,19 +238,67 @@
field: 'noise',
},
},
// Decode Denoised Latents To Image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
graph.nodes[CANVAS_OUTPUT] = {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.edges.push({
source: {
node_id: SDXL_DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
// add metadata accumulator, which is only mostly populated - some fields are added later
graph.nodes[METADATA_ACCUMULATOR] = {
@@ -251,8 +306,10 @@
type: 'metadata_accumulator',
generation_mode: 'txt2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -17,6 +17,7 @@ import {
CANVAS_TEXT_TO_IMAGE_GRAPH,
CLIP_SKIP,
DENOISE_LATENTS,
LATENTS_TO_IMAGE,
MAIN_MODEL_LOADER,
METADATA_ACCUMULATOR,
NEGATIVE_CONDITIONING,
@@ -39,6 +40,7 @@
cfgScale: cfg_scale,
scheduler,
steps,
vaePrecision,
clipSkip,
shouldUseCpuNoise,
shouldUseNoiseSettings,
@@ -47,7 +49,15 @@
// The bounding box determines width and height, not the width and height params
const { width, height } = state.canvas.boundingBoxDimensions;
const { shouldAutoSave } = state.canvas;
const {
scaledBoundingBoxDimensions,
boundingBoxScaleMethod,
shouldAutoSave,
} = state.canvas;
const isUsingScaledDimensions = ['auto', 'manual'].includes(
boundingBoxScaleMethod
);
if (!model) {
log.error('No model found in state');
@@ -131,16 +141,15 @@
type: 'noise',
id: NOISE,
is_intermediate: true,
width,
height,
width: !isUsingScaledDimensions
? width
: scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
use_cpu,
},
[t2lNode.id]: t2lNode,
[CANVAS_OUTPUT]: {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
},
},
edges: [
// Connect Model Loader to UNet & CLIP Skip
@@ -216,19 +225,67 @@
field: 'noise',
},
},
// Decode denoised latents to image
],
};
// Decode Latents To Image & Handle Scaled Before Processing
if (isUsingScaledDimensions) {
graph.nodes[LATENTS_TO_IMAGE] = {
id: LATENTS_TO_IMAGE,
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
is_intermediate: true,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.nodes[CANVAS_OUTPUT] = {
id: CANVAS_OUTPUT,
type: 'img_resize',
is_intermediate: !shouldAutoSave,
width: width,
height: height,
};
graph.edges.push(
{
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
node_id: LATENTS_TO_IMAGE,
field: 'latents',
},
},
],
};
{
source: {
node_id: LATENTS_TO_IMAGE,
field: 'image',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'image',
},
}
);
} else {
graph.nodes[CANVAS_OUTPUT] = {
type: isUsingOnnxModel ? 'l2i_onnx' : 'l2i',
id: CANVAS_OUTPUT,
is_intermediate: !shouldAutoSave,
fp32: vaePrecision === 'fp32' ? true : false,
};
graph.edges.push({
source: {
node_id: DENOISE_LATENTS,
field: 'latents',
},
destination: {
node_id: CANVAS_OUTPUT,
field: 'latents',
},
});
}
// add metadata accumulator, which is only mostly populated - some fields are added later
graph.nodes[METADATA_ACCUMULATOR] = {
@@ -236,8 +293,10 @@
type: 'metadata_accumulator',
generation_mode: 'txt2img',
cfg_scale,
height,
width,
width: !isUsingScaledDimensions ? width : scaledBoundingBoxDimensions.width,
height: !isUsingScaledDimensions
? height
: scaledBoundingBoxDimensions.height,
positive_prompt: '', // set in addDynamicPromptsToGraph
negative_prompt: negativePrompt,
model,

View File

@@ -17,6 +17,7 @@ export const CLIP_SKIP = 'clip_skip';
export const IMAGE_TO_LATENTS = 'image_to_latents';
export const LATENTS_TO_LATENTS = 'latents_to_latents';
export const RESIZE = 'resize_image';
export const IMG2IMG_RESIZE = 'img2img_resize';
export const CANVAS_OUTPUT = 'canvas_output';
export const INPAINT_IMAGE = 'inpaint_image';
export const SCALED_INPAINT_IMAGE = 'scaled_inpaint_image';
@@ -25,6 +26,7 @@ export const INPAINT_IMAGE_RESIZE_DOWN = 'inpaint_image_resize_down';
export const INPAINT_INFILL = 'inpaint_infill';
export const INPAINT_INFILL_RESIZE_DOWN = 'inpaint_infill_resize_down';
export const INPAINT_FINAL_IMAGE = 'inpaint_final_image';
export const INPAINT_CREATE_MASK = 'inpaint_create_mask';
export const CANVAS_COHERENCE_DENOISE_LATENTS =
'canvas_coherence_denoise_latents';
export const CANVAS_COHERENCE_NOISE = 'canvas_coherence_noise';

File diff suppressed because one or more lines are too long

View File

@@ -111,6 +111,7 @@ export type ImageBlurInvocation = s['ImageBlurInvocation'];
export type ImageScaleInvocation = s['ImageScaleInvocation'];
export type InfillPatchMatchInvocation = s['InfillPatchMatchInvocation'];
export type InfillTileInvocation = s['InfillTileInvocation'];
export type CreateDenoiseMaskInvocation = s['CreateDenoiseMaskInvocation'];
export type RandomIntInvocation = s['RandomIntInvocation'];
export type CompelInvocation = s['CompelInvocation'];
export type DynamicPromptInvocation = s['DynamicPromptInvocation'];