Add MaskEdge and ColorCorrect nodes

Co-Authored-By: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com>
2024-08-30 20:32:17 +00:00 · 2023-08-08 23:57:02 +03:00
parent f7aec3b934
commit b4a74f6523
1 changed files with 170 additions and 3 deletions
--- a/invokeai/app/invocations/image.py
+++ b/invokeai/app/invocations/image.py
@ -2,6 +2,7 @@
 from typing import Literal, Optional
 import cv2
 import numpy
 from PIL import Image, ImageFilter, ImageOps, ImageChops
 from pydantic import Field
@ -142,9 +143,10 @@ class ImagePasteInvocation(BaseInvocation, PILInvocationConfig):
    def invoke(self, context: InvocationContext) -> ImageOutput:
        base_image = context.services.images.get_pil_image(self.base_image.image_name)
        image = context.services.images.get_pil_image(self.image.image_name)
-        mask = (
+        mask = None
-            None if self.mask is None else ImageOps.invert(context.services.images.get_pil_image(self.mask.image_name))
+        if self.mask is not None:
-        )
+            mask = context.services.images.get_pil_image(self.mask.image_name)
            mask = ImageOps.invert(mask.convert("L"))
        # TODO: probably shouldn't invert mask here... should user be required to do it?
        min_x = min(0, self.x)
@ -650,3 +652,168 @@ class ImageWatermarkInvocation(BaseInvocation, PILInvocationConfig):
            width=image_dto.width,
            height=image_dto.height,
        )
 class MaskEdgeInvocation(BaseInvocation, PILInvocationConfig):
    """Applies an edge mask to an image"""
    # fmt: off
    type: Literal["mask_edge"] = "mask_edge"
    # Inputs
    image: Optional[ImageField] = Field(default=None, description="The image to apply the mask to")
    edge_size: int = Field(description="The size of the edge")
    edge_blur: int = Field(description="The amount of blur on the edge")
    low_threshold: int = Field(description="First threshold for the hysteresis procedure in Canny edge detection")
    high_threshold: int = Field(description="Second threshold for the hysteresis procedure in Canny edge detection")
    # fmt: on
    def invoke(self, context: InvocationContext) -> MaskOutput:
        mask = context.services.images.get_pil_image(self.image.image_name)
        npimg = numpy.asarray(mask, dtype=numpy.uint8)
        npgradient = numpy.uint8(
            255 * (1.0 - numpy.floor(numpy.abs(0.5 - numpy.float32(npimg) / 255.0) * 2.0))
        )
        npedge = cv2.Canny(npimg, threshold1=self.low_threshold, threshold2=self.high_threshold)
        npmask = npgradient + npedge
        npmask = cv2.dilate(
            npmask, numpy.ones((3, 3), numpy.uint8), iterations=int(self.edge_size / 2)
        )
        new_mask = Image.fromarray(npmask)
        if self.edge_blur > 0:
            new_mask = new_mask.filter(ImageFilter.BoxBlur(self.edge_blur))
        new_mask = ImageOps.invert(new_mask)
        image_dto = context.services.images.create(
            image=new_mask,
            image_origin=ResourceOrigin.INTERNAL,
            image_category=ImageCategory.MASK,
            node_id=self.id,
            session_id=context.graph_execution_state_id,
            is_intermediate=self.is_intermediate,
        )
        return MaskOutput(
            mask=ImageField(image_name=image_dto.image_name),
            width=image_dto.width,
            height=image_dto.height,
        )
 class ColorCorrectInvocation(BaseInvocation, PILInvocationConfig):
    type: Literal["color_correct"] = "color_correct"
    init: Optional[ImageField] = Field(default=None, description="Initial image")
    result: Optional[ImageField] = Field(default=None, description="Resulted image")
    mask: Optional[ImageField] = Field(default=None, description="Mask image")
    mask_blur_radius: float = Field(default=8, description="Mask blur radius")
    def invoke(self, context: InvocationContext) -> ImageOutput:
        pil_init_mask = None
        if self.mask is not None:
            pil_init_mask = context.services.images.get_pil_image(
                self.mask.image_name
            ).convert("L")
        init_image = context.services.images.get_pil_image(
            self.init.image_name
        )
        result = context.services.images.get_pil_image(
            self.result.image_name
        ).convert("RGBA")
        #if init_image is None or init_mask is None:
        #    return result
        # Get the original alpha channel of the mask if there is one.
        # Otherwise it is some other black/white image format ('1', 'L' or 'RGB')
        #pil_init_mask = (
        #    init_mask.getchannel("A")
        #    if init_mask.mode == "RGBA"
        #    else init_mask.convert("L")
        #)
        pil_init_image = init_image.convert(
            "RGBA"
        )  # Add an alpha channel if one doesn't exist
        # Build an image with only visible pixels from source to use as reference for color-matching.
        init_rgb_pixels = numpy.asarray(init_image.convert("RGB"), dtype=numpy.uint8)
        init_a_pixels = numpy.asarray(pil_init_image.getchannel("A"), dtype=numpy.uint8)
        init_mask_pixels = numpy.asarray(pil_init_mask, dtype=numpy.uint8)
        # Get numpy version of result
        np_image = numpy.asarray(result.convert("RGB"), dtype=numpy.uint8)
        # Mask and calculate mean and standard deviation
        mask_pixels = init_a_pixels * init_mask_pixels > 0
        np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :]
        np_image_masked = np_image[mask_pixels, :]
        if np_init_rgb_pixels_masked.size > 0:
            init_means = np_init_rgb_pixels_masked.mean(axis=0)
            init_std = np_init_rgb_pixels_masked.std(axis=0)
            gen_means = np_image_masked.mean(axis=0)
            gen_std = np_image_masked.std(axis=0)
            # Color correct
            np_matched_result = np_image.copy()
            np_matched_result[:, :, :] = (
                (
                    (
                        (
                            np_matched_result[:, :, :].astype(numpy.float32)
                            - gen_means[None, None, :]
                        )
                        / gen_std[None, None, :]
                    )
                    * init_std[None, None, :]
                    + init_means[None, None, :]
                )
                .clip(0, 255)
                .astype(numpy.uint8)
            )
            matched_result = Image.fromarray(np_matched_result, mode="RGB")
        else:
            matched_result = Image.fromarray(np_image, mode="RGB")
        # Blur the mask out (into init image) by specified amount
        if self.mask_blur_radius > 0:
            nm = numpy.asarray(pil_init_mask, dtype=numpy.uint8)
            nmd = cv2.erode(
                nm,
                kernel=numpy.ones((3, 3), dtype=numpy.uint8),
                iterations=int(self.mask_blur_radius / 2),
            )
            pmd = Image.fromarray(nmd, mode="L")
            blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(self.mask_blur_radius))
        else:
            blurred_init_mask = pil_init_mask
        multiplied_blurred_init_mask = ImageChops.multiply(
            blurred_init_mask, result.split()[-1]
        )
        # Paste original on color-corrected generation (using blurred mask)
        matched_result.paste(init_image, (0, 0), mask=multiplied_blurred_init_mask)
        image_dto = context.services.images.create(
            image=matched_result,
            image_origin=ResourceOrigin.INTERNAL,
            image_category=ImageCategory.GENERAL,
            node_id=self.id,
            session_id=context.graph_execution_state_id,
            is_intermediate=self.is_intermediate,
        )
        return ImageOutput(
            image=ImageField(image_name=image_dto.image_name),
            width=image_dto.width,
            height=image_dto.height,
        )