InvokeAI/invokeai/backend/stable_diffusion/extensions/inpaint_model.py

from __future__ import annotations

from typing import TYPE_CHECKING, Optional

import torch
from diffusers import UNet2DConditionModel

from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback

if TYPE_CHECKING:
    from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext


class InpaintModelExt(ExtensionBase):
    """Denoising extension that supplies the extra input channels expected by dedicated inpainting UNets.

    For inpainting with a model that is not an inpainting model, see `InpaintExt`.
    """

    def __init__(
        self,
        mask: Optional[torch.Tensor],
        masked_latents: Optional[torch.Tensor],
        is_gradient_mask: bool,
    ):
        """Initialize InpaintModelExt.

        Args:
            mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values
                are expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should
                not be inpainted. The mask is stored inverted (`1 - mask`).
            masked_latents (Optional[torch.Tensor]): Latents of the initial image in which the area to be inpainted
                was masked out with black. Must be provided whenever `mask` is provided. It is concatenated to the
                UNet input along the channel dimension, so it is expected to look like a single latent sample:
                (1, latent_channels, latent_height, latent_width).
            is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values
                range from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are
                either 0 or 1.

        Raises:
            ValueError: If `mask` is provided without `masked_latents`.
        """
        super().__init__()
        if masked_latents is None and mask is not None:
            raise ValueError("Source image required for inpaint mask when inpaint model used!")

        # Inpaint models use the opposite mask convention (0 - remain same, 1 - inpaint), so keep the inverse.
        self._mask = None if mask is None else 1 - mask
        self._masked_latents = masked_latents
        self._is_gradient_mask = is_gradient_mask

    @staticmethod
    def _is_inpaint_model(unet: UNet2DConditionModel):
        """Return True if the provided UNet is an inpaint model, judged by the input channels of `conv_in`.

        The `in_channels` of a UNet vary depending on model type:
        - normal - 4
        - depth - 5
        - inpaint - 9
        """
        in_channels = unet.conv_in.in_channels
        return in_channels == 9

    @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
    def init_tensors(self, ctx: DenoiseContext):
        """Check the model type and prepare the mask and masked latents before the denoise loop.

        Missing tensors get defaults derived from `ctx.latents`; both tensors are then moved to the device and
        dtype of `ctx.latents`.

        Raises:
            ValueError: If `ctx.unet` is not an inpaint model (see `_is_inpaint_model`).
        """
        if not self._is_inpaint_model(ctx.unet):
            raise ValueError("InpaintModelExt should be used only on inpaint models!")

        latents = ctx.latents

        # Without a user mask fall back to all ones, which in the stored (inverted) convention means
        # "inpaint everything".
        mask = torch.ones_like(latents[:1, :1]) if self._mask is None else self._mask
        self._mask = mask.to(device=latents.device, dtype=latents.dtype)

        # Without a source image fall back to zeros shaped like a single latent sample.
        masked_latents = torch.zeros_like(latents[:1]) if self._masked_latents is None else self._masked_latents
        self._masked_latents = masked_latents.to(device=latents.device, dtype=latents.dtype)

    # Registered with a high order value so it is done last and other extensions work with normal latents.
    @callback(ExtensionCallbackType.PRE_UNET, order=1000)
    def append_inpaint_layers(self, ctx: DenoiseContext):
        """Extend the UNet input sample with the mask and masked-latents channels.

        Both tensors are repeated along the batch dimension to match the sample and then concatenated to it
        along the channel dimension (`dim=1`), in the order: sample, mask, masked latents.
        """
        sample = ctx.unet_kwargs.sample
        num_samples = sample.shape[0]

        tiled_mask = torch.cat([self._mask for _ in range(num_samples)])
        tiled_masked_latents = torch.cat([self._masked_latents for _ in range(num_samples)])

        ctx.unet_kwargs.sample = torch.cat((sample, tiled_mask, tiled_masked_latents), dim=1)

    # The inpaint model can slightly change the part that had to remain the same, so bring the original back.
    @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
    def restore_unmasked(self, ctx: DenoiseContext):
        """Replace the area that was not to be inpainted with the original latents after denoising.

        With a gradient mask, every position where the stored (inverted) mask is above zero keeps the denoised
        value and all others take the original one. Otherwise the result is a linear interpolation from the
        original to the denoised latents, weighted by the stored mask.
        """
        original = ctx.inputs.orig_latents
        denoised = ctx.latents

        if not self._is_gradient_mask:
            ctx.latents = torch.lerp(original, denoised, self._mask)
            return

        ctx.latents = torch.where(self._mask > 0, denoised, original)
Handle inpaint models 2024-07-21 17:45:55 +00:00			`from __future__ import annotations`

			`from typing import TYPE_CHECKING, Optional`

			`import torch`
			`from diffusers import UNet2DConditionModel`

			`from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType`
			`from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback`

			`if TYPE_CHECKING:`
			`from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext`


			`class InpaintModelExt(ExtensionBase):`
Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-23 20:34:28 +00:00			"""An extension for inpainting with inpainting models. See `InpaintExt` for inpainting with non-inpainting
			`models.`
			`"""`
Ruff format 2024-07-23 22:17:28 +00:00
Handle inpaint models 2024-07-21 17:45:55 +00:00			`def __init__(`
			`self,`
			`mask: Optional[torch.Tensor],`
			`masked_latents: Optional[torch.Tensor],`
			`is_gradient_mask: bool,`
			`):`
Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-23 20:34:28 +00:00			`"""Initialize InpaintModelExt.`
			`Args:`
			`mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are`
Use non-inverted mask generally(except inpaint model handling) 2024-07-23 21:59:13 +00:00			`expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be`
Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-23 20:34:28 +00:00			`inpainted.`
			`masked_latents (Optional[torch.Tensor]): Latents of initial image, with masked out by black color inpainted area.`
			`If mask provided, then too should be provided. Shape: (1, 1, latent_height, latent_width)`
			`is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range`
			`from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or`
			`1.`
			`"""`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`super().__init__()`
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`if mask is not None and masked_latents is None:`
			`raise ValueError("Source image required for inpaint mask when inpaint model used!")`

Use non-inverted mask generally(except inpaint model handling) 2024-07-23 21:59:13 +00:00			`# Inverse mask, because inpaint models treat mask as: 0 - remain same, 1 - inpaint`
			`self._mask = None`
			`if mask is not None:`
			`self._mask = 1 - mask`
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`self._masked_latents = masked_latents`
			`self._is_gradient_mask = is_gradient_mask`
Handle inpaint models 2024-07-21 17:45:55 +00:00
			`@staticmethod`
			`def _is_inpaint_model(unet: UNet2DConditionModel):`
Ruff format 2024-07-23 22:17:28 +00:00			`"""Checks if the provided UNet belongs to a regular model.`
Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-23 20:34:28 +00:00			The `in_channels` of a UNet vary depending on model type:
			`- normal - 4`
			`- depth - 5`
			`- inpaint - 9`
			`"""`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`return unet.conv_in.in_channels == 9`

			`@callback(ExtensionCallbackType.PRE_DENOISE_LOOP)`
			`def init_tensors(self, ctx: DenoiseContext):`
			`if not self._is_inpaint_model(ctx.unet):`
Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-23 20:34:28 +00:00			`raise ValueError("InpaintModelExt should be used only on inpaint models!")`
Handle inpaint models 2024-07-21 17:45:55 +00:00
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`if self._mask is None:`
			`self._mask = torch.ones_like(ctx.latents[:1, :1])`
			`self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)`
Handle inpaint models 2024-07-21 17:45:55 +00:00
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`if self._masked_latents is None:`
			`self._masked_latents = torch.zeros_like(ctx.latents[:1])`
			`self._masked_latents = self._masked_latents.to(device=ctx.latents.device, dtype=ctx.latents.dtype)`
Handle inpaint models 2024-07-21 17:45:55 +00:00
Revert wrong comment copy 2024-07-27 10:20:58 +00:00			`# Do last so that other extensions works with normal latents`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`@callback(ExtensionCallbackType.PRE_UNET, order=1000)`
			`def append_inpaint_layers(self, ctx: DenoiseContext):`
			`batch_size = ctx.unet_kwargs.sample.shape[0]`
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`b_mask = torch.cat([self._mask] * batch_size)`
			`b_masked_latents = torch.cat([self._masked_latents] * batch_size)`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`ctx.unet_kwargs.sample = torch.cat(`
			`[ctx.unet_kwargs.sample, b_mask, b_masked_latents],`
			`dim=1,`
			`)`

Suggested changes Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-27 10:18:28 +00:00			`# Restore unmasked part as inpaint model can change unmasked part slightly`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`@callback(ExtensionCallbackType.POST_DENOISE_LOOP)`
			`def restore_unmasked(self, ctx: DenoiseContext):`
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`if self._is_gradient_mask:`
			`ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)`
Handle inpaint models 2024-07-21 17:45:55 +00:00			`else:`
Same changes as in other PRs, add check for running inpainting on inpaint model without source image Co-Authored-By: Ryan Dick <14897797+RyanJDick@users.noreply.github.com> 2024-07-22 20:47:39 +00:00			`ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)`