InvokeAI/invokeai/backend/image_util/lineart_anime.py

"""Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license)."""

import functools
from typing import Optional

import cv2
import numpy as np
import torch
import torch.nn as nn
from einops import rearrange
from huggingface_hub import hf_hub_download
from PIL import Image

from invokeai.backend.image_util.util import (
    normalize_image_channel_count,
    np_to_pil,
    pil_to_np,
    resize_image_to_resolution,
)


class UnetGenerator(nn.Module):
    """Create a Unet-based generator"""

    def __init__(
        self,
        input_nc: int,
        output_nc: int,
        num_downs: int,
        ngf: int = 64,
        norm_layer=nn.BatchNorm2d,
        use_dropout: bool = False,
    ):
        """Construct a Unet generator
        Parameters:
            input_nc (int)  -- the number of channels in input images
            output_nc (int) -- the number of channels in output images
            num_downs (int) -- the number of downsamplings in UNet. For example, # if |num_downs| == 7,
                                image of size 128x128 will become of size 1x1 # at the bottleneck
            ngf (int)       -- the number of filters in the last conv layer
            norm_layer      -- normalization layer
        We construct the U-Net from the innermost layer to the outermost layer.
        It is a recursive process.
        """
        super(UnetGenerator, self).__init__()
        # construct unet structure
        unet_block = UnetSkipConnectionBlock(
            ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True
        )  # add the innermost layer
        for _ in range(num_downs - 5):  # add intermediate layers with ngf * 8 filters
            unet_block = UnetSkipConnectionBlock(
                ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout
            )
        # gradually reduce the number of filters from ngf * 8 to ngf
        unet_block = UnetSkipConnectionBlock(
            ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer
        )
        unet_block = UnetSkipConnectionBlock(
            ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer
        )
        unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
        self.model = UnetSkipConnectionBlock(
            output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer
        )  # add the outermost layer

    def forward(self, input):
        """Standard forward"""
        return self.model(input)


class UnetSkipConnectionBlock(nn.Module):
    """Defines the Unet submodule with skip connection.
    X -------------------identity----------------------
    |-- downsampling -- |submodule| -- upsampling --|
    """

    def __init__(
        self,
        outer_nc: int,
        inner_nc: int,
        input_nc: Optional[int] = None,
        submodule=None,
        outermost: bool = False,
        innermost: bool = False,
        norm_layer=nn.BatchNorm2d,
        use_dropout: bool = False,
    ):
        """Construct a Unet submodule with skip connections.
        Parameters:
            outer_nc (int) -- the number of filters in the outer conv layer
            inner_nc (int) -- the number of filters in the inner conv layer
            input_nc (int) -- the number of channels in input images/features
            submodule (UnetSkipConnectionBlock) -- previously defined submodules
            outermost (bool)    -- if this module is the outermost module
            innermost (bool)    -- if this module is the innermost module
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers.
        """
        super(UnetSkipConnectionBlock, self).__init__()
        self.outermost = outermost
        if isinstance(norm_layer, functools.partial):
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d
        if input_nc is None:
            input_nc = outer_nc
        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
        downrelu = nn.LeakyReLU(0.2, True)
        downnorm = norm_layer(inner_nc)
        uprelu = nn.ReLU(True)
        upnorm = norm_layer(outer_nc)

        if outermost:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1)
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.ConvTranspose2d(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            if use_dropout:
                model = down + [submodule] + up + [nn.Dropout(0.5)]
            else:
                model = down + [submodule] + up

        self.model = nn.Sequential(*model)

    def forward(self, x):
        if self.outermost:
            return self.model(x)
        else:  # add skip connections
            return torch.cat([x, self.model(x)], 1)


class LineartAnimeProcessor:
    """Processes an image to detect lineart."""

    def __init__(self):
        model_path = hf_hub_download("lllyasviel/Annotators", "netG.pth")
        norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
        self.model = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False)
        ckpt = torch.load(model_path)
        for key in list(ckpt.keys()):
            if "module." in key:
                ckpt[key.replace("module.", "")] = ckpt[key]
                del ckpt[key]
        self.model.load_state_dict(ckpt)
        self.model.eval()

    def to(self, device: torch.device):
        self.model.to(device)
        return self

    def run(self, input_image: Image.Image, detect_resolution: int = 512, image_resolution: int = 512) -> Image.Image:
        """Processes an image to detect lineart.

        Args:
            input_image: The input image.
            detect_resolution: The resolution to use for detection.
            image_resolution: The resolution to use for the output image.

        Returns:
            The detected lineart.
        """
        device = next(iter(self.model.parameters())).device
        np_image = pil_to_np(input_image)

        np_image = normalize_image_channel_count(np_image)
        np_image = resize_image_to_resolution(np_image, detect_resolution)

        H, W, C = np_image.shape
        Hn = 256 * int(np.ceil(float(H) / 256.0))
        Wn = 256 * int(np.ceil(float(W) / 256.0))
        img = cv2.resize(np_image, (Wn, Hn), interpolation=cv2.INTER_CUBIC)
        with torch.no_grad():
            image_feed = torch.from_numpy(img).float().to(device)
            image_feed = image_feed / 127.5 - 1.0
            image_feed = rearrange(image_feed, "h w c -> 1 c h w")

            line = self.model(image_feed)[0, 0] * 127.5 + 127.5
            line = line.cpu().numpy()

            line = cv2.resize(line, (W, H), interpolation=cv2.INTER_CUBIC)
            line = line.clip(0, 255).astype(np.uint8)

        detected_map = line

        detected_map = normalize_image_channel_count(detected_map)

        img = resize_image_to_resolution(np_image, image_resolution)
        H, W, C = img.shape

        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
        detected_map = 255 - detected_map

        return np_to_pil(detected_map)
feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux 2024-03-21 09:06:14 +00:00			`"""Adapted from https://github.com/huggingface/controlnet_aux (Apache-2.0 license)."""`

			`import functools`
			`from typing import Optional`

			`import cv2`
			`import numpy as np`
			`import torch`
			`import torch.nn as nn`
			`from einops import rearrange`
			`from huggingface_hub import hf_hub_download`
			`from PIL import Image`

			`from invokeai.backend.image_util.util import (`
			`normalize_image_channel_count,`
			`np_to_pil,`
			`pil_to_np,`
tidy: "fit_image_to_resolution" -> "resize_image_to_resolution" 2024-03-21 12:00:29 +00:00			`resize_image_to_resolution,`
feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux 2024-03-21 09:06:14 +00:00			`)`


			`class UnetGenerator(nn.Module):`
			`"""Create a Unet-based generator"""`

			`def __init__(`
			`self,`
			`input_nc: int,`
			`output_nc: int,`
			`num_downs: int,`
			`ngf: int = 64,`
			`norm_layer=nn.BatchNorm2d,`
			`use_dropout: bool = False,`
			`):`
			`"""Construct a Unet generator`
			`Parameters:`
			`input_nc (int) -- the number of channels in input images`
			`output_nc (int) -- the number of channels in output images`
			`num_downs (int) -- the number of downsamplings in UNet. For example, # if \|num_downs\| == 7,`
			`image of size 128x128 will become of size 1x1 # at the bottleneck`
			`ngf (int) -- the number of filters in the last conv layer`
			`norm_layer -- normalization layer`
			`We construct the U-Net from the innermost layer to the outermost layer.`
			`It is a recursive process.`
			`"""`
			`super(UnetGenerator, self).__init__()`
			`# construct unet structure`
			`unet_block = UnetSkipConnectionBlock(`
			`ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True`
			`) # add the innermost layer`
			`for _ in range(num_downs - 5): # add intermediate layers with ngf * 8 filters`
			`unet_block = UnetSkipConnectionBlock(`
			`ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout`
			`)`
			`# gradually reduce the number of filters from ngf * 8 to ngf`
			`unet_block = UnetSkipConnectionBlock(`
			`ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer`
			`)`
			`unet_block = UnetSkipConnectionBlock(`
			`ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer`
			`)`
			`unet_block = UnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)`
			`self.model = UnetSkipConnectionBlock(`
			`output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer`
			`) # add the outermost layer`

			`def forward(self, input):`
			`"""Standard forward"""`
			`return self.model(input)`


			`class UnetSkipConnectionBlock(nn.Module):`
			`"""Defines the Unet submodule with skip connection.`
			`X -------------------identity----------------------`
			`\|-- downsampling -- \|submodule\| -- upsampling --\|`
			`"""`

			`def __init__(`
			`self,`
			`outer_nc: int,`
			`inner_nc: int,`
			`input_nc: Optional[int] = None,`
			`submodule=None,`
			`outermost: bool = False,`
			`innermost: bool = False,`
			`norm_layer=nn.BatchNorm2d,`
			`use_dropout: bool = False,`
			`):`
			`"""Construct a Unet submodule with skip connections.`
			`Parameters:`
			`outer_nc (int) -- the number of filters in the outer conv layer`
			`inner_nc (int) -- the number of filters in the inner conv layer`
			`input_nc (int) -- the number of channels in input images/features`
			`submodule (UnetSkipConnectionBlock) -- previously defined submodules`
			`outermost (bool) -- if this module is the outermost module`
			`innermost (bool) -- if this module is the innermost module`
			`norm_layer -- normalization layer`
			`use_dropout (bool) -- if use dropout layers.`
			`"""`
			`super(UnetSkipConnectionBlock, self).__init__()`
			`self.outermost = outermost`
chore: ruff Looks like an upstream change to ruff resulted in this file being a violation. 2024-07-15 03:02:08 +00:00			`if isinstance(norm_layer, functools.partial):`
feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux 2024-03-21 09:06:14 +00:00			`use_bias = norm_layer.func == nn.InstanceNorm2d`
			`else:`
			`use_bias = norm_layer == nn.InstanceNorm2d`
			`if input_nc is None:`
			`input_nc = outer_nc`
			`downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)`
			`downrelu = nn.LeakyReLU(0.2, True)`
			`downnorm = norm_layer(inner_nc)`
			`uprelu = nn.ReLU(True)`
			`upnorm = norm_layer(outer_nc)`

			`if outermost:`
			`upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1)`
			`down = [downconv]`
			`up = [uprelu, upconv, nn.Tanh()]`
			`model = down + [submodule] + up`
			`elif innermost:`
			`upconv = nn.ConvTranspose2d(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)`
			`down = [downrelu, downconv]`
			`up = [uprelu, upconv, upnorm]`
			`model = down + up`
			`else:`
			`upconv = nn.ConvTranspose2d(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, bias=use_bias)`
			`down = [downrelu, downconv, downnorm]`
			`up = [uprelu, upconv, upnorm]`

			`if use_dropout:`
			`model = down + [submodule] + up + [nn.Dropout(0.5)]`
			`else:`
			`model = down + [submodule] + up`

			`self.model = nn.Sequential(*model)`

			`def forward(self, x):`
			`if self.outermost:`
			`return self.model(x)`
			`else: # add skip connections`
			`return torch.cat([x, self.model(x)], 1)`


			`class LineartAnimeProcessor:`
			`"""Processes an image to detect lineart."""`

			`def __init__(self):`
			`model_path = hf_hub_download("lllyasviel/Annotators", "netG.pth")`
			`norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)`
			`self.model = UnetGenerator(3, 1, 8, 64, norm_layer=norm_layer, use_dropout=False)`
			`ckpt = torch.load(model_path)`
			`for key in list(ckpt.keys()):`
			`if "module." in key:`
			`ckpt[key.replace("module.", "")] = ckpt[key]`
			`del ckpt[key]`
			`self.model.load_state_dict(ckpt)`
			`self.model.eval()`

			`def to(self, device: torch.device):`
			`self.model.to(device)`
			`return self`

			`def run(self, input_image: Image.Image, detect_resolution: int = 512, image_resolution: int = 512) -> Image.Image:`
			`"""Processes an image to detect lineart.`

			`Args:`
			`input_image: The input image.`
			`detect_resolution: The resolution to use for detection.`
			`image_resolution: The resolution to use for the output image.`

			`Returns:`
			`The detected lineart.`
			`"""`
			`device = next(iter(self.model.parameters())).device`
			`np_image = pil_to_np(input_image)`

			`np_image = normalize_image_channel_count(np_image)`
tidy: "fit_image_to_resolution" -> "resize_image_to_resolution" 2024-03-21 12:00:29 +00:00			`np_image = resize_image_to_resolution(np_image, detect_resolution)`
feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux 2024-03-21 09:06:14 +00:00
			`H, W, C = np_image.shape`
			`Hn = 256 * int(np.ceil(float(H) / 256.0))`
			`Wn = 256 * int(np.ceil(float(W) / 256.0))`
			`img = cv2.resize(np_image, (Wn, Hn), interpolation=cv2.INTER_CUBIC)`
			`with torch.no_grad():`
			`image_feed = torch.from_numpy(img).float().to(device)`
			`image_feed = image_feed / 127.5 - 1.0`
			`image_feed = rearrange(image_feed, "h w c -> 1 c h w")`

			`line = self.model(image_feed)[0, 0] * 127.5 + 127.5`
			`line = line.cpu().numpy()`

			`line = cv2.resize(line, (W, H), interpolation=cv2.INTER_CUBIC)`
			`line = line.clip(0, 255).astype(np.uint8)`

			`detected_map = line`

			`detected_map = normalize_image_channel_count(detected_map)`

tidy: "fit_image_to_resolution" -> "resize_image_to_resolution" 2024-03-21 12:00:29 +00:00			`img = resize_image_to_resolution(np_image, image_resolution)`
feat: adaptation of Lineart Anime processor Adapted from https://github.com/huggingface/controlnet_aux 2024-03-21 09:06:14 +00:00			`H, W, C = img.shape`

			`detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)`
			`detected_map = 255 - detected_map`

			`return np_to_pil(detected_map)`