feat(nodes): redo tile infill

The previous algorithm errored if the image dimensions weren't divisible by the tile size. I've reimplemented it from scratch to fix this issue. The new algorithm is simpler: we create a pool of tiles, then use them to create an image composed completely of tiles. If there is any awkwardly sized space on the edge of the image, the tiles are cropped to fit. Finally, we paste the original image over the tile image. I've added a Jupyter notebook to smoke-test the infill methods, along with 10 test images. The other infill algorithms can easily be tested with the notebook on the same images, though I haven't set that up yet. Tested and confirmed this gives results just as good as the earlier infill, though of course they aren't identical because the algorithm changed.
2024-08-30 20:32:17 +00:00 · 2024-04-04 21:45:05 +11:00 · 2024-04-04 21:45:05 +11:00 · f0b1bb0327
commit f0b1bb0327
parent b061db414f
13 changed files with 207 additions and 63 deletions
--- a/invokeai/app/invocations/infill.py
+++ b/invokeai/app/invocations/infill.py
@ -95,9 +95,8 @@ class InfillTileInvocation(InfillImageProcessorInvocation):
    )

    def infill(self, image: Image.Image):
-        infilled = infill_tile(image, seed=self.seed, tile_size=self.tile_size)
-        infilled.paste(image, (0, 0), image.split()[-1])
-        return infilled
+        output = infill_tile(image, seed=self.seed, tile_size=self.tile_size)
+        return output.infilled


@invocation(
--- a/invokeai/backend/image_util/infill_methods/test_images/source1.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source1.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source10.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source10.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source2.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source2.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source3.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source3.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source4.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source4.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source5.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source5.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source6.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source6.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source7.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source7.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source8.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source8.webp
--- a/invokeai/backend/image_util/infill_methods/test_images/source9.webp
+++ b/invokeai/backend/image_util/infill_methods/test_images/source9.webp
--- a/invokeai/backend/image_util/infill_methods/tile.ipynb
+++ b/invokeai/backend/image_util/infill_methods/tile.ipynb
@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"Smoke test for the tile infill\"\"\"\n",
+    "\n",
+    "from pathlib import Path\n",
+    "from typing import Optional\n",
+    "from PIL import Image\n",
+    "from invokeai.backend.image_util.infill_methods.tile import infill_tile\n",
+    "\n",
+    "images: list[tuple[str, Image.Image]] = []\n",
+    "\n",
+    "for i in sorted(Path(\"./test_images/\").glob(\"*.webp\")):\n",
+    "    images.append((i.name, Image.open(i)))\n",
+    "    images.append((i.name, Image.open(i).transpose(Image.FLIP_LEFT_RIGHT)))\n",
+    "    images.append((i.name, Image.open(i).transpose(Image.FLIP_TOP_BOTTOM)))\n",
+    "    images.append((i.name, Image.open(i).resize((512, 512))))\n",
+    "    images.append((i.name, Image.open(i).resize((1234, 461))))\n",
+    "\n",
+    "outputs: list[tuple[str, Image.Image, Image.Image, Optional[Image.Image]]] = []\n",
+    "\n",
+    "for name, image in images:\n",
+    "    try:\n",
+    "        output = infill_tile(image, seed=0, tile_size=32)\n",
+    "        outputs.append((name, image, output.infilled, output.tile_image))\n",
+    "    except ValueError as e:\n",
+    "        print(f\"Skipping image {name}: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Display the images in jupyter notebook\n",
+    "import matplotlib.pyplot as plt\n",
+    "from PIL import ImageOps\n",
+    "\n",
+    "fig, axes = plt.subplots(len(outputs), 3, figsize=(10, 3 * len(outputs)))\n",
+    "plt.subplots_adjust(hspace=0)\n",
+    "\n",
+    "for i, (name, original, infilled, tile_image) in enumerate(outputs):\n",
+    "    # Add a border to each image, helps to see the edges\n",
+    "    size = original.size\n",
+    "    original = ImageOps.expand(original, border=5, fill=\"red\")\n",
+    "    filled = ImageOps.expand(infilled, border=5, fill=\"red\")\n",
+    "    if tile_image:\n",
+    "        tile_image = ImageOps.expand(tile_image, border=5, fill=\"red\")\n",
+    "\n",
+    "    axes[i, 0].imshow(original)\n",
+    "    axes[i, 0].axis(\"off\")\n",
+    "    axes[i, 0].set_title(f\"Original ({name} - {size})\")\n",
+    "\n",
+    "    if tile_image:\n",
+    "        axes[i, 1].imshow(tile_image)\n",
+    "        axes[i, 1].axis(\"off\")\n",
+    "        axes[i, 1].set_title(\"Tile Image\")\n",
+    "    else:\n",
+    "        axes[i, 1].axis(\"off\")\n",
+    "        axes[i, 1].set_title(\"NO TILES GENERATED (NO TRANSPARENCY)\")\n",
+    "\n",
+    "    axes[i, 2].imshow(filled)\n",
+    "    axes[i, 2].axis(\"off\")\n",
+    "    axes[i, 2].set_title(\"Filled\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".invokeai",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/invokeai/backend/image_util/infill_methods/tile.py
+++ b/invokeai/backend/image_util/infill_methods/tile.py
@ -1,72 +1,122 @@
-import math
+from dataclasses import dataclass
 from typing import Optional

 import numpy as np
 from PIL import Image


-def get_tile_images(image: np.ndarray, width: int = 8, height: int = 8):
-    _nrows, _ncols, depth = image.shape
-    _strides = image.strides
+def create_tile_pool(img_array: np.ndarray, tile_size: tuple[int, int]) -> list[np.ndarray]:
+    """
+    Create a pool of tiles from non-transparent areas of the image by systematically walking through the image.

-    nrows, _m = divmod(_nrows, height)
-    ncols, _n = divmod(_ncols, width)
-    if _m != 0 or _n != 0:
-        return None
+    Args:
+        img_array: numpy array of the image.
+        tile_size: tuple (tile_width, tile_height) specifying the size of each tile.

-    return np.lib.stride_tricks.as_strided(
-        np.ravel(image),
-        shape=(nrows, ncols, height, width, depth),
-        strides=(height * _strides[0], width * _strides[1], *_strides),
-        writeable=False,
-    )
+    Returns:
+        A list of numpy arrays, each representing a tile.
+    """
+    tiles: list[np.ndarray] = []
+    rows, cols = img_array.shape[:2]
+    tile_width, tile_height = tile_size

+    for y in range(0, rows - tile_height + 1, tile_height):
+        for x in range(0, cols - tile_width + 1, tile_width):
+            tile = img_array[y : y + tile_height, x : x + tile_width]
+            # Check if the image has an alpha channel and the tile is completely opaque
+            if img_array.shape[2] == 4 and np.all(tile[:, :, 3] == 255):
+                tiles.append(tile)
+            elif img_array.shape[2] == 3:  # If no alpha channel, append the tile
+                tiles.append(tile)

-def infill_tile(im: Image.Image, tile_size: int = 16, seed: Optional[int] = None) -> Image.Image:
-    # Only fill if there's an alpha layer
-    if im.mode != "RGBA":
-        return im
-
-    a = np.asarray(im, dtype=np.uint8)
-
-    tile_size_tuple = (tile_size, tile_size)
-
-    # Get the image as tiles of a specified size
-    tiles = get_tile_images(a, *tile_size_tuple).copy()
-
-    # Get the mask as tiles
-    tiles_mask = tiles[:, :, :, :, 3]
-
-    # Find any mask tiles with any fully transparent pixels (we will be replacing these later)
-    tmask_shape = tiles_mask.shape
-    tiles_mask = tiles_mask.reshape(math.prod(tiles_mask.shape))
-    n, ny = (math.prod(tmask_shape[0:2])), math.prod(tmask_shape[2:])
-    tiles_mask = tiles_mask > 0
-    tiles_mask = tiles_mask.reshape((n, ny)).all(axis=1)
-
-    # Get RGB tiles in single array and filter by the mask
-    tshape = tiles.shape
-    tiles_all = tiles.reshape((math.prod(tiles.shape[0:2]), *tiles.shape[2:]))
-    filtered_tiles = tiles_all[tiles_mask]
-
-    if len(filtered_tiles) == 0:
-        return im
-
-    # Find all invalid tiles and replace with a random valid tile
-    replace_count = (tiles_mask == False).sum()  # noqa: E712
-    rng = np.random.default_rng(seed=seed)
-    tiles_all[np.logical_not(tiles_mask)] = filtered_tiles[rng.choice(filtered_tiles.shape[0], replace_count), :, :, :]
-
-    # Convert back to an image
-    tiles_all = tiles_all.reshape(tshape)
-    tiles_all = tiles_all.swapaxes(1, 2)
-    st = tiles_all.reshape(
-        (
-            math.prod(tiles_all.shape[0:2]),
-            math.prod(tiles_all.shape[2:4]),
-            tiles_all.shape[4],
+    if not tiles:
+        raise ValueError(
+            "Not enough opaque pixels to generate any tiles. Use a smaller tile size or a different image."
        )
-    )
-    si = Image.fromarray(st, mode="RGBA")

-    return si
+    return tiles
+
+
+def create_filled_image(
+    img_array: np.ndarray, tile_pool: list[np.ndarray], tile_size: tuple[int, int], seed: int
+) -> np.ndarray:
+    """
+    Create an RGB image of the same height and width as the original, filled entirely with tiles from the pool.
+
+    Tiles are laid out left-to-right, top-to-bottom. Tiles that would overhang the right or bottom edge are
+    cropped to fit, so the image dimensions do not need to be divisible by the tile size.
+
+    Args:
+        img_array: numpy array of the original image, shaped (rows, cols, channels).
+        tile_pool: A non-empty list of numpy arrays, each representing a tile with at least 3 channels.
+        tile_size: tuple (tile_width, tile_height) specifying the size of each tile.
+        seed: Seed for the random number generator, making the tile selection reproducible.
+
+    Returns:
+        A numpy array of shape (rows, cols, 3) representing the filled image.
+
+    Raises:
+        ValueError: If the tile pool is empty.
+    """
+
+    pool_size = len(tile_pool)
+    if pool_size == 0:
+        # Fail with a clear message instead of the opaque error the RNG raises for an empty range
+        raise ValueError("Cannot fill an image from an empty tile pool.")
+
+    rows, cols, _ = img_array.shape
+    tile_width, tile_height = tile_size
+
+    # Prep an empty RGB image
+    filled_img_array = np.zeros((rows, cols, 3), dtype=img_array.dtype)
+
+    # Make the random tile selection reproducible
+    rng = np.random.default_rng(seed)
+
+    for y in range(0, rows, tile_height):
+        # Vertical space available for this row of tiles (may be less than the tile height at the bottom edge)
+        space_y = min(tile_height, rows - y)
+
+        for x in range(0, cols, tile_width):
+            # Pick a random tile from the pool (one draw per tile position, in row-major order)
+            tile = tile_pool[rng.integers(pool_size)]
+
+            # Horizontal space available (may be less than the tile width at the right edge)
+            space_x = min(tile_width, cols - x)
+
+            # Crop the tile if necessary to fit, and drop any alpha channel
+            cropped_tile = tile[:space_y, :space_x, :3]
+
+            # Fill the available space with the (possibly cropped) tile
+            filled_img_array[y : y + space_y, x : x + space_x] = cropped_tile
+
+    return filled_img_array
+
+
+@dataclass
+class InfillTileOutput:
+    """Result returned by `infill_tile`."""
+
+    # The infilled image, or the untouched input image when the input is not RGBA.
+    infilled: Image.Image
+    # The tile-only image used as the infill backdrop. None when the input was not RGBA
+    # and was returned untouched, in which case no tiles were generated.
+    tile_image: Optional[Image.Image] = None
+
+
+def infill_tile(image_to_infill: Image.Image, seed: int, tile_size: int) -> InfillTileOutput:
+    """Infills the transparent areas of an image with random tiles from the image itself.
+
+    If the image is not an RGBA image, it is returned untouched and no tile image is produced.
+
+    Args:
+        image_to_infill: The image to infill.
+        seed: Seed for the random tile selection, so that results are reproducible.
+        tile_size: The edge length, in pixels, of the square tiles to use for infilling.
+
+    Returns:
+        An InfillTileOutput holding the infilled image (RGBA) and the intermediate tile-only image (RGB).
+
+    Raises:
+        ValueError: If tile_size is less than 1, or if there are not enough opaque pixels to generate any tiles.
+    """
+
+    if image_to_infill.mode != "RGBA":
+        return InfillTileOutput(infilled=image_to_infill)
+
+    if tile_size < 1:
+        # A non-positive tile size would otherwise surface as a misleading error from the tile walk
+        raise ValueError(f"tile_size must be a positive integer, got {tile_size}.")
+
+    # Internally, we want a tuple of (tile_width, tile_height). In the future, the tile size can be any rectangle.
+    _tile_size = (tile_size, tile_size)
+    np_image = np.array(image_to_infill, dtype=np.uint8)
+
+    # Create the pool of tiles that we will use to infill
+    tile_pool = create_tile_pool(np_image, _tile_size)
+
+    # Create an image from the tiles, same size as the original
+    tile_np_image = create_filled_image(np_image, tile_pool, _tile_size, seed)
+
+    # Paste the OG image over the tile image, using its alpha channel as the mask, effectively infilling the area
+    tile_image = Image.fromarray(tile_np_image, "RGB")
+    infilled = tile_image.copy()
+    infilled.paste(image_to_infill, (0, 0), image_to_infill.split()[-1])
+
+    # Image.convert() returns a new image rather than converting in place, so the result must be kept
+    infilled = infilled.convert("RGBA")
+
+    return InfillTileOutput(infilled=infilled, tile_image=tile_image)