Add -D for outpainting

2024-08-30 20:32:17 +00:00 · 2022-09-04 22:48:17 +02:00
parent 9df743e2bf
commit 4d997145b4
3 changed files with 110 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -141,6 +141,24 @@ For older changelogs, please visit **[CHANGELOGS](docs/CHANGELOG.md)**.

 Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation problems and other issues.

+# Continuous outpainting
+
+This extension uses the new inpainting code to extend an existing image in one of the
+directions "top", "right", "bottom" or "left". To use it, provide an initial image with
+-I and an extension direction with -D (direction). When outpainting, the img2img strength
+defaults to a higher value of 0.83.
+
+~~~~
+dream> man with cat on shoulder -I./images/man.png -D bottom
+~~~~
+
+Or even shorter (the prompt is read from the metadata of the old image):
+
+~~~~
+dream> -I./images/man.png -D bottom
+~~~~
+
+
 # Contributing

 Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -203,6 +203,7 @@ class Generate:
            # these are specific to embiggen (which also relies on img2img args)
            embiggen       =    None,
            embiggen_tiles =    None,
+            out_direction  =    None,
            # these are specific to GFPGAN/ESRGAN
            gfpgan_strength  = 0,
            save_original    = False,
@ -309,7 +310,7 @@ class Generate:
                log_tokens    =self.log_tokenization
            )

-            (init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit)
+            (init_image,mask_image) = self._make_images(init_img, init_mask, out_direction, width, height, fit)
            
            if (init_image is not None) and (mask_image is not None):
                generator = self._make_inpaint()
@ -380,13 +381,15 @@ class Generate:
            )
        return results

-    def _make_images(self, img_path, mask_path, width, height, fit=False):
+    def _make_images(self, img_path, mask_path, out_direction, width, height, fit=False):
        init_image      = None
        init_mask       = None
        if not img_path:
            return None,None

        image        = self._load_img(img_path, width, height, fit=fit) # this returns an Image
+        if out_direction:
+            image    = self._create_outpaint_image(image, out_direction)
        init_image   = self._create_init_image(image)                   # this returns a torch tensor

        if self._has_transparency(image) and not mask_path:      # if image has a transparent area and no mask was provided, then try to generate mask
@ -608,6 +611,64 @@ class Generate:
        image = 2.0 * image - 1.0 
        return image.to(self.device)

+    def _create_outpaint_image(self, image, direction_args):
+        """
+        Build the init image for outpainting in one direction.
+
+        The image is rotated so that the requested side is at the top, the
+        original content is shifted down by `pixels` rows, and the freed
+        strip at the top is filled with a vertically mirrored copy of the
+        original's top rows whose alpha is set to 0. The result is rotated
+        back before it is returned. The canvas size is not changed, so the
+        `pixels` rows on the side opposite to the requested direction are
+        dropped.
+
+        direction_args: sequence of one or two items taken from -D:
+            [direction] or [direction, pixels]. direction must be one of
+            'top', 'left', 'bottom', 'right'; pixels is converted with
+            int() and defaults to half the height of the rotated image.
+
+        Returns the composed image (converted to RGBA on the way in).
+        Raises AssertionError when the arguments fail validation.
+        """
+        assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
+
+        if len(direction_args) == 1:
+            direction = direction_args[0]
+            pixels = None
+        elif len(direction_args) == 2:
+            direction = direction_args[0]
+            pixels = int(direction_args[1])            
+
+        assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
+
+        image = image.convert("RGBA")
+        # the code below only ever extends at the top, so rotate the image
+        # until the requested side is the top edge (undone before returning)
+        if direction == 'left':
+            image = image.transpose(Image.Transpose.ROTATE_270)
+        elif direction == 'bottom':
+            image = image.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            image = image.transpose(Image.Transpose.ROTATE_90)
+
+        # default extension length is half of the (rotated) image height
+        pixels = image.height//2 if pixels == None else int(pixels)
+        assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
+
+        # the new top strip is the vertically flipped source image, cropped
+        # to its last `pixels` rows, i.e. a mirror of the source's top rows
+        # (coordinates (0,0) are the upper left corner of an image)
+        top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
+        top = top.crop((0, top.height - pixels, top.width, top.height))
+
+        # set the alpha of the whole top strip to 0
+        alpha = top.getchannel("A")
+        alpha.paste(0, (0, 0, top.width, top.height))
+        top.putalpha(alpha)
+
+        # keep the source image except for its last `pixels` rows
+        bottom = image.crop((0, 0, image.width, image.height - pixels))
+            
+        # compose on a same-sized canvas: mirrored strip first, source below it
+        new_img = image.copy()
+        new_img.paste(top, (0, 0))
+        new_img.paste(bottom, (0, pixels))
+
+        # dither the seam: in a band of `dither` rows on each side of row
+        # `pixels` (at most 10% of the height), set alpha to 0 on every
+        # second column
+        # NOTE(review): pixels + dither can exceed image.height when pixels
+        # is close to the height; getpixel would then be called with an
+        # out-of-range y - confirm and clamp if needed
+        dither = min(image.height//10, pixels)
+        for x in range(0, image.width, 2):
+            for y in range(pixels - dither, pixels + dither):
+                (r, g, b, a) = new_img.getpixel((x, y))
+                new_img.putpixel((x, y), (r, g, b, 0))
+
+        # undo the initial rotation so the image has its original orientation
+        if direction == 'left':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_90)
+        elif direction == 'bottom':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_270)
+
+        return new_img
+
+
    def _create_init_mask(self, image):
        # convert into a black/white mask
        image = self._image_to_mask(image)
@ -710,3 +771,4 @@ class Generate:

    def _has_cuda(self):
        return self.device.type == 'cuda'
+
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -6,6 +6,7 @@ import shlex
 import os
 import re
 import sys
+import shlex
 import copy
 import warnings
 import time
@ -13,6 +14,7 @@ import ldm.dream.readline
 from ldm.dream.pngwriter import PngWriter, PromptFormatter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
 from ldm.dream.image_util import make_grid
+from PIL import Image
 from omegaconf import OmegaConf

 # Placeholder to be replaced with proper class that tracks the
@ -162,6 +164,22 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
        except SystemExit:
            parser.print_help()
            continue
+
+        if opt.init_img:
+            try:
+                im = Image.open(opt.init_img)
+                # '-F' argument appears (M1) in the dream prompt even though
+                # it's not a main loop argument
+                oldprompt = im.text['Dream'].replace(" -F", "")
+                oldargs = parser.parse_args(shlex.split(oldprompt))
+                if len(opt.prompt) == 0:
+                    opt.prompt = oldargs.prompt
+
+            except AttributeError:
+                pass
+            except KeyError:
+                pass
+
        if len(opt.prompt) == 0:
            print('Try again with a prompt!')
            continue
@ -186,6 +204,8 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
                opt.seed = None
                continue

+        opt.strength = 0.83 if opt.out_direction and opt.strength is None else opt.strength
+
        if opt.with_variations is not None:
            # shotgun parsing, woo
            parts = []
@ -577,6 +597,14 @@ def create_cmd_parser():
        type=str,
        help='Path to input image for img2img mode (supersedes width and height)',
    )
+    parser.add_argument(
+        '-D',
+        '--out_direction',
+        nargs='+',
+        type=str,
+        metavar=('direction', 'pixels'),
+        help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
+    )    
    parser.add_argument(
        '-M',
        '--init_mask',