Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
Add outpainting functionality.
- Adapted from PR #489, author Dominic Letz [https://github.com/dominicletz]
- Too many upstream changes to merge, so frankensteined it in
- Added support for !fix syntax
- Added documentation
Commit 2cf294e6de
@@ -98,6 +98,7 @@ you can try starting `dream.py` with the `--precision=float32` flag:
 - [Interactive Command Line Interface](docs/features/CLI.md)
 - [Image To Image](docs/features/IMG2IMG.md)
 - [Inpainting Support](docs/features/INPAINTING.md)
+- [Outpainting Support](docs/features/OUTPAINTING.md)
 - [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
 - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
 - [Google Colab](docs/features/OTHER.md#google-colab)
@@ -157,7 +158,7 @@ For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md)
 Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation
 problems and other issues.
 
-### Contributing
+# Contributing
 
 Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
 cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how
@@ -74,9 +74,10 @@ To retrieve a (series of) opt objects corresponding to the metadata, do this:
 opt_list = metadata_loads(metadata)
 
 The metadata should be pulled out of the PNG image. pngwriter has a method
-retrieve_metadata that will do this.
+retrieve_metadata that will do this, or you can do it in one swell foop
+with metadata_from_png():
 
+    opt_list = metadata_from_png('/path/to/image_file.png')
 """
 
 import argparse
@@ -87,6 +88,7 @@ import hashlib
 import os
 import copy
 import base64
+import ldm.dream.pngwriter
 from ldm.dream.conditioning import split_weighted_subprompts
 
 SAMPLER_CHOICES = [
@@ -208,10 +210,16 @@ class Args(object):
     # esrgan-specific parameters
     if a['upscale']:
         switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')
 
+    # embiggen parameters
     if a['embiggen']:
         switches.append(f'--embiggen {" ".join([str(u) for u in a["embiggen"]])}')
     if a['embiggen_tiles']:
         switches.append(f'--embiggen_tiles {" ".join([str(u) for u in a["embiggen_tiles"]])}')
 
+    # outpainting parameters
+    if a['out_direction']:
+        switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
     if a['with_variations']:
         formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
         switches.append(f'-V {formatted_variations}')
@@ -546,6 +554,14 @@ class Args(object):
         help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
         default=0.75,
     )
+    img2img_group.add_argument(
+        '-D',
+        '--out_direction',
+        nargs='+',
+        type=str,
+        metavar=('direction', 'pixels'),
+        help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
+    )
     postprocessing_group.add_argument(
         '-ft',
         '--facetool',
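Because the new option is declared with `nargs='+'` and `type=str`, the parsed value arrives downstream as a list of strings (direction first, optional pixel count second). A minimal standalone sketch of the parsing behaviour, using only the argument definition shown above (the bare parser here is hypothetical, not the one built by `Args`):

```python
import argparse

# Sketch: reproduce just the '-D/--out_direction' declaration to see what
# downstream code receives.
parser = argparse.ArgumentParser()
parser.add_argument('-D', '--out_direction', nargs='+', type=str,
                    metavar=('direction', 'pixels'))

print(parser.parse_args(['-D', 'top', '128']).out_direction)  # ['top', '128']
print(parser.parse_args(['-D', 'left']).out_direction)        # ['left']
print(parser.parse_args([]).out_direction)                    # None
```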
@@ -710,6 +726,15 @@ def metadata_dumps(opt,
 
     return metadata
 
+def metadata_from_png(png_file_path):
+    '''
+    Given the path to a PNG file created by dream.py, retrieves
+    an Args object containing the image metadata
+    '''
+    meta = ldm.dream.pngwriter.retrieve_metadata(png_file_path)
+    opts = metadata_loads(meta)
+    return opts[0]
+
 def metadata_loads(metadata):
     '''
     Takes the dictionary corresponding to RFC266 (https://github.com/lstein/stable-diffusion/issues/266)
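The new `metadata_from_png()` helper is the one-call path that both `dream.py` and `apply_postprocessor()` use to recover the settings stored in a dream.py PNG. A brief usage sketch (the file name is hypothetical):

```python
from ldm.dream.args import metadata_from_png

# Returns the first opt object recorded in the PNG's metadata; dream.py uses
# the same call to recycle the original prompt when an init image is given.
opt = metadata_from_png('outputs/000042.1234.png')
print(opt.prompt)
```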
ldm/generate.py (139 lines changed)
@@ -27,8 +27,8 @@ from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
-from ldm.dream.pngwriter import PngWriter, retrieve_metadata
-from ldm.dream.args import metadata_loads
+from ldm.dream.pngwriter import PngWriter
+from ldm.dream.args import metadata_from_png
 from ldm.dream.image_util import InitImageResizer
 from ldm.dream.devices import choose_torch_device, choose_precision
 from ldm.dream.conditioning import get_uc_and_c
@@ -276,8 +276,9 @@ class Generate:
             strength = None,
             init_color = None,
             # these are specific to embiggen (which also relies on img2img args)
-            embiggen=None,
-            embiggen_tiles=None,
+            embiggen = None,
+            embiggen_tiles = None,
+            out_direction = None,
             # these are specific to GFPGAN/ESRGAN
             facetool = None,
             gfpgan_strength = 0,
@@ -388,9 +389,14 @@ class Generate:
             log_tokens =self.log_tokenization
         )
 
-        (init_image, mask_image) = self._make_images(
-            init_img, init_mask, width, height, fit)
+        init_image,mask_image = self._make_images(
+            init_img,
+            init_mask,
+            width,
+            height,
+            fit=fit,
+            out_direction=out_direction,
+        )
         if (init_image is not None) and (mask_image is not None):
             generator = self._make_inpaint()
         elif (embiggen != None or embiggen_tiles != None):
@@ -469,16 +475,17 @@ class Generate:
             )
         return results
 
-    # this needs to be generalized to all sorts of postprocessors, but for now
-    # sufficient to support most use cases
+    # this needs to be generalized to all sorts of postprocessors, which should be wrapped
+    # in a nice harmonized call signature. For now we have a bunch of if/elses!
     def apply_postprocessor(
             self,
             image_path,
-            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
+            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
             gfpgan_strength = 0.0,
             codeformer_fidelity = 0.75,
-            save_original = True, # to get new name
             upscale = None,
+            out_direction = None,
+            save_original = True, # to get new name
             callback = None,
             opt = None,
         ):
@@ -489,8 +496,7 @@ class Generate:
         image_metadata = None
         prompt = None
         try:
-            meta = retrieve_metadata(image_path)
-            args = metadata_loads(meta)
+            args = metadata_from_png(image_path)
             if len(args) > 1:
                 print("* Can't postprocess a grid")
                 return
@@ -556,22 +562,56 @@ class Generate:
                 embiggen_tiles = opt.embiggen_tiles,
                 image_callback = callback,
             )
+        elif tool == 'outpaint':
+            oldargs = metadata_from_png(image_path)
+            opt.strength = 0.83
+            opt.init_img = image_path
+            return self.prompt2image(
+                oldargs.prompt,
+                out_direction = opt.out_direction,
+                sampler = self.sampler,
+                steps = opt.steps,
+                cfg_scale = opt.cfg_scale,
+                ddim_eta = self.ddim_eta,
+                conditioning= get_uc_and_c(
+                    oldargs.prompt, model =self.model,
+                    skip_normalize=opt.skip_normalize,
+                    log_tokens =opt.log_tokenization
+                ),
+                width = opt.width,
+                height = opt.height,
+                init_img = image_path, # not the Image! (sigh)
+                strength = opt.strength,
+                image_callback = callback,
+            )
         else:
             print(f'* postprocessing tool {tool} is not yet supported')
             return None
 
 
-    def _make_images(self, img_path, mask_path, width, height, fit=False):
+    def _make_images(
+            self,
+            img_path,
+            mask_path,
+            width,
+            height,
+            fit=False,
+            out_direction=None,
+    ):
         init_image = None
         init_mask = None
         if not img_path:
             return None, None
 
-        image = self._load_img(img_path, width, height,
-                               fit=fit) # this returns an Image
-        # this returns a torch tensor
-        init_image = self._create_init_image(image)
+        image = self._load_img(
+            img_path,
+            width,
+            height,
+            fit=fit
+        ) # this returns an Image
+        if out_direction:
+            image = self._create_outpaint_image(image, out_direction)
+        init_image = self._create_init_image(image) # this returns a torch tensor
 
         # if image has a transparent area and no mask was provided, then try to generate mask
         if self._has_transparency(image) and not mask_path:
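The new `outpaint` branch is the programmatic side of the `!fix` syntax mentioned in the commit message: it re-reads the prompt from the PNG, pins `strength` to 0.83, and re-runs `prompt2image()` with the requested `out_direction`. A rough sketch of driving it directly, assuming an already-initialized `Generate` instance `gen` and a fully populated `Args` namespace `opt` (both assumed here, not shown in the diff):

```python
# Extend the right edge of an existing result by 128 pixels (hypothetical path).
# opt must carry the usual generation settings (steps, cfg_scale, width, height, ...).
opt.out_direction = ['right', '128']
results = gen.apply_postprocessor(
    image_path='outputs/000042.1234.png',
    tool='outpaint',
    opt=opt,
)
```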
@@ -789,6 +829,7 @@ class Generate:
         return model
 
     def _load_img(self, path, width, height, fit=False):
+        print(f'DEBUG: path = {path}')
         assert os.path.exists(path), f'>> {path}: File not found'
 
         # with Image.open(path) as img:
@@ -815,6 +856,66 @@ class Generate:
         image = 2.0 * image - 1.0
         return image.to(self.device)
 
+    # TODO: outpainting is a post-processing application and should be made to behave
+    # like the other ones.
+    def _create_outpaint_image(self, image, direction_args):
+        assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
+
+        if len(direction_args) == 1:
+            direction = direction_args[0]
+            pixels = None
+        elif len(direction_args) == 2:
+            direction = direction_args[0]
+            pixels = int(direction_args[1])
+
+        assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
+
+        image = image.convert("RGBA")
+        # we always extend top, but rotate to extend along the requested side
+        if direction == 'left':
+            image = image.transpose(Image.Transpose.ROTATE_270)
+        elif direction == 'bottom':
+            image = image.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            image = image.transpose(Image.Transpose.ROTATE_90)
+
+        pixels = image.height//2 if pixels is None else int(pixels)
+        assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
+
+        # the top part of the image is taken from the source image mirrored
+        # coordinates (0,0) are the upper left corner of an image
+        top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
+        top = top.crop((0, top.height - pixels, top.width, top.height))
+
+        # setting all alpha of the top part to 0
+        alpha = top.getchannel("A")
+        alpha.paste(0, (0, 0, top.width, top.height))
+        top.putalpha(alpha)
+
+        # taking the bottom from the original image
+        bottom = image.crop((0, 0, image.width, image.height - pixels))
+
+        new_img = image.copy()
+        new_img.paste(top, (0, 0))
+        new_img.paste(bottom, (0, pixels))
+
+        # create a 10% dither in the middle
+        dither = min(image.height//10, pixels)
+        for x in range(0, image.width, 2):
+            for y in range(pixels - dither, pixels + dither):
+                (r, g, b, a) = new_img.getpixel((x, y))
+                new_img.putpixel((x, y), (r, g, b, 0))
+
+        # let's rotate back again
+        if direction == 'left':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_90)
+        elif direction == 'bottom':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_270)
+
+        return new_img
+
     def _create_init_mask(self, image):
         # convert into a black/white mask
         image = self._image_to_mask(image)
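A note on what `_create_outpaint_image()` actually produces: the canvas keeps its original size. For `-D top 128` on a 512x512 image, the picture content is shifted down by 128 rows (the bottom 128 rows fall off), the new top strip is a vertically mirrored copy of the original top with its alpha forced to 0, and a sparse band around the seam is also made transparent so the inpainting pass can blend it. A small sketch for inspecting that behaviour (hypothetical; it assumes a bare `Generate()` can be constructed without loading weights, since this helper only touches PIL):

```python
from PIL import Image
from ldm.generate import Generate

gen = Generate()  # assumption: no checkpoint is needed just to prepare the canvas
src = Image.new('RGB', (512, 512), 'gray')

out = gen._create_outpaint_image(src, ['top', '128'])
assert out.size == (512, 512)                # canvas size is unchanged
assert out.getpixel((0, 0))[3] == 0          # mirrored top strip is transparent
assert out.getpixel((0, 300))[3] == 255      # shifted original content stays opaque
out.save('outpaint_canvas.png')              # inspect the prepared RGBA canvas
```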
@@ -4,11 +4,12 @@
 import os
 import re
 import sys
+import shlex
 import copy
 import warnings
 import time
 import ldm.dream.readline
-from ldm.dream.args import Args, metadata_dumps
+from ldm.dream.args import Args, metadata_dumps, metadata_from_png
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
 from ldm.dream.image_util import make_grid
@@ -166,6 +167,17 @@ def main_loop(gen, opt, infile):
 
         if opt.parse_cmd(command) is None:
             continue
 
+        if opt.init_img:
+            try:
+                oldargs = metadata_from_png(opt.init_img)
+                opt.prompt = oldargs.prompt
+                print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
+            except AttributeError:
+                pass
+            except KeyError:
+                pass
+
         if len(opt.prompt) == 0:
             print('\nTry again with a prompt!')
             continue
@@ -197,7 +209,9 @@ def main_loop(gen, opt, infile):
             opt.seed = None
             continue
 
-        # TODO - move this into a module
+        if opt.strength is None:
+            opt.strength = 0.75 if opt.out_direction is None else 0.83
+
         if opt.with_variations is not None:
             # shotgun parsing, woo
             parts = []
@@ -347,7 +361,15 @@ def do_postprocess (gen, opt, callback):
         print(f'* file {file_path} does not exist')
         return
 
-    tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
+    tool=None
+    if opt.gfpgan_strength > 0:
+        tool = opt.facetool
+    elif opt.embiggen:
+        tool = 'embiggen'
+    elif opt.upscale:
+        tool = 'upscale'
+    elif opt.out_direction:
+        tool = 'outpaint'
     opt.save_original = True # do not overwrite old image!
     return gen.apply_postprocessor(
         image_path = opt.prompt,
@@ -356,6 +378,7 @@ def do_postprocess (gen, opt, callback):
         codeformer_fidelity = opt.codeformer_fidelity,
         save_original = opt.save_original,
         upscale = opt.upscale,
+        out_direction = opt.out_direction,
         callback = callback,
         opt = opt,
     )
@@ -415,5 +438,16 @@ def dream_server_loop(gen, host, port, outdir, gfpgan):
 
     dream_server.server_close()
 
+def write_log_message(results, log_path):
+    """logs the name of the output image, prompt, and prompt args to the terminal and log file"""
+    global output_cntr
+    log_lines = [f'{path}: {prompt}\n' for path, prompt in results]
+    for l in log_lines:
+        output_cntr += 1
+        print(f'[{output_cntr}] {l}',end='')
+
+    with open(log_path, 'a', encoding='utf-8') as file:
+        file.writelines(log_lines)
+
 if __name__ == '__main__':
     main()