Add outpainting functionality.

- Adapted from PR #489, author Dominic Letz [https://github.com/dominicletz]
- Too many upstream changes had accumulated to merge the PR cleanly, so its changes were integrated manually instead.
- Added support for !fix syntax
- Added documentation
This commit is contained in:
Lincoln Stein 2022-09-21 02:44:46 -04:00
commit 2cf294e6de
4 changed files with 188 additions and 27 deletions

View File

@ -98,6 +98,7 @@ you can try starting `dream.py` with the `--precision=float32` flag:
- [Interactive Command Line Interface](docs/features/CLI.md)
- [Image To Image](docs/features/IMG2IMG.md)
- [Inpainting Support](docs/features/INPAINTING.md)
- [Outpainting Support](docs/features/OUTPAINTING.md)
- [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
- [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
- [Google Colab](docs/features/OTHER.md#google-colab)
@ -157,7 +158,7 @@ For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md)
Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation
problems and other issues.
### Contributing
# Contributing
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how

View File

@ -74,9 +74,10 @@ To retrieve a (series of) opt objects corresponding to the metadata, do this:
opt_list = metadata_loads(metadata)
The metadata should be pulled out of the PNG image. pngwriter has a method
retrieve_metadata that will do this.
retrieve_metadata that will do this, or you can do it in one swell foop
with metadata_from_png():
opt_list = metadata_from_png('/path/to/image_file.png')
"""
import argparse
@ -87,6 +88,7 @@ import hashlib
import os
import copy
import base64
import ldm.dream.pngwriter
from ldm.dream.conditioning import split_weighted_subprompts
SAMPLER_CHOICES = [
@ -208,10 +210,16 @@ class Args(object):
# esrgan-specific parameters
if a['upscale']:
switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')
# embiggen parameters
if a['embiggen']:
switches.append(f'--embiggen {" ".join([str(u) for u in a["embiggen"]])}')
if a['embiggen_tiles']:
switches.append(f'--embiggen_tiles {" ".join([str(u) for u in a["embiggen_tiles"]])}')
# outpainting parameters
if a['out_direction']:
switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
if a['with_variations']:
formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
switches.append(f'-V {formatted_variations}')
@ -546,6 +554,14 @@ class Args(object):
help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
default=0.75,
)
img2img_group.add_argument(
'-D',
'--out_direction',
nargs='+',
type=str,
metavar=('direction', 'pixels'),
help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
)
postprocessing_group.add_argument(
'-ft',
'--facetool',
@ -710,6 +726,15 @@ def metadata_dumps(opt,
return metadata
def metadata_from_png(png_file_path):
    """
    Given the path to a PNG file created by dream.py, return an Args
    object holding the image metadata embedded in that file.

    Reads the raw metadata dict via pngwriter.retrieve_metadata(), parses
    it with metadata_loads(), and returns the first parsed entry.
    """
    raw_metadata = ldm.dream.pngwriter.retrieve_metadata(png_file_path)
    parsed_opts = metadata_loads(raw_metadata)
    return parsed_opts[0]
def metadata_loads(metadata):
'''
Takes the dictionary corresponding to RFC266 (https://github.com/lstein/stable-diffusion/issues/266)

View File

@ -27,8 +27,8 @@ from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ksampler import KSampler
from ldm.dream.pngwriter import PngWriter, retrieve_metadata
from ldm.dream.args import metadata_loads
from ldm.dream.pngwriter import PngWriter
from ldm.dream.args import metadata_from_png
from ldm.dream.image_util import InitImageResizer
from ldm.dream.devices import choose_torch_device, choose_precision
from ldm.dream.conditioning import get_uc_and_c
@ -276,8 +276,9 @@ class Generate:
strength = None,
init_color = None,
# these are specific to embiggen (which also relies on img2img args)
embiggen=None,
embiggen_tiles=None,
embiggen = None,
embiggen_tiles = None,
out_direction = None,
# these are specific to GFPGAN/ESRGAN
facetool = None,
gfpgan_strength = 0,
@ -388,9 +389,14 @@ class Generate:
log_tokens =self.log_tokenization
)
(init_image, mask_image) = self._make_images(
init_img, init_mask, width, height, fit)
init_image,mask_image = self._make_images(
init_img,
init_mask,
width,
height,
fit=fit,
out_direction=out_direction,
)
if (init_image is not None) and (mask_image is not None):
generator = self._make_inpaint()
elif (embiggen != None or embiggen_tiles != None):
@ -469,16 +475,17 @@ class Generate:
)
return results
# this needs to be generalized to all sorts of postprocessors, but for now
# sufficient to support most use cases
# this needs to be generalized to all sorts of postprocessors, which should be wrapped
# in a nice harmonized call signature. For now we have a bunch of if/elses!
def apply_postprocessor(
self,
image_path,
tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
gfpgan_strength = 0.0,
codeformer_fidelity = 0.75,
save_original = True, # to get new name
upscale = None,
out_direction = None,
save_original = True, # to get new name
callback = None,
opt = None,
):
@ -489,8 +496,7 @@ class Generate:
image_metadata = None
prompt = None
try:
meta = retrieve_metadata(image_path)
args = metadata_loads(meta)
args = metadata_from_png(image_path)
if len(args) > 1:
print("* Can't postprocess a grid")
return
@ -556,22 +562,56 @@ class Generate:
embiggen_tiles = opt.embiggen_tiles,
image_callback = callback,
)
elif tool == 'outpaint':
oldargs = metadata_from_png(image_path)
opt.strength = 0.83
opt.init_img = image_path
return self.prompt2image(
oldargs.prompt,
out_direction = opt.out_direction,
sampler = self.sampler,
steps = opt.steps,
cfg_scale = opt.cfg_scale,
ddim_eta = self.ddim_eta,
conditioning= get_uc_and_c(
oldargs.prompt, model =self.model,
skip_normalize=opt.skip_normalize,
log_tokens =opt.log_tokenization
),
width = opt.width,
height = opt.height,
init_img = image_path, # not the Image! (sigh)
strength = opt.strength,
image_callback = callback,
)
else:
print(f'* postprocessing tool {tool} is not yet supported')
return None
def _make_images(self, img_path, mask_path, width, height, fit=False):
init_image = None
init_mask = None
def _make_images(
self,
img_path,
mask_path,
width,
height,
fit=False,
out_direction=None,
):
init_image = None
init_mask = None
if not img_path:
return None, None
image = self._load_img(img_path, width, height,
fit=fit) # this returns an Image
# this returns a torch tensor
init_image = self._create_init_image(image)
image = self._load_img(
img_path,
width,
height,
fit=fit
) # this returns an Image
if out_direction:
image = self._create_outpaint_image(image, out_direction)
init_image = self._create_init_image(image) # this returns a torch tensor
# if image has a transparent area and no mask was provided, then try to generate mask
if self._has_transparency(image) and not mask_path:
@ -789,6 +829,7 @@ class Generate:
return model
def _load_img(self, path, width, height, fit=False):
print(f'DEBUG: path = {path}')
assert os.path.exists(path), f'>> {path}: File not found'
# with Image.open(path) as img:
@ -815,6 +856,66 @@ class Generate:
image = 2.0 * image - 1.0
return image.to(self.device)
# TODO: outpainting is a post-processing application and should be made to behave
# like the other ones.
def _create_outpaint_image(self, image, direction_args):
    """
    Extend a PIL image along one side in preparation for outpainting.

    direction_args: the one or two values of the -D switch — a direction
    name ('top'|'left'|'bottom'|'right') and an optional pixel count.
    When the count is omitted it defaults to half the image's extent
    along the chosen direction.

    Returns a new RGBA image of the same size where a `pixels`-deep strip
    on the requested side has been replaced by a mirrored, fully
    transparent copy of the adjacent content; the transparent area is
    what the subsequent inpainting pass will regenerate.
    """
    # NOTE(review): assert-based validation is stripped under `python -O`;
    # argument errors would then surface later as obscure failures.
    assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'

    if len(direction_args) == 1:
        direction = direction_args[0]
        pixels = None  # resolved to a default after any rotation below
    elif len(direction_args) == 2:
        direction = direction_args[0]
        pixels = int(direction_args[1])

    assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'

    # RGBA is required: the alpha channel marks the region to regenerate.
    image = image.convert("RGBA")

    # we always extend top, but rotate to extend along the requested side
    if direction == 'left':
        image = image.transpose(Image.Transpose.ROTATE_270)
    elif direction == 'bottom':
        image = image.transpose(Image.Transpose.ROTATE_180)
    elif direction == 'right':
        image = image.transpose(Image.Transpose.ROTATE_90)

    # default depth: half the (post-rotation) height, i.e. half the
    # image's extent along the requested direction
    pixels = image.height//2 if pixels is None else int(pixels)
    # NOTE(review): the message says "image.size" but the bound checked is
    # image.height (the extent along the extension axis) — confirm wording.
    assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'

    # the top part of the image is taken from the source image mirrored
    # coordinates (0,0) are the upper left corner of an image
    top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
    top = top.crop((0, top.height - pixels, top.width, top.height))

    # setting all alpha of the top part to 0 (marks it for regeneration)
    alpha = top.getchannel("A")
    alpha.paste(0, (0, 0, top.width, top.height))
    top.putalpha(alpha)

    # taking the bottom from the original image
    bottom = image.crop((0, 0, image.width, image.height - pixels))

    # reassemble: mirrored transparent strip on top, original shifted down
    new_img = image.copy()
    new_img.paste(top, (0, 0))
    new_img.paste(bottom, (0, pixels))

    # create a 10% dither in the middle
    # (band spans pixels-dither .. pixels+dither and clears alpha on every
    # second column, so the seam between kept and regenerated content blends)
    dither = min(image.height//10, pixels)
    for x in range(0, image.width, 2):
        for y in range(pixels - dither, pixels + dither):
            (r, g, b, a) = new_img.getpixel((x, y))
            new_img.putpixel((x, y), (r, g, b, 0))

    # let's rotate back again (inverse of the rotation applied above)
    if direction == 'left':
        new_img = new_img.transpose(Image.Transpose.ROTATE_90)
    elif direction == 'bottom':
        new_img = new_img.transpose(Image.Transpose.ROTATE_180)
    elif direction == 'right':
        new_img = new_img.transpose(Image.Transpose.ROTATE_270)

    return new_img
def _create_init_mask(self, image):
# convert into a black/white mask
image = self._image_to_mask(image)

View File

@ -4,11 +4,12 @@
import os
import re
import sys
import shlex
import copy
import warnings
import time
import ldm.dream.readline
from ldm.dream.args import Args, metadata_dumps
from ldm.dream.args import Args, metadata_dumps, metadata_from_png
from ldm.dream.pngwriter import PngWriter
from ldm.dream.server import DreamServer, ThreadingDreamServer
from ldm.dream.image_util import make_grid
@ -166,6 +167,17 @@ def main_loop(gen, opt, infile):
if opt.parse_cmd(command) is None:
continue
if opt.init_img:
try:
oldargs = metadata_from_png(opt.init_img)
opt.prompt = oldargs.prompt
print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
except AttributeError:
pass
except KeyError:
pass
if len(opt.prompt) == 0:
print('\nTry again with a prompt!')
continue
@ -197,7 +209,9 @@ def main_loop(gen, opt, infile):
opt.seed = None
continue
# TODO - move this into a module
if opt.strength is None:
opt.strength = 0.75 if opt.out_direction is None else 0.83
if opt.with_variations is not None:
# shotgun parsing, woo
parts = []
@ -347,7 +361,15 @@ def do_postprocess (gen, opt, callback):
print(f'* file {file_path} does not exist')
return
tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
tool=None
if opt.gfpgan_strength > 0:
tool = opt.facetool
elif opt.embiggen:
tool = 'embiggen'
elif opt.upscale:
tool = 'upscale'
elif opt.out_direction:
tool = 'outpaint'
opt.save_original = True # do not overwrite old image!
return gen.apply_postprocessor(
image_path = opt.prompt,
@ -356,6 +378,7 @@ def do_postprocess (gen, opt, callback):
codeformer_fidelity = opt.codeformer_fidelity,
save_original = opt.save_original,
upscale = opt.upscale,
out_direction = opt.out_direction,
callback = callback,
opt = opt,
)
@ -415,5 +438,16 @@ def dream_server_loop(gen, host, port, outdir, gfpgan):
dream_server.server_close()
def write_log_message(results, log_path):
    """logs the name of the output image, prompt, and prompt args to the terminal and log file"""
    global output_cntr
    entries = []
    for path, prompt in results:
        entry = f'{path}: {prompt}\n'
        output_cntr += 1
        print(f'[{output_cntr}] {entry}', end='')
        entries.append(entry)
    with open(log_path, 'a', encoding='utf-8') as file:
        file.writelines(entries)
if __name__ == '__main__':
main()