Add -D for outpainting

This commit is contained in:
Dominic Letz 2022-09-04 22:48:17 +02:00
parent 9df743e2bf
commit 4d997145b4
3 changed files with 110 additions and 2 deletions

View File

@ -141,6 +141,24 @@ For older changelogs, please visit **[CHANGELOGS](docs/CHANGELOG.md)**.
Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation problems and other issues.
# Continuous outpainting
This extension uses the new inpainting code to extend an existing image in any one of the
directions "top", "right", "bottom" or "left". To use it, provide an initial image with
-I and an extension direction with -D (direction). When outpainting, the img2img strength
defaults to a higher value of 0.83.
~~~~
dream> man with cat on shoulder -I./images/man.png -D bottom
~~~~
Or even shorter (the prompt is read from the metadata of the old image)
~~~~
dream> -I./images/man.png -D bottom
~~~~
# Contributing
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with

View File

@ -203,6 +203,7 @@ class Generate:
# these are specific to embiggen (which also relies on img2img args)
embiggen = None,
embiggen_tiles = None,
out_direction = None,
# these are specific to GFPGAN/ESRGAN
gfpgan_strength = 0,
save_original = False,
@ -309,7 +310,7 @@ class Generate:
log_tokens =self.log_tokenization
)
(init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit)
(init_image,mask_image) = self._make_images(init_img, init_mask, out_direction, width, height, fit)
if (init_image is not None) and (mask_image is not None):
generator = self._make_inpaint()
@ -380,13 +381,15 @@ class Generate:
)
return results
def _make_images(self, img_path, mask_path, width, height, fit=False):
def _make_images(self, img_path, mask_path, out_direction, width, height, fit=False):
init_image = None
init_mask = None
if not img_path:
return None,None
image = self._load_img(img_path, width, height, fit=fit) # this returns an Image
if out_direction:
image = self._create_outpaint_image(image, out_direction)
init_image = self._create_init_image(image) # this returns a torch tensor
if self._has_transparency(image) and not mask_path: # if image has a transparent area and no mask was provided, then try to generate mask
@ -608,6 +611,64 @@ class Generate:
image = 2.0 * image - 1.0
return image.to(self.device)
def _create_outpaint_image(self, image, direction_args):
assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
if len(direction_args) == 1:
direction = direction_args[0]
pixels = None
elif len(direction_args) == 2:
direction = direction_args[0]
pixels = int(direction_args[1])
assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
image = image.convert("RGBA")
# we always extend top, but rotate to extend along the requested side
if direction == 'left':
image = image.transpose(Image.Transpose.ROTATE_270)
elif direction == 'bottom':
image = image.transpose(Image.Transpose.ROTATE_180)
elif direction == 'right':
image = image.transpose(Image.Transpose.ROTATE_90)
pixels = image.height//2 if pixels == None else int(pixels)
assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
# the top part of the image is taken from the source image mirrored
# coordinates (0,0) are the upper left corner of an image
top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
top = top.crop((0, top.height - pixels, top.width, top.height))
# setting all alpha of the top part to 0
alpha = top.getchannel("A")
alpha.paste(0, (0, 0, top.width, top.height))
top.putalpha(alpha)
# taking the bottom from the original image
bottom = image.crop((0, 0, image.width, image.height - pixels))
new_img = image.copy()
new_img.paste(top, (0, 0))
new_img.paste(bottom, (0, pixels))
# create a 10% dither in the middle
dither = min(image.height//10, pixels)
for x in range(0, image.width, 2):
for y in range(pixels - dither, pixels + dither):
(r, g, b, a) = new_img.getpixel((x, y))
new_img.putpixel((x, y), (r, g, b, 0))
# let's rotate back again
if direction == 'left':
new_img = new_img.transpose(Image.Transpose.ROTATE_90)
elif direction == 'bottom':
new_img = new_img.transpose(Image.Transpose.ROTATE_180)
elif direction == 'right':
new_img = new_img.transpose(Image.Transpose.ROTATE_270)
return new_img
def _create_init_mask(self, image):
# convert into a black/white mask
image = self._image_to_mask(image)
@ -710,3 +771,4 @@ class Generate:
def _has_cuda(self):
return self.device.type == 'cuda'

View File

@ -6,6 +6,7 @@ import shlex
import os
import re
import sys
import shlex
import copy
import warnings
import time
@ -13,6 +14,7 @@ import ldm.dream.readline
from ldm.dream.pngwriter import PngWriter, PromptFormatter
from ldm.dream.server import DreamServer, ThreadingDreamServer
from ldm.dream.image_util import make_grid
from PIL import Image
from omegaconf import OmegaConf
# Placeholder to be replaced with proper class that tracks the
@ -162,6 +164,22 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
except SystemExit:
parser.print_help()
continue
if opt.init_img:
try:
im = Image.open(opt.init_img)
# '-F' argument appears (M1) in the dream prompt even though
# it's not a main loop argument
oldprompt = im.text['Dream'].replace(" -F", "")
oldargs = parser.parse_args(shlex.split(oldprompt))
if len(opt.prompt) == 0:
opt.prompt = oldargs.prompt
except AttributeError:
pass
except KeyError:
pass
if len(opt.prompt) == 0:
print('Try again with a prompt!')
continue
@ -186,6 +204,8 @@ def main_loop(gen, outdir, prompt_as_dir, parser, infile):
opt.seed = None
continue
opt.strength = 0.83 if opt.out_direction and opt.strength is None else opt.strength
if opt.with_variations is not None:
# shotgun parsing, woo
parts = []
@ -577,6 +597,14 @@ def create_cmd_parser():
type=str,
help='Path to input image for img2img mode (supersedes width and height)',
)
parser.add_argument(
'-D',
'--out_direction',
nargs='+',
type=str,
metavar=('direction', 'pixels'),
help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
)
parser.add_argument(
'-M',
'--init_mask',