Repository: https://github.com/invoke-ai/InvokeAI
Add outpainting functionality.
- Adapted from PR #489, author Dominic Letz [https://github.com/dominicletz]
- Too many upstream changes to merge, so frankensteined it in
- Added support for !fix syntax
- Added documentation
Commit: 2cf294e6de
File: README.md

@@ -98,6 +98,7 @@ you can try starting `dream.py` with the `--precision=float32` flag:
 - [Interactive Command Line Interface](docs/features/CLI.md)
 - [Image To Image](docs/features/IMG2IMG.md)
 - [Inpainting Support](docs/features/INPAINTING.md)
+- [Outpainting Support](docs/features/OUTPAINTING.md)
 - [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
 - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
 - [Google Colab](docs/features/OTHER.md#google-colab)
@@ -157,7 +158,7 @@ For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md)
 Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation
 problems and other issues.
 
-### Contributing
+# Contributing
 
 Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
 cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how
File: ldm/dream/args.py

@@ -74,9 +74,10 @@ To retrieve a (series of) opt objects corresponding to the metadata, do this:
     opt_list = metadata_loads(metadata)
 
 The metadata should be pulled out of the PNG image. pngwriter has a method
-retrieve_metadata that will do this.
-
+retrieve_metadata that will do this, or you can do it in one swell foop
+with metadata_from_png():
+
+    opt_list = metadata_from_png('/path/to/image_file.png')
 """
 
 import argparse
@@ -87,6 +88,7 @@ import hashlib
 import os
 import copy
 import base64
+import ldm.dream.pngwriter
 from ldm.dream.conditioning import split_weighted_subprompts
 
 SAMPLER_CHOICES = [
@@ -208,10 +210,16 @@ class Args(object):
         # esrgan-specific parameters
         if a['upscale']:
             switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')
+
+        # embiggen parameters
         if a['embiggen']:
             switches.append(f'--embiggen {" ".join([str(u) for u in a["embiggen"]])}')
         if a['embiggen_tiles']:
             switches.append(f'--embiggen_tiles {" ".join([str(u) for u in a["embiggen_tiles"]])}')
+
+        # outpainting parameters
+        if a['out_direction']:
+            switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
         if a['with_variations']:
             formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
             switches.append(f'-V {formatted_variations}')
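For reference, a minimal sketch of the switch string the new out_direction branch above produces; the dictionary value is made up for illustration:

```python
# Illustrative only: mimics the new -D switch formatting from the hunk above.
a = {'out_direction': ['top', '64']}
switches = []
if a['out_direction']:
    switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
print(switches)  # ['-D top 64']
```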
@@ -546,6 +554,14 @@ class Args(object):
             help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
             default=0.75,
         )
+        img2img_group.add_argument(
+            '-D',
+            '--out_direction',
+            nargs='+',
+            type=str,
+            metavar=('direction', 'pixels'),
+            help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
+        )
         postprocessing_group.add_argument(
             '-ft',
             '--facetool',
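As a quick sanity check on the new option, here is a hedged stand-alone sketch of how argparse treats `-D` with `nargs='+'`; it uses a throwaway parser rather than the project's Args class:

```python
import argparse

# Throwaway parser mirroring the new --out_direction argument above.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-D',
    '--out_direction',
    nargs='+',
    type=str,
    metavar=('direction', 'pixels'),
)
print(parser.parse_args(['-D', 'top', '64']).out_direction)  # ['top', '64']
print(parser.parse_args(['-D', 'left']).out_direction)       # ['left']
```

With only a direction given, the pixel count is left to the caller to default (half the image height, per the help text).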
@@ -710,6 +726,15 @@ def metadata_dumps(opt,
 
     return metadata
 
+def metadata_from_png(png_file_path):
+    '''
+    Given the path to a PNG file created by dream.py, retrieves
+    an Args object containing the image metadata
+    '''
+    meta = ldm.dream.pngwriter.retrieve_metadata(png_file_path)
+    opts = metadata_loads(meta)
+    return opts[0]
+
 def metadata_loads(metadata):
     '''
     Takes the dictionary corresponding to RFC266 (https://github.com/lstein/stable-diffusion/issues/266)
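For context, a hedged sketch of how the new helper is intended to be called; the path is a placeholder and the example assumes a PNG that dream.py wrote with its usual embedded metadata:

```python
from ldm.dream.args import metadata_from_png

# Hypothetical path to an image previously generated by dream.py.
opt = metadata_from_png('outputs/img-samples/000001.1234567890.png')
print(opt.prompt)  # prompt recovered from the PNG metadata
print(opt.seed)    # seed recovered from the PNG metadata
```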
File: ldm/generate.py
@@ -27,8 +27,8 @@ from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
-from ldm.dream.pngwriter import PngWriter, retrieve_metadata
-from ldm.dream.args import metadata_loads
+from ldm.dream.pngwriter import PngWriter
+from ldm.dream.args import metadata_from_png
 from ldm.dream.image_util import InitImageResizer
 from ldm.dream.devices import choose_torch_device, choose_precision
 from ldm.dream.conditioning import get_uc_and_c
@@ -276,8 +276,9 @@ class Generate:
             strength = None,
             init_color = None,
             # these are specific to embiggen (which also relies on img2img args)
-            embiggen=None,
-            embiggen_tiles=None,
+            embiggen = None,
+            embiggen_tiles = None,
+            out_direction = None,
             # these are specific to GFPGAN/ESRGAN
             facetool = None,
             gfpgan_strength = 0,
@@ -388,9 +389,14 @@ class Generate:
                 log_tokens =self.log_tokenization
             )
 
-            (init_image, mask_image) = self._make_images(
-                init_img, init_mask, width, height, fit)
-
+            init_image,mask_image = self._make_images(
+                init_img,
+                init_mask,
+                width,
+                height,
+                fit=fit,
+                out_direction=out_direction,
+            )
             if (init_image is not None) and (mask_image is not None):
                 generator = self._make_inpaint()
             elif (embiggen != None or embiggen_tiles != None):
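A hedged sketch of driving the extended prompt2image() signature from Python; the Generate() setup and file paths are placeholders, and 0.83 is the strength this commit applies when out_direction is set:

```python
from ldm.generate import Generate

gr = Generate()  # placeholder setup; real use needs the model weights in place

# Extend an existing image upward by 64 pixels. out_direction takes the same
# values as the new -D switch: a direction plus an optional pixel count.
results = gr.prompt2image(
    prompt        = 'a sunlit mountain lake',  # placeholder prompt
    init_img      = 'outputs/lake.png',        # placeholder init image
    strength      = 0.83,
    out_direction = ['top', '64'],
)
```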
@@ -469,16 +475,17 @@ class Generate:
         )
         return results
 
-    # this needs to be generalized to all sorts of postprocessors, but for now
-    # sufficient to support most use cases
+    # this needs to be generalized to all sorts of postprocessors, which should be wrapped
+    # in a nice harmonized call signature. For now we have a bunch of if/elses!
     def apply_postprocessor(
             self,
             image_path,
-            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
+            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
             gfpgan_strength = 0.0,
             codeformer_fidelity = 0.75,
-            save_original = True, # to get new name
             upscale = None,
+            out_direction = None,
+            save_original = True, # to get new name
             callback = None,
             opt = None,
     ):
@@ -489,8 +496,7 @@ class Generate:
         image_metadata = None
         prompt = None
         try:
-            meta = retrieve_metadata(image_path)
-            args = metadata_loads(meta)
+            args = metadata_from_png(image_path)
             if len(args) > 1:
                 print("* Can't postprocess a grid")
                 return
@@ -556,22 +562,56 @@ class Generate:
                 embiggen_tiles = opt.embiggen_tiles,
                 image_callback = callback,
             )
 
+        elif tool == 'outpaint':
+            oldargs = metadata_from_png(image_path)
+            opt.strength = 0.83
+            opt.init_img = image_path
+            return self.prompt2image(
+                oldargs.prompt,
+                out_direction = opt.out_direction,
+                sampler = self.sampler,
+                steps = opt.steps,
+                cfg_scale = opt.cfg_scale,
+                ddim_eta = self.ddim_eta,
+                conditioning= get_uc_and_c(
+                    oldargs.prompt, model =self.model,
+                    skip_normalize=opt.skip_normalize,
+                    log_tokens =opt.log_tokenization
+                ),
+                width = opt.width,
+                height = opt.height,
+                init_img = image_path, # not the Image! (sigh)
+                strength = opt.strength,
+                image_callback = callback,
+            )
         else:
             print(f'* postprocessing tool {tool} is not yet supported')
             return None
 
-
-    def _make_images(self, img_path, mask_path, width, height, fit=False):
-        init_image = None
-        init_mask = None
+    def _make_images(
+            self,
+            img_path,
+            mask_path,
+            width,
+            height,
+            fit=False,
+            out_direction=None,
+    ):
+        init_image = None
+        init_mask = None
         if not img_path:
             return None, None
 
-        image = self._load_img(img_path, width, height,
-                               fit=fit) # this returns an Image
-        # this returns a torch tensor
-        init_image = self._create_init_image(image)
+        image = self._load_img(
+            img_path,
+            width,
+            height,
+            fit=fit
+        ) # this returns an Image
+        if out_direction:
+            image = self._create_outpaint_image(image, out_direction)
+        init_image = self._create_init_image(image) # this returns a torch tensor
 
         # if image has a transparent area and no mask was provided, then try to generate mask
         if self._has_transparency(image) and not mask_path:
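A companion sketch of the !fix-style path through the new 'outpaint' branch above; it reuses gr from the previous sketch, assumes opt is an Args object parsed from the !fix command line, and assumes the PNG carries dream.py metadata so the original prompt can be recovered:

```python
# Hedged sketch: re-run an existing dream.py output through the new 'outpaint' tool.
gr.apply_postprocessor(
    image_path    = 'outputs/lake.png',   # placeholder path to a dream.py output
    tool          = 'outpaint',
    out_direction = ['bottom', '128'],
    save_original = True,                 # keep the original; the extended image gets a new name
    opt           = opt,                  # supplies steps, cfg_scale, width, height, etc.
)
```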
@@ -789,6 +829,7 @@ class Generate:
         return model
 
     def _load_img(self, path, width, height, fit=False):
+        print(f'DEBUG: path = {path}')
         assert os.path.exists(path), f'>> {path}: File not found'
 
         # with Image.open(path) as img:
@@ -815,6 +856,66 @@ class Generate:
         image = 2.0 * image - 1.0
         return image.to(self.device)
 
+    # TODO: outpainting is a post-processing application and should be made to behave
+    # like the other ones.
+    def _create_outpaint_image(self, image, direction_args):
+        assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
+
+        if len(direction_args) == 1:
+            direction = direction_args[0]
+            pixels = None
+        elif len(direction_args) == 2:
+            direction = direction_args[0]
+            pixels = int(direction_args[1])
+
+        assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
+
+        image = image.convert("RGBA")
+        # we always extend top, but rotate to extend along the requested side
+        if direction == 'left':
+            image = image.transpose(Image.Transpose.ROTATE_270)
+        elif direction == 'bottom':
+            image = image.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            image = image.transpose(Image.Transpose.ROTATE_90)
+
+        pixels = image.height//2 if pixels is None else int(pixels)
+        assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
+
+        # the top part of the image is taken from the source image mirrored
+        # coordinates (0,0) are the upper left corner of an image
+        top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
+        top = top.crop((0, top.height - pixels, top.width, top.height))
+
+        # setting all alpha of the top part to 0
+        alpha = top.getchannel("A")
+        alpha.paste(0, (0, 0, top.width, top.height))
+        top.putalpha(alpha)
+
+        # taking the bottom from the original image
+        bottom = image.crop((0, 0, image.width, image.height - pixels))
+
+        new_img = image.copy()
+        new_img.paste(top, (0, 0))
+        new_img.paste(bottom, (0, pixels))
+
+        # create a 10% dither in the middle
+        dither = min(image.height//10, pixels)
+        for x in range(0, image.width, 2):
+            for y in range(pixels - dither, pixels + dither):
+                (r, g, b, a) = new_img.getpixel((x, y))
+                new_img.putpixel((x, y), (r, g, b, 0))
+
+        # let's rotate back again
+        if direction == 'left':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_90)
+        elif direction == 'bottom':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_270)
+
+        return new_img
+
     def _create_init_mask(self, image):
         # convert into a black/white mask
         image = self._image_to_mask(image)
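To make the mirror-and-mask preparation above easy to experiment with outside the class, here is a hedged stand-alone PIL sketch covering only the 'top' direction (no rotation handling); the function name and paths are hypothetical:

```python
from PIL import Image

def prepare_outpaint_top(path, pixels):
    """Mirror the top `pixels` rows, zero their alpha, and dither the seam,
    mimicking _create_outpaint_image() above for the 'top' direction only."""
    image = Image.open(path).convert('RGBA')

    # mirrored strip that will occupy the region to be regenerated
    top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM)
    top = top.crop((0, top.height - pixels, top.width, top.height))

    # fully transparent alpha marks the strip as "to be filled in"
    alpha = top.getchannel('A')
    alpha.paste(0, (0, 0, top.width, top.height))
    top.putalpha(alpha)

    # push the original content down and paste the transparent strip on top
    bottom = image.crop((0, 0, image.width, image.height - pixels))
    new_img = image.copy()
    new_img.paste(top, (0, 0))
    new_img.paste(bottom, (0, pixels))

    # knock out alpha on a sparse band around the seam so inpainting can blend it
    dither = min(image.height // 10, pixels)
    for x in range(0, image.width, 2):
        for y in range(pixels - dither, pixels + dither):
            r, g, b, a = new_img.getpixel((x, y))
            new_img.putpixel((x, y), (r, g, b, 0))
    return new_img

# Example (placeholder path):
# prepare_outpaint_top('lake.png', 64).save('lake_outpaint_top.png')
```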
File: scripts/dream.py

@@ -4,11 +4,12 @@
 import os
 import re
 import sys
 import shlex
 import copy
 import warnings
 import time
 import ldm.dream.readline
-from ldm.dream.args import Args, metadata_dumps
+from ldm.dream.args import Args, metadata_dumps, metadata_from_png
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
 from ldm.dream.image_util import make_grid
@@ -166,6 +167,17 @@ def main_loop(gen, opt, infile):
 
         if opt.parse_cmd(command) is None:
             continue
+
+        if opt.init_img:
+            try:
+                oldargs = metadata_from_png(opt.init_img)
+                opt.prompt = oldargs.prompt
+                print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
+            except AttributeError:
+                pass
+            except KeyError:
+                pass
+
         if len(opt.prompt) == 0:
             print('\nTry again with a prompt!')
             continue
@@ -197,7 +209,9 @@ def main_loop(gen, opt, infile):
             opt.seed = None
             continue
-
         # TODO - move this into a module
+        if opt.strength is None:
+            opt.strength = 0.75 if opt.out_direction is None else 0.83
+
         if opt.with_variations is not None:
             # shotgun parsing, woo
             parts = []
@@ -347,7 +361,15 @@ def do_postprocess (gen, opt, callback):
         print(f'* file {file_path} does not exist')
         return
 
-    tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
+    tool=None
+    if opt.gfpgan_strength > 0:
+        tool = opt.facetool
+    elif opt.embiggen:
+        tool = 'embiggen'
+    elif opt.upscale:
+        tool = 'upscale'
+    elif opt.out_direction:
+        tool = 'outpaint'
     opt.save_original = True # do not overwrite old image!
     return gen.apply_postprocessor(
         image_path = opt.prompt,
@@ -356,6 +378,7 @@ def do_postprocess (gen, opt, callback):
         codeformer_fidelity = opt.codeformer_fidelity,
         save_original = opt.save_original,
         upscale = opt.upscale,
+        out_direction = opt.out_direction,
         callback = callback,
         opt = opt,
     )
@@ -415,5 +438,16 @@ def dream_server_loop(gen, host, port, outdir, gfpgan):
 
     dream_server.server_close()
 
+def write_log_message(results, log_path):
+    """logs the name of the output image, prompt, and prompt args to the terminal and log file"""
+    global output_cntr
+    log_lines = [f'{path}: {prompt}\n' for path, prompt in results]
+    for l in log_lines:
+        output_cntr += 1
+        print(f'[{output_cntr}] {l}',end='')
+
+    with open(log_path, 'a', encoding='utf-8') as file:
+        file.writelines(log_lines)
+
 if __name__ == '__main__':
     main()