Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
Add outpainting functionality.
- Adapted from PR #489, author Dominic Letz [https://github.com/dominicletz]
- Too many upstream changes to merge, so frankensteined it in
- Added support for !fix syntax
- Added documentation
Commit 2cf294e6de
@@ -98,6 +98,7 @@ you can try starting `dream.py` with the `--precision=float32` flag:
 - [Interactive Command Line Interface](docs/features/CLI.md)
 - [Image To Image](docs/features/IMG2IMG.md)
 - [Inpainting Support](docs/features/INPAINTING.md)
+- [Outpainting Support](docs/features/OUTPAINTING.md)
 - [GFPGAN and Real-ESRGAN Support](docs/features/UPSCALE.md)
 - [Seamless Tiling](docs/features/OTHER.md#seamless-tiling)
 - [Google Colab](docs/features/OTHER.md#google-colab)
@@ -157,7 +158,7 @@ For older changelogs, please visit the **[CHANGELOG](docs/features/CHANGELOG.md)
 Please check out our **[Q&A](docs/help/TROUBLESHOOT.md)** to get solutions for common installation
 problems and other issues.
 
-### Contributing
+# Contributing
 
 Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
 cleanup, testing, or code reviews, is very much encouraged to do so. If you are unfamiliar with how
@@ -74,9 +74,10 @@ To retrieve a (series of) opt objects corresponding to the metadata, do this:
 opt_list = metadata_loads(metadata)
 
 The metadata should be pulled out of the PNG image. pngwriter has a method
-retrieve_metadata that will do this.
+retrieve_metadata that will do this, or you can do it in one swell foop
+with metadata_from_png():
 
+    opt_list = metadata_from_png('/path/to/image_file.png')
 """
 
 import argparse
@@ -87,6 +88,7 @@ import hashlib
 import os
 import copy
 import base64
+import ldm.dream.pngwriter
 from ldm.dream.conditioning import split_weighted_subprompts
 
 SAMPLER_CHOICES = [
@@ -208,10 +210,16 @@ class Args(object):
     # esrgan-specific parameters
     if a['upscale']:
         switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')
 
+    # embiggen parameters
     if a['embiggen']:
         switches.append(f'--embiggen {" ".join([str(u) for u in a["embiggen"]])}')
     if a['embiggen_tiles']:
         switches.append(f'--embiggen_tiles {" ".join([str(u) for u in a["embiggen_tiles"]])}')
 
+    # outpainting parameters
+    if a['out_direction']:
+        switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
     if a['with_variations']:
         formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
         switches.append(f'-V {formatted_variations}')
@@ -546,6 +554,14 @@ class Args(object):
         help='Strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely',
         default=0.75,
     )
+    img2img_group.add_argument(
+        '-D',
+        '--out_direction',
+        nargs='+',
+        type=str,
+        metavar=('direction', 'pixels'),
+        help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
+    )
     postprocessing_group.add_argument(
         '-ft',
         '--facetool',
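Because the new option is declared with `nargs='+'` and `type=str`, the parsed value arrives downstream as a list of strings (direction first, optional pixel count second). A minimal standalone sketch of the parsing behaviour, using only the argument definition shown above (the bare parser here is hypothetical, not the one built by `Args`):

```python
import argparse

# Sketch: reproduce just the '-D/--out_direction' declaration to see what
# downstream code receives.
parser = argparse.ArgumentParser()
parser.add_argument('-D', '--out_direction', nargs='+', type=str,
                    metavar=('direction', 'pixels'))

print(parser.parse_args(['-D', 'top', '128']).out_direction)  # ['top', '128']
print(parser.parse_args(['-D', 'left']).out_direction)        # ['left']
print(parser.parse_args([]).out_direction)                    # None
```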
@@ -710,6 +726,15 @@ def metadata_dumps(opt,
 
     return metadata
 
+def metadata_from_png(png_file_path):
+    '''
+    Given the path to a PNG file created by dream.py, retrieves
+    an Args object containing the image metadata
+    '''
+    meta = ldm.dream.pngwriter.retrieve_metadata(png_file_path)
+    opts = metadata_loads(meta)
+    return opts[0]
+
 def metadata_loads(metadata):
     '''
     Takes the dictionary corresponding to RFC266 (https://github.com/lstein/stable-diffusion/issues/266)
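The new `metadata_from_png()` helper is the one-call path that both `dream.py` and `apply_postprocessor()` use to recover the settings stored in a dream.py PNG. A brief usage sketch (the file name is hypothetical):

```python
from ldm.dream.args import metadata_from_png

# Returns the first opt object recorded in the PNG's metadata; dream.py uses
# the same call to recycle the original prompt when an init image is given.
opt = metadata_from_png('outputs/000042.1234.png')
print(opt.prompt)
```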
ldm/generate.py (139 lines changed)
@@ -27,8 +27,8 @@ from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
-from ldm.dream.pngwriter import PngWriter, retrieve_metadata
-from ldm.dream.args import metadata_loads
+from ldm.dream.pngwriter import PngWriter
+from ldm.dream.args import metadata_from_png
 from ldm.dream.image_util import InitImageResizer
 from ldm.dream.devices import choose_torch_device, choose_precision
 from ldm.dream.conditioning import get_uc_and_c
@@ -276,8 +276,9 @@ class Generate:
             strength = None,
             init_color = None,
             # these are specific to embiggen (which also relies on img2img args)
-            embiggen=None,
-            embiggen_tiles=None,
+            embiggen = None,
+            embiggen_tiles = None,
+            out_direction = None,
             # these are specific to GFPGAN/ESRGAN
             facetool = None,
             gfpgan_strength = 0,
@@ -388,9 +389,14 @@ class Generate:
             log_tokens =self.log_tokenization
         )
 
-        (init_image, mask_image) = self._make_images(
-            init_img, init_mask, width, height, fit)
+        init_image,mask_image = self._make_images(
+            init_img,
+            init_mask,
+            width,
+            height,
+            fit=fit,
+            out_direction=out_direction,
+        )
         if (init_image is not None) and (mask_image is not None):
             generator = self._make_inpaint()
         elif (embiggen != None or embiggen_tiles != None):
@@ -469,16 +475,17 @@ class Generate:
             )
         return results
 
-    # this needs to be generalized to all sorts of postprocessors, but for now
-    # sufficient to support most use cases
+    # this needs to be generalized to all sorts of postprocessors, which should be wrapped
+    # in a nice harmonized call signature. For now we have a bunch of if/elses!
     def apply_postprocessor(
             self,
             image_path,
-            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
+            tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
             gfpgan_strength = 0.0,
             codeformer_fidelity = 0.75,
-            save_original = True, # to get new name
             upscale = None,
+            out_direction = None,
+            save_original = True, # to get new name
             callback = None,
             opt = None,
         ):
@@ -489,8 +496,7 @@ class Generate:
         image_metadata = None
         prompt = None
         try:
-            meta = retrieve_metadata(image_path)
-            args = metadata_loads(meta)
+            args = metadata_from_png(image_path)
             if len(args) > 1:
                 print("* Can't postprocess a grid")
                 return
@@ -556,22 +562,56 @@ class Generate:
                 embiggen_tiles = opt.embiggen_tiles,
                 image_callback = callback,
             )
+        elif tool == 'outpaint':
+            oldargs = metadata_from_png(image_path)
+            opt.strength = 0.83
+            opt.init_img = image_path
+            return self.prompt2image(
+                oldargs.prompt,
+                out_direction = opt.out_direction,
+                sampler = self.sampler,
+                steps = opt.steps,
+                cfg_scale = opt.cfg_scale,
+                ddim_eta = self.ddim_eta,
+                conditioning= get_uc_and_c(
+                    oldargs.prompt, model =self.model,
+                    skip_normalize=opt.skip_normalize,
+                    log_tokens =opt.log_tokenization
+                ),
+                width = opt.width,
+                height = opt.height,
+                init_img = image_path, # not the Image! (sigh)
+                strength = opt.strength,
+                image_callback = callback,
+            )
         else:
             print(f'* postprocessing tool {tool} is not yet supported')
             return None
 
 
-    def _make_images(self, img_path, mask_path, width, height, fit=False):
+    def _make_images(
+            self,
+            img_path,
+            mask_path,
+            width,
+            height,
+            fit=False,
+            out_direction=None,
+    ):
         init_image = None
         init_mask = None
         if not img_path:
             return None, None
 
-        image = self._load_img(img_path, width, height,
-                               fit=fit) # this returns an Image
-        # this returns a torch tensor
-        init_image = self._create_init_image(image)
+        image = self._load_img(
+            img_path,
+            width,
+            height,
+            fit=fit
+        ) # this returns an Image
+        if out_direction:
+            image = self._create_outpaint_image(image, out_direction)
+        init_image = self._create_init_image(image) # this returns a torch tensor
 
         # if image has a transparent area and no mask was provided, then try to generate mask
         if self._has_transparency(image) and not mask_path:
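The new `outpaint` branch is the programmatic side of the `!fix` syntax mentioned in the commit message: it re-reads the prompt from the PNG, pins `strength` to 0.83, and re-runs `prompt2image()` with the requested `out_direction`. A rough sketch of driving it directly, assuming an already-initialized `Generate` instance `gen` and a fully populated `Args` namespace `opt` (both assumed here, not shown in the diff):

```python
# Extend the right edge of an existing result by 128 pixels (hypothetical path).
# opt must carry the usual generation settings (steps, cfg_scale, width, height, ...).
opt.out_direction = ['right', '128']
results = gen.apply_postprocessor(
    image_path='outputs/000042.1234.png',
    tool='outpaint',
    opt=opt,
)
```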
@@ -789,6 +829,7 @@ class Generate:
         return model
 
     def _load_img(self, path, width, height, fit=False):
+        print(f'DEBUG: path = {path}')
         assert os.path.exists(path), f'>> {path}: File not found'
 
         # with Image.open(path) as img:
@@ -815,6 +856,66 @@ class Generate:
         image = 2.0 * image - 1.0
         return image.to(self.device)
 
+    # TODO: outpainting is a post-processing application and should be made to behave
+    # like the other ones.
+    def _create_outpaint_image(self, image, direction_args):
+        assert len(direction_args) in [1, 2], 'Direction (-D) must have exactly one or two arguments.'
+
+        if len(direction_args) == 1:
+            direction = direction_args[0]
+            pixels = None
+        elif len(direction_args) == 2:
+            direction = direction_args[0]
+            pixels = int(direction_args[1])
+
+        assert direction in ['top', 'left', 'bottom', 'right'], 'Direction (-D) must be one of "top", "left", "bottom", "right"'
+
+        image = image.convert("RGBA")
+        # we always extend top, but rotate to extend along the requested side
+        if direction == 'left':
+            image = image.transpose(Image.Transpose.ROTATE_270)
+        elif direction == 'bottom':
+            image = image.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            image = image.transpose(Image.Transpose.ROTATE_90)
+
+        pixels = image.height//2 if pixels is None else int(pixels)
+        assert 0 < pixels < image.height, 'Direction (-D) pixels length must be in the range 0 - image.size'
+
+        # the top part of the image is taken from the source image mirrored
+        # coordinates (0,0) are the upper left corner of an image
+        top = image.transpose(Image.Transpose.FLIP_TOP_BOTTOM).convert("RGBA")
+        top = top.crop((0, top.height - pixels, top.width, top.height))
+
+        # setting all alpha of the top part to 0
+        alpha = top.getchannel("A")
+        alpha.paste(0, (0, 0, top.width, top.height))
+        top.putalpha(alpha)
+
+        # taking the bottom from the original image
+        bottom = image.crop((0, 0, image.width, image.height - pixels))
+
+        new_img = image.copy()
+        new_img.paste(top, (0, 0))
+        new_img.paste(bottom, (0, pixels))
+
+        # create a 10% dither in the middle
+        dither = min(image.height//10, pixels)
+        for x in range(0, image.width, 2):
+            for y in range(pixels - dither, pixels + dither):
+                (r, g, b, a) = new_img.getpixel((x, y))
+                new_img.putpixel((x, y), (r, g, b, 0))
+
+        # let's rotate back again
+        if direction == 'left':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_90)
+        elif direction == 'bottom':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_180)
+        elif direction == 'right':
+            new_img = new_img.transpose(Image.Transpose.ROTATE_270)
+
+        return new_img
+
     def _create_init_mask(self, image):
         # convert into a black/white mask
         image = self._image_to_mask(image)
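A note on what `_create_outpaint_image()` actually produces: the canvas keeps its original size. For `-D top 128` on a 512x512 image, the picture content is shifted down by 128 rows (the bottom 128 rows fall off), the new top strip is a vertically mirrored copy of the original top with its alpha forced to 0, and a sparse band around the seam is also made transparent so the inpainting pass can blend it. A small sketch for inspecting that behaviour (hypothetical; it assumes a bare `Generate()` can be constructed without loading weights, since this helper only touches PIL):

```python
from PIL import Image
from ldm.generate import Generate

gen = Generate()  # assumption: no checkpoint is needed just to prepare the canvas
src = Image.new('RGB', (512, 512), 'gray')

out = gen._create_outpaint_image(src, ['top', '128'])
assert out.size == (512, 512)                # canvas size is unchanged
assert out.getpixel((0, 0))[3] == 0          # mirrored top strip is transparent
assert out.getpixel((0, 300))[3] == 255      # shifted original content stays opaque
out.save('outpaint_canvas.png')              # inspect the prepared RGBA canvas
```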
@@ -4,11 +4,12 @@
 import os
 import re
 import sys
+import shlex
 import copy
 import warnings
 import time
 import ldm.dream.readline
-from ldm.dream.args import Args, metadata_dumps
+from ldm.dream.args import Args, metadata_dumps, metadata_from_png
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
 from ldm.dream.image_util import make_grid
@@ -166,6 +167,17 @@ def main_loop(gen, opt, infile):
 
         if opt.parse_cmd(command) is None:
             continue
 
+        if opt.init_img:
+            try:
+                oldargs = metadata_from_png(opt.init_img)
+                opt.prompt = oldargs.prompt
+                print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
+            except AttributeError:
+                pass
+            except KeyError:
+                pass
+
         if len(opt.prompt) == 0:
             print('\nTry again with a prompt!')
             continue
@@ -197,7 +209,9 @@ def main_loop(gen, opt, infile):
             opt.seed = None
             continue
 
-        # TODO - move this into a module
+        if opt.strength is None:
+            opt.strength = 0.75 if opt.out_direction is None else 0.83
+
         if opt.with_variations is not None:
             # shotgun parsing, woo
             parts = []
@@ -347,7 +361,15 @@ def do_postprocess (gen, opt, callback):
         print(f'* file {file_path} does not exist')
         return
 
-    tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
+    tool=None
+    if opt.gfpgan_strength > 0:
+        tool = opt.facetool
+    elif opt.embiggen:
+        tool = 'embiggen'
+    elif opt.upscale:
+        tool = 'upscale'
+    elif opt.out_direction:
+        tool = 'outpaint'
     opt.save_original = True # do not overwrite old image!
     return gen.apply_postprocessor(
         image_path = opt.prompt,
@@ -356,6 +378,7 @@ def do_postprocess (gen, opt, callback):
         codeformer_fidelity = opt.codeformer_fidelity,
         save_original = opt.save_original,
         upscale = opt.upscale,
+        out_direction = opt.out_direction,
         callback = callback,
         opt = opt,
     )
@@ -415,5 +438,16 @@ def dream_server_loop(gen, host, port, outdir, gfpgan):
 
     dream_server.server_close()
 
+def write_log_message(results, log_path):
+    """logs the name of the output image, prompt, and prompt args to the terminal and log file"""
+    global output_cntr
+    log_lines = [f'{path}: {prompt}\n' for path, prompt in results]
+    for l in log_lines:
+        output_cntr += 1
+        print(f'[{output_cntr}] {l}',end='')
+
+    with open(log_path, 'a', encoding='utf-8') as file:
+        file.writelines(log_lines)
+
 if __name__ == '__main__':
     main()