add ability to post-process images from the CLI

- supports gfpgan, esrgan, codeformer and embiggen
- To use:
   dream> !fix ./outputs/img-samples/000056.292144555.png -ft gfpgan -U2 -G0.8
   dream> !fix ./outputs/img-samples/000056.292144555.png -ft codeformer -G 0.8
   dream> !fix ./outputs/img-samples/000056.29214455.png  -U4
   dream> !fix ./outputs/img-samples/000056.292144555.png -embiggen 1.5

   The first example invokes gfpgan to fix faces and esrgan to upscale 2X.
   The second example invokes codeformer to fix faces, with no upscaling.
   The third example uses esrgan to upscale 4X.
   The fourth example runs embiggen to enlarge the image 1.5X.
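
   As a rough, unverified sketch (not part of this commit), the same
   post-processing can also be driven from Python through the new
   Generate.apply_postprocessor() method added below; the Generate()
   constructor defaults, the load_model() call, and the [scale, strength]
   format of the upscale argument are assumptions here, not verified:

     # minimal sketch only -- constructor args and upscale format are assumed
     from ldm.generate import Generate

     gen = Generate()
     gen.load_model()    # assumed: weights must be loaded before postprocessing
     gen.apply_postprocessor(
         image_path      = './outputs/img-samples/000056.292144555.png',
         tool            = 'gfpgan',    # or 'codeformer', 'upscale', 'embiggen'
         gfpgan_strength = 0.8,         # like -G0.8
         upscale         = [2, 0.75],   # like -U2 (assumed [scale, strength])
         save_original   = True,
     )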

- This is very preliminary work. There are some anomalies to note:
  1. The syntax is non-obvious. I would prefer something like:
     !fix esrgan,gfpgan
     !fix esrgan
     !fix embiggen,codeformer

     However, this will require refactoring the gfpgan and embiggen
     code (a hypothetical sketch of parsing that syntax appears after
     this list).

   2. Images generated using gfpgan, esrgan or codeformer are all named
      "xxxxxx.xxxxxx.postprocessed.png" and the original is saved.
      However, the prefix is a new one that is not related to the
      original.

   3. Images generated using embiggen are named "xxxxx.xxxxxxx.png",
      and once again the prefix is new. I'm not sure whether the
      prefix should be aligned with the original file's prefix or not.
      Probably not, but opinions welcome.
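
   For point 1 above, a purely hypothetical sketch of how a comma-separated
   tool list might be parsed once the refactoring is done (parse_fix_tools
   and KNOWN_TOOLS are invented names, not in this commit):

     KNOWN_TOOLS = ('gfpgan', 'codeformer', 'esrgan', 'embiggen')

     def parse_fix_tools(command):
         # e.g. "!fix esrgan,gfpgan ./outputs/img-samples/000056.292144555.png"
         parts = command.split(maxsplit=2)
         tools = [t.strip() for t in parts[1].split(',')]
         path  = parts[2] if len(parts) > 2 else None
         bad   = [t for t in tools if t not in KNOWN_TOOLS]
         if bad:
             raise ValueError(f'unknown postprocessing tool(s): {bad}')
         return tools, path
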
Lincoln Stein 2022-09-18 17:26:09 -04:00
parent 1e8e5245eb
commit fccf809e3a
5 changed files with 148 additions and 16 deletions

ldm/dream/args.py

@@ -397,7 +397,10 @@ class Args(object):
     # This creates the parser that processes commands on the dream> command line
     def _create_dream_cmd_parser(self):
         parser = argparse.ArgumentParser(
-            description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12'
+            description="""
+            Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4
+            Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer
+            """
         )
         render_group = parser.add_argument_group('General rendering')
         img2img_group = parser.add_argument_group('Image-to-image and inpainting')
@@ -520,6 +523,7 @@ class Args(object):
             '-ft',
             '--facetool',
             type=str,
+            default='gfpgan',
             help='Select the face restoration AI to use: gfpgan, codeformer',
         )
         postprocessing_group.add_argument(
@@ -527,7 +531,7 @@ class Args(object):
             '--gfpgan_strength',
             type=float,
             help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.',
-            default=0,
+            default=0.0,
         )
         postprocessing_group.add_argument(
             '-cf',
@@ -674,7 +678,9 @@ def metadata_loads(metadata):
     images = metadata['sd-metadata']['images']
     for image in images:
         # repack the prompt and variations
-        image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']])
-        image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']])
+        if 'prompt' in image:
+            image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']])
+        if 'variations' in image:
+            image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']])
         # fix a bit of semantic drift here
         image['sampler_name']=image.pop('sampler')

ldm/dream/readline.py

@@ -26,8 +26,8 @@ class Completer:
                             '--init_color')):
             return self._path_completions(text, state, ('.png','.jpg','.jpeg'))

-        if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
-            return self._path_completions(text, state, ())
+        if buffer.strip().endswith('pp') or text.startswith(('.', '/')):
+            return self._path_completions(text, state, ('.png','.jpg','.jpeg'))

        response = None
        if state == 0:

ldm/generate.py

@@ -27,7 +27,8 @@ from ldm.util import instantiate_from_config
 from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
-from ldm.dream.pngwriter import PngWriter
+from ldm.dream.pngwriter import PngWriter, retrieve_metadata
+from ldm.dream.args import metadata_loads
 from ldm.dream.image_util import InitImageResizer
 from ldm.dream.devices import choose_torch_device
 from ldm.dream.conditioning import get_uc_and_c
@@ -284,6 +285,7 @@ class Generate:
         strength = strength or self.strength
         self.seed = seed
         self.log_tokenization = log_tokenization
+        self.step_callback = step_callback
         with_variations = [] if with_variations is None else with_variations

         # will instantiate the model or return it from cache
@@ -412,6 +414,97 @@ class Generate:
         )
         return results

+    # this needs to be generalized to all sorts of postprocessors, but for now
+    # sufficient to support most use cases
+    def apply_postprocessor(
+        self,
+        image_path,
+        tool = 'gfpgan',  # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
+        gfpgan_strength = 0.0,
+        codeformer_fidelity = 0.75,
+        save_original = True,  # to get new name
+        upscale = None,
+        callback = None,
+        opt = None,
+    ):
+        # retrieve the seed from the image;
+        # note that we will try both the new way and the old way, since not all files have the
+        # metadata (yet)
+        seed = None
+        image_metadata = None
+        prompt = None
+        try:
+            meta = retrieve_metadata(image_path)
+            args = metadata_loads(meta)
+            if len(args) > 1:
+                print("* Can't postprocess a grid")
+                return
+            seed = args[0].seed
+            prompt = args[0].prompt
+            print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
+        except:
+            m = re.search('(\d+)\.png$', image_path)
+            if m:
+                seed = m.group(1)
+
+        if not seed:
+            print('* Could not recover seed for image. Replacing with 42. This will not affect image quality')
+            seed = 42
+
+        # face fixers and esrgan take an Image, but embiggen takes a path
+        image = Image.open(image_path)
+
+        # Note that we need to adopt a uniform API for the postprocessors.
+        # This is completely ad hoc ATCM
+        if tool in ('gfpgan', 'codeformer', 'upscale'):
+            if tool == 'gfpgan':
+                facetool = 'gfpgan'
+            elif tool == 'codeformer':
+                facetool = 'codeformer'
+            elif tool == 'upscale':
+                facetool = 'gfpgan'  # but won't be run
+                gfpgan_strength = 0
+            return self.upscale_and_reconstruct(
+                [[image, seed]],
+                facetool = facetool,
+                strength = gfpgan_strength,
+                codeformer_fidelity = codeformer_fidelity,
+                save_original = save_original,
+                upscale = upscale,
+                image_callback = callback,
+            )
+
+        elif tool == 'embiggen':
+            # fetch the metadata from the image
+            generator = self._make_embiggen()
+            uc, c = get_uc_and_c(
+                prompt, model = self.model,
+                skip_normalize = opt.skip_normalize,
+                log_tokens = opt.log_tokenization
+            )
+            # embiggen takes a image path (sigh)
+            generator.generate(
+                prompt,
+                sampler = self.sampler,
+                steps = opt.steps,
+                cfg_scale = opt.cfg_scale,
+                ddim_eta = self.ddim_eta,
+                conditioning = (uc, c),
+                init_img = image_path,  # not the Image! (sigh)
+                init_image = image,     # embiggen wants both! (sigh)
+                strength = opt.strength,
+                width = opt.width,
+                height = opt.height,
+                embiggen = opt.embiggen,
+                embiggen_tiles = opt.embiggen_tiles,
+                image_callback = callback,
+            )
+        else:
+            print(f'* postprocessing tool {tool} is not yet supported')
+            return None
+
     def _make_images(self, img_path, mask_path, width, height, fit=False):
         init_image = None
         init_mask = None

scripts/dream.py

@@ -111,6 +111,8 @@ def main_loop(gen, opt, infile):
    name_max = 255

    while not done:
+        operation = 'generate'  # default operation, alternative is 'postprocess'
+
        try:
            command = get_next_command(infile)
        except EOFError:
@@ -131,7 +133,13 @@ def main_loop(gen, opt, infile):
        if command.startswith(
            '!dream'
        ):  # in case a stored prompt still contains the !dream command
-            command.replace('!dream','',1)
+            command = command.replace('!dream ','',1)
+
+        if command.startswith(
+            '!fix'
+        ):
+            command = command.replace('!fix ','',1)
+            operation = 'postprocess'

        if opt.parse_cmd(command) is None:
            continue
@@ -145,7 +153,7 @@ def main_loop(gen, opt, infile):
        if not opt.height:
            opt.height = model_config.height

-        # retrieve previous value!
+        # retrieve previous value of init image if requested
        if opt.init_img is not None and re.match('^-\\d+$', opt.init_img):
            try:
                opt.init_img = last_results[int(opt.init_img)][0]
@@ -156,7 +164,8 @@ def main_loop(gen, opt, infile):
                opt.init_img = None
                continue

-        if opt.seed is not None and opt.seed < 0:  # retrieve previous value!
+        # retrieve previous value of seed if requested
+        if opt.seed is not None and opt.seed < 0:
            try:
                opt.seed = last_results[opt.seed][1]
                print(f'>> Reusing previous seed {opt.seed}')
@@ -255,12 +264,16 @@ def main_loop(gen, opt, infile):
            results.append([path, formatted_dream_prompt])
            last_results.append([path, seed])

-        catch_ctrl_c = infile is None  # if running interactively, we catch keyboard interrupts
-        gen.prompt2image(
-            image_callback=image_writer,
-            catch_interrupts=catch_ctrl_c,
-            **vars(opt)
-        )
+        if operation == 'generate':
+            catch_ctrl_c = infile is None  # if running interactively, we catch keyboard interrupts
+            gen.prompt2image(
+                image_callback=image_writer,
+                catch_interrupts=catch_ctrl_c,
+                **vars(opt)
+            )
+        elif operation == 'postprocess':
+            print(f'>> fixing {opt.prompt}')
+            do_postprocess(gen,opt,image_writer)

        if opt.grid and len(grid_images) > 0:
            grid_img = make_grid(list(grid_images.values()))
@@ -298,6 +311,26 @@ def main_loop(gen, opt, infile):
    print('goodbye!')

+def do_postprocess (gen, opt, callback):
+    file_path = opt.prompt  # treat the prompt as the file pathname
+    if os.path.dirname(file_path) == '':  # basename given
+        file_path = os.path.join(opt.outdir,file_path)
+    if not os.path.exists(file_path):
+        print(f'* file {file_path} does not exist')
+        return
+
+    tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
+    opt.save_original = True  # do not overwrite old image!
+    return gen.apply_postprocessor(
+        image_path = opt.prompt,
+        tool = tool,
+        gfpgan_strength = opt.gfpgan_strength,
+        codeformer_fidelity = opt.codeformer_fidelity,
+        save_original = opt.save_original,
+        upscale = opt.upscale,
+        callback = callback,
+        opt = opt,
+    )
+
 def get_next_command(infile=None) -> str:  # command string
    if infile is None:

scripts/sd-metadata.py Normal file → Executable file