add ability to post-process images from the CLI

- supports gfpgan, esrgan, codeformer and embiggen
- To use:
   dream> !fix ./outputs/img-samples/000056.292144555.png -ft gfpgan -U2 -G0.8
   dream> !fix ./outputs/img-samples/000056.292144555.png -ft codeformer -G 0.8
   dream> !fix ./outputs/img-samples/000056.29214455.png  -U4
   dream> !fix ./outputs/img-samples/000056.292144555.png -embiggen 1.5

   The first example invokes gfpgan to fix faces and esrgan to upscale 2X.
   The second example invokes codeformer to fix faces, with no upscaling.
   The third example uses esrgan to upscale 4X.
   The fourth example runs embiggen to enlarge 1.5X.
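
   For reference, the flags select a single postprocessing tool in the same
   way that the new do_postprocess() routine does. Here is a minimal sketch
   of that routing (the select_tool() helper and its defaults are
   illustrative only, not part of this commit):

      def select_tool(facetool='gfpgan', gfpgan_strength=0.0, embiggen=None):
          # -G > 0 requests a face fixer; -ft chooses which one
          if gfpgan_strength > 0:
              return facetool        # 'gfpgan' or 'codeformer'
          # -embiggen takes precedence over a plain esrgan upscale
          if embiggen:
              return 'embiggen'
          return 'upscale'           # -U alone runs esrgan

   e.g. select_tool(gfpgan_strength=0.8) returns 'gfpgan', while
   select_tool(embiggen=[1.5]) returns 'embiggen'.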

- This is very preliminary work. There are some anomalies to note:
  1. The syntax is non-obvious. I would prefer something like:
     !fix esrgan,gfpgan
     !fix esrgan
     !fix embiggen,codeformer

     However, this will require refactoring the gfpgan and embiggen
     code (see the parsing sketch after this list).

   2. Images generated using gfpgan, esrgan, or codeformer are all named
      "xxxxxx.xxxxxx.postprocessed.png", and the original is saved.
      However, the prefix is a new one that is not related to the
      original image's prefix.

   3. Images generated using embiggen are named "xxxxx.xxxxxxx.png",
      and once again the prefix is new. I'm not sure whether the
      prefix should be aligned with the original file's prefix or not.
      Probably not, but opinions welcome.
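
   For illustration, the comma-separated syntax proposed in (1) could be
   parsed into an ordered tool chain along these lines (parse_fix_tools()
   and KNOWN_TOOLS are hypothetical names, not part of this commit):

      KNOWN_TOOLS = {'gfpgan', 'codeformer', 'esrgan', 'embiggen'}

      def parse_fix_tools(arg):
          # "esrgan,gfpgan" -> ['esrgan', 'gfpgan'], order preserved
          tools = [t.strip().lower() for t in arg.split(',') if t.strip()]
          unknown = [t for t in tools if t not in KNOWN_TOOLS]
          if unknown:
              raise ValueError('unknown postprocessing tool(s): ' + ', '.join(unknown))
          return tools

   e.g. parse_fix_tools('embiggen,codeformer') returns
   ['embiggen', 'codeformer'], which the CLI could then apply left to right.
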
Lincoln Stein 2022-09-18 17:26:09 -04:00
parent 1b0d6a9bdb
commit e8bb39370c
5 changed files with 148 additions and 16 deletions


@@ -419,7 +419,10 @@ class Args(object):
# This creates the parser that processes commands on the dream> command line
def _create_dream_cmd_parser(self):
parser = argparse.ArgumentParser(
description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12'
description="""
Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4
Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer
"""
)
render_group = parser.add_argument_group('General rendering')
img2img_group = parser.add_argument_group('Image-to-image and inpainting')
@@ -542,6 +545,7 @@ class Args(object):
'-ft',
'--facetool',
type=str,
default='gfpgan',
help='Select the face restoration AI to use: gfpgan, codeformer',
)
postprocessing_group.add_argument(
@@ -549,7 +553,7 @@ class Args(object):
'--gfpgan_strength',
type=float,
help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.',
default=0,
default=0.0,
)
postprocessing_group.add_argument(
'-cf',
@@ -714,8 +718,10 @@ def metadata_loads(metadata):
images = [metadata['sd-metadata']['image']]
for image in images:
# repack the prompt and variations
image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']])
image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']])
if 'prompt' in image:
image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']])
if 'variations' in image:
image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']])
# fix a bit of semantic drift here
image['sampler_name']=image.pop('sampler')
opt = Args()


@@ -26,8 +26,8 @@ class Completer:
'--init_color')):
return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
return self._path_completions(text, state, ())
if buffer.strip().endswith('pp') or text.startswith(('.', '/')):
return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
response = None
if state == 0:


@@ -27,7 +27,8 @@ from ldm.util import instantiate_from_config
from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ksampler import KSampler
from ldm.dream.pngwriter import PngWriter
from ldm.dream.pngwriter import PngWriter, retrieve_metadata
from ldm.dream.args import metadata_loads
from ldm.dream.image_util import InitImageResizer
from ldm.dream.devices import choose_torch_device, choose_precision
from ldm.dream.conditioning import get_uc_and_c
@@ -293,6 +294,7 @@ class Generate:
strength = strength or self.strength
self.seed = seed
self.log_tokenization = log_tokenization
self.step_callback = step_callback
with_variations = [] if with_variations is None else with_variations
# will instantiate the model or return it from cache
@@ -421,6 +423,97 @@ class Generate:
)
return results
# this needs to be generalized to all sorts of postprocessors, but for now
# sufficient to support most use cases
def apply_postprocessor(
self,
image_path,
tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen'
gfpgan_strength = 0.0,
codeformer_fidelity = 0.75,
save_original = True, # to get new name
upscale = None,
callback = None,
opt = None,
):
# retrieve the seed from the image;
# note that we will try both the new way and the old way, since not all files have the
# metadata (yet)
seed = None
image_metadata = None
prompt = None
try:
meta = retrieve_metadata(image_path)
args = metadata_loads(meta)
if len(args) > 1:
print("* Can't postprocess a grid")
return
seed = args[0].seed
prompt = args[0].prompt
print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}')
except:
m = re.search('(\d+)\.png$',image_path)
if m:
seed = m.group(1)
if not seed:
print('* Could not recover seed for image. Replacing with 42. This will not affect image quality')
seed = 42
# face fixers and esrgan take an Image, but embiggen takes a path
image = Image.open(image_path)
# Note that we need to adopt a uniform API for the postprocessors.
# This is completely ad hoc ATCM
if tool in ('gfpgan','codeformer','upscale'):
if tool == 'gfpgan':
facetool = 'gfpgan'
elif tool == 'codeformer':
facetool = 'codeformer'
elif tool == 'upscale':
facetool = 'gfpgan' # but won't be run
gfpgan_strength = 0
return self.upscale_and_reconstruct(
[[image,seed]],
facetool = facetool,
strength = gfpgan_strength,
codeformer_fidelity = codeformer_fidelity,
save_original = save_original,
upscale = upscale,
image_callback = callback,
)
elif tool == 'embiggen':
# fetch the metadata from the image
generator = self._make_embiggen()
uc, c = get_uc_and_c(
prompt, model =self.model,
skip_normalize=opt.skip_normalize,
log_tokens =opt.log_tokenization
)
# embiggen takes a image path (sigh)
generator.generate(
prompt,
sampler = self.sampler,
steps = opt.steps,
cfg_scale = opt.cfg_scale,
ddim_eta = self.ddim_eta,
conditioning= (uc, c),
init_img = image_path, # not the Image! (sigh)
init_image = image, # embiggen wants both! (sigh)
strength = opt.strength,
width = opt.width,
height = opt.height,
embiggen = opt.embiggen,
embiggen_tiles = opt.embiggen_tiles,
image_callback = callback,
)
else:
print(f'* postprocessing tool {tool} is not yet supported')
return None
def _make_images(self, img_path, mask_path, width, height, fit=False):
init_image = None
init_mask = None


@@ -113,6 +113,8 @@ def main_loop(gen, opt, infile):
name_max = 255
while not done:
operation = 'generate' # default operation, alternative is 'postprocess'
try:
command = get_next_command(infile)
except EOFError:
@@ -133,8 +135,14 @@ def main_loop(gen, opt, infile):
if command.startswith(
'!dream'
): # in case a stored prompt still contains the !dream command
command.replace('!dream','',1)
command = command.replace('!dream ','',1)
if command.startswith(
'!fix'
):
command = command.replace('!fix ','',1)
operation = 'postprocess'
if opt.parse_cmd(command) is None:
continue
if len(opt.prompt) == 0:
@@ -147,7 +155,7 @@ def main_loop(gen, opt, infile):
if not opt.height:
opt.height = model_config.height
# retrieve previous value!
# retrieve previous value of init image if requested
if opt.init_img is not None and re.match('^-\\d+$', opt.init_img):
try:
opt.init_img = last_results[int(opt.init_img)][0]
@@ -158,7 +166,8 @@ def main_loop(gen, opt, infile):
opt.init_img = None
continue
if opt.seed is not None and opt.seed < 0: # retrieve previous value!
# retrieve previous valueof seed if requested
if opt.seed is not None and opt.seed < 0:
try:
opt.seed = last_results[opt.seed][1]
print(f'>> Reusing previous seed {opt.seed}')
@@ -257,12 +266,16 @@ def main_loop(gen, opt, infile):
results.append([path, formatted_dream_prompt])
last_results.append([path, seed])
catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts
gen.prompt2image(
image_callback=image_writer,
catch_interrupts=catch_ctrl_c,
**vars(opt)
)
if operation == 'generate':
catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts
gen.prompt2image(
image_callback=image_writer,
catch_interrupts=catch_ctrl_c,
**vars(opt)
)
elif operation == 'postprocess':
print(f'>> fixing {opt.prompt}')
do_postprocess(gen,opt,image_writer)
if opt.grid and len(grid_images) > 0:
grid_img = make_grid(list(grid_images.values()))
@@ -300,7 +313,27 @@ def main_loop(gen, opt, infile):
print('goodbye!')
def do_postprocess (gen, opt, callback):
file_path = opt.prompt # treat the prompt as the file pathname
if os.path.dirname(file_path) == '': #basename given
file_path = os.path.join(opt.outdir,file_path)
if not os.path.exists(file_path):
print(f'* file {file_path} does not exist')
return
tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale')
opt.save_original = True # do not overwrite old image!
return gen.apply_postprocessor(
image_path = opt.prompt,
tool = tool,
gfpgan_strength = opt.gfpgan_strength,
codeformer_fidelity = opt.codeformer_fidelity,
save_original = opt.save_original,
upscale = opt.upscale,
callback = callback,
opt = opt,
)
def get_next_command(infile=None) -> str: # command string
if infile is None:
command = input('dream> ')

scripts/sd-metadata.py Normal file → Executable file