add outcrop postprocessor

commit 609983ffa8 (parent 0f9bff66bc)
@@ -213,6 +213,9 @@ class Args(object):
        if a['gfpgan_strength']:
            switches.append(f'-G {a["gfpgan_strength"]}')

        if a['outcrop']:
            switches.append(f'-c {" ".join([str(u) for u in a["outcrop"]])}')

        # esrgan-specific parameters
        if a['upscale']:
            switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')
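Note: the new `-c` switch is serialized next to the existing `-G` and `-U` switches when a dream command is rebuilt from stored args. A minimal sketch of how these fragments combine, assuming an args dict `a` shaped like the one this method consumes (the sample values are illustrative):

```python
# Sketch only: rebuilding postprocessing switches from a stored args dict.
# The keys mirror those referenced in the hunk above; the values are made up.
a = {
    'gfpgan_strength': 0.8,
    'outcrop': ['top', 64, 'right', 128],
    'upscale': [2, 0.75],
}

switches = []
if a['gfpgan_strength']:
    switches.append(f'-G {a["gfpgan_strength"]}')
if a['outcrop']:
    switches.append(f'-c {" ".join([str(u) for u in a["outcrop"]])}')
if a['upscale']:
    switches.append(f'-U {" ".join([str(u) for u in a["upscale"]])}')

print(' '.join(switches))   # -G 0.8 -c top 64 right 128 -U 2 0.75
```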
@@ -639,6 +642,14 @@ class Args(object):
            metavar=('direction', 'pixels'),
            help='Direction to extend the given image (left|right|top|bottom). If a distance pixel value is not specified it defaults to half the image size'
        )
        img2img_group.add_argument(
            '-c',
            '--outcrop',
            nargs='+',
            type=str,
            metavar=('direction:pixels'),
            help='Outcrop the image "direction pixels direction pixels..." where direction is (top|left|bottom|right)'
        )
        postprocessing_group.add_argument(
            '-ft',
            '--facetool',
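The `--outcrop` flag takes a flat list of tokens that `Generate.apply_postprocessor` later pairs into direction/pixel instructions (see the `_pairwise` helper further down). A hedged sketch of that flow, using a throwaway parser rather than the full `Args` machinery:

```python
import argparse

# Illustrative parser: only the --outcrop flag from the hunk above is reproduced.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--outcrop', nargs='+', type=str)

opt = parser.parse_args(['-c', 'top', '64', 'right', '128'])

# Pair successive tokens, as the Generate.apply_postprocessor hunk does with _pairwise().
it = iter(opt.outcrop)
extend_instructions = {direction: int(pixels) for direction, pixels in zip(it, it)}
print(extend_instructions)   # {'top': 64, 'right': 128}
```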
@@ -736,24 +747,12 @@ def metadata_dumps(opt,
        'app_version' : APP_VERSION,
    }

    # add some RFC266 fields that are generated internally, and not as
    # user args
    # # add some RFC266 fields that are generated internally, and not as
    # # user args
    image_dict = opt.to_dict(
        postprocessing=postprocessing
        postprocessing=postprocessing
    )

    # 'postprocessing' is either null or an array of postprocessing metadata
    if postprocessing:
        # TODO: This is just a hack until postprocessing pipeline work completed
        image_dict['postprocessing'] = []

        if image_dict['gfpgan_strength'] and image_dict['gfpgan_strength'] > 0:
            image_dict['postprocessing'].append('GFPGAN (not RFC compliant)')
        if image_dict['upscale'] and image_dict['upscale'][0] > 0:
            image_dict['postprocessing'].append('ESRGAN (not RFC compliant)')
    else:
        image_dict['postprocessing'] = None

    # remove any image keys not mentioned in RFC #266
    rfc266_img_fields = ['type','postprocessing','sampler','prompt','seed','variations','steps',
                         'cfg_scale','threshold','perlin','step_number','width','height','extra','strength']
@@ -21,6 +21,7 @@ class Img2Img(Generator):
        """
        self.perlin = perlin

        print(f'DEBUG: init_image = {init_image}')
        sampler.make_schedule(
            ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
        )
@@ -37,7 +37,7 @@ class PngWriter:
        path = os.path.join(self.outdir, name)
        info = PngImagePlugin.PngInfo()
        info.add_text('Dream', dream_prompt)
        if metadata:  # TODO: merge command line app's method of writing metadata and always just write metadata
        if metadata:
            info.add_text('sd-metadata', json.dumps(metadata))
        image.save(path, 'PNG', pnginfo=info)
        return path
@@ -61,3 +61,8 @@ def retrieve_metadata(img_path):
    dream_prompt = im.text.get('Dream', '')
    return {'sd-metadata': json.loads(md), 'Dream': dream_prompt}

def write_metadata(img_path:str, meta:dict):
    im = Image.open(img_path)
    info = PngImagePlugin.PngInfo()
    info.add_text('sd-metadata', json.dumps(meta))
    im.save(img_path, 'PNG', pnginfo=info)
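The new `write_metadata` helper rewrites the `sd-metadata` text chunk of an existing PNG. A minimal round-trip sketch with plain Pillow; the file name and dict contents are placeholders:

```python
import json
from PIL import Image, PngImagePlugin

# Write a metadata dict into a PNG text chunk, then read it back.
# 'out.png' and the dict contents are illustrative only.
meta = {'image': {'prompt': 'a test prompt', 'postprocessing': None}}

im = Image.new('RGB', (64, 64))
info = PngImagePlugin.PngInfo()
info.add_text('sd-metadata', json.dumps(meta))
im.save('out.png', 'PNG', pnginfo=info)

reread = Image.open('out.png')
print(json.loads(reread.text['sd-metadata']))   # round-trips the dict
```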
ldm/dream/restoration/outcrop.py (new file, 109 lines)
@@ -0,0 +1,109 @@
import warnings
import math
from ldm.dream.conditioning import get_uc_and_c
from PIL import Image, ImageFilter

class Outcrop():
    def __init__(
            self,
            image,
            generator,   # current generator object
    ):
        self.image     = image
        self.generator = generator

    def extend(
            self,
            extents:dict,
            opt,
            image_callback = None,
            prefix = None,
    ):
        extended_image = self._extend_all(extents)

        # switch samplers temporarily
        curr_sampler = self.generator.sampler
        self.generator.sampler_name = opt.sampler_name
        self.generator._set_sampler()

        def wrapped_callback(img, seed, **kwargs):
            image_callback(img, opt.seed, use_prefix=prefix, **kwargs)

        result = self.generator.prompt2image(
            opt.prompt,
            sampler        = self.generator.sampler,
            steps          = opt.steps,
            cfg_scale      = opt.cfg_scale,
            ddim_eta       = self.generator.ddim_eta,
            width          = extended_image.width,
            height         = extended_image.height,
            init_img       = extended_image,
            strength       = opt.strength,
            image_callback = wrapped_callback,
        )

        # swap sampler back
        self.generator.sampler = curr_sampler
        return result

    def _extend_all(
            self,
            extents:dict,
    ) -> Image:
        '''
        Extend the image in the indicated direction ('top','bottom','left','right')
        by the indicated number of pixels. The image canvas is extended, and the
        empty rectangular section is filled with a blurred copy of the adjacent
        image.
        '''
        image = self.image
        for direction in extents:
            assert direction in ['top', 'left', 'bottom', 'right'], 'Direction must be one of "top", "left", "bottom", "right"'
            pixels = extents[direction]
            # round pixels up to the nearest 64
            pixels = math.ceil(pixels / 64) * 64
            print(f'>> extending image {direction}ward by {pixels} pixels')
            image = self._rotate(image, direction)
            image = self._extend(image, pixels)
            image = self._rotate(image, direction, reverse=True)
        return image

    def _rotate(self, image:Image, direction:str, reverse=False) -> Image:
        '''
        Rotates the image so that the area to extend is always at the top.
        Simplifies the logic later. The reverse argument, if true, undoes the
        previous transpose.
        '''
        transposes = {
            'right':  ['ROTATE_90',  'ROTATE_270'],
            'bottom': ['ROTATE_180', 'ROTATE_180'],
            'left':   ['ROTATE_270', 'ROTATE_90'],
        }
        if direction not in transposes:
            return image
        transpose = transposes[direction][1 if reverse else 0]
        return image.transpose(Image.Transpose.__dict__[transpose])

    def _extend(self, image:Image, pixels:int) -> Image:
        extended_img = Image.new('RGBA', (image.width, image.height + pixels))

        # the first paste takes the top slice of the old image, stretches it to
        # the full extension height, and pastes it at the top of the new canvas
        top_slice = image.crop(box=(0, 0, image.width, pixels // 2))
        top_slice = top_slice.resize((image.width, pixels))
        extended_img.paste(top_slice, box=(0, 0))

        # the second paste places a copy of the image displaced `pixels` downward;
        # after the blur, the net effect is a blurred duplicate of the top portion
        # of the image filling the new region
        extended_img.paste(image, box=(0, pixels))
        extended_img = extended_img.filter(filter=ImageFilter.GaussianBlur(radius=pixels // 2))
        extended_img.paste(image, box=(0, pixels))

        # now make the top part transparent to use as a mask
        alpha = extended_img.getchannel('A')
        alpha.paste(0, (0, 0, extended_img.width, pixels * 2))
        extended_img.putalpha(alpha)

        return extended_img
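Sizing note: the requested extension is rounded up to a multiple of 64 before the canvas grows, and twice that many rows are made transparent to serve as the inpainting mask. A small sketch of the arithmetic for an assumed 512x512 image extended by 'top 100':

```python
import math

width, height = 512, 512        # example starting canvas
requested = 100                 # pixels requested for the 'top' direction

pixels = math.ceil(requested / 64) * 64        # rounds up to 128
new_size = (width, height + pixels)            # (512, 640) after _extend()
masked_rows = pixels * 2                       # top 256 rows become transparent

print(pixels, new_size, masked_rows)           # 128 (512, 640) 256
```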
@@ -499,6 +499,7 @@ class Generate:
        codeformer_fidelity = 0.75,
        upscale             = None,
        out_direction       = None,
        outcrop             = [],
        save_original       = True,  # to get new name
        callback            = None,
        opt                 = None,
@@ -527,8 +528,13 @@ class Generate:
        # face fixers and esrgan take an Image, but embiggen takes a path
        image = Image.open(image_path)

        # Note that we need to adopt a uniform API for the postprocessors.
        # This is completely ad hoc at the moment.
        # used by multiple postprocessors
        uc, c = get_uc_and_c(
            prompt, model=self.model,
            skip_normalize=opt.skip_normalize,
            log_tokens=opt.log_tokenization
        )

        if tool in ('gfpgan','codeformer','upscale'):
            if tool == 'gfpgan':
                facetool = 'gfpgan'
@@ -548,14 +554,25 @@ class Generate:
                prefix = prefix,
            )

        elif tool == 'outcrop':
            from ldm.dream.restoration.outcrop import Outcrop
            extend_instructions = {}
            for direction, pixels in _pairwise(opt.outcrop):
                extend_instructions[direction] = int(pixels)
            generator = Outcrop(
                image,
                self,
            )
            return generator.extend(
                extend_instructions,
                opt,
                image_callback = callback,
                prefix = prefix,
            )

        elif tool == 'embiggen':
            # fetch the metadata from the image
            generator = self._make_embiggen()
            uc, c = get_uc_and_c(
                prompt, model=self.model,
                skip_normalize=opt.skip_normalize,
                log_tokens=opt.log_tokenization
            )
            opt.strength = 0.40
            print(f'>> Setting img2img strength to {opt.strength} for happy embiggening')
            # embiggen takes an image path (sigh)
@@ -586,16 +603,13 @@ class Generate:
                steps          = opt.steps,
                cfg_scale      = opt.cfg_scale,
                ddim_eta       = self.ddim_eta,
                conditioning   = get_uc_and_c(
                    oldargs.prompt, model=self.model,
                    skip_normalize=opt.skip_normalize,
                    log_tokens=opt.log_tokenization
                ),
                conditioning   = (uc, c),
                width          = opt.width,
                height         = opt.height,
                init_img       = image_path,  # not the Image! (sigh)
                strength       = opt.strength,
                image_callback = callback,
                prefix         = prefix,
            )
        elif tool is None:
            print(f'* please provide at least one postprocessing option, such as -G or -U')
@@ -968,7 +982,6 @@ class Generate:

        image = image.resize((image.width // downsampling, image.height //
                              downsampling), resample=Image.Resampling.NEAREST)

        image = np.array(image)
        image = image.astype(np.float32) / 255.0
        image = image[None].transpose(0, 3, 1, 2)
@@ -1088,3 +1101,8 @@ class Generate:
            image = self.sample_to_image(img)
            image.save(os.path.join(path, f'{counter:03}.png'), 'PNG')
        return callback

def _pairwise(iterable):
    "s -> (s0, s1), (s2, s3), (s4, s5), ..."
    a = iter(iterable)
    return zip(a, a)
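`_pairwise` is what turns the flat `--outcrop` token list into direction/pixel pairs. A quick illustration with an example token list:

```python
def _pairwise(iterable):
    "s -> (s0, s1), (s2, s3), (s4, s5), ..."
    a = iter(iterable)
    return zip(a, a)

# Example: the flat token list produced by `-c top 64 right 128`
tokens = ['top', '64', 'right', '128']
print({direction: int(pixels) for direction, pixels in _pairwise(tokens)})
# {'top': 64, 'right': 128}
```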
@@ -8,10 +8,11 @@ import shlex
import copy
import warnings
import time
import traceback
sys.path.append('.')  # corrects a weird problem on Macs
from ldm.dream.readline import get_completer
from ldm.dream.args import Args, metadata_dumps, metadata_from_png, dream_cmd_from_png
from ldm.dream.pngwriter import PngWriter
from ldm.dream.pngwriter import PngWriter, retrieve_metadata, write_metadata
from ldm.dream.image_util import make_grid
from ldm.dream.log import write_log
from omegaconf import OmegaConf
@@ -57,7 +58,6 @@ def main():
    else:
        print('>> Face restoration and upscaling disabled')
    except (ModuleNotFoundError, ImportError):
        import traceback
        print(traceback.format_exc(), file=sys.stderr)
        print('>> You may need to install the ESRGAN and/or GFPGAN modules')
@@ -309,9 +309,22 @@ def main_loop(gen, opt, infile):
                    ),
                    name = filename,
                )
                if (not upscaled) or opt.save_original:

                # update rfc metadata
                if operation == 'postprocess':
                    tool = re.match(r'postprocess:(\w+)', opt.last_operation).groups()[0]
                    add_postprocessing_to_metadata(
                        opt,
                        opt.prompt,
                        filename,
                        tool,
                        formatted_dream_prompt,
                    )

                if (not postprocessed) or opt.save_original:
                    # only append to results if we didn't overwrite an earlier output
                    results.append([path, formatted_dream_prompt])

            # so that the seed autocompletes (on linux|mac when -S or --seed is specified)
            if completer:
                completer.add_seed(seed)
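The tool name is recovered from `opt.last_operation` before the PNG metadata is updated. A one-line illustration of that regex:

```python
import re

last_operation = 'postprocess:outcrop'          # as set by do_postprocess()
tool = re.match(r'postprocess:(\w+)', last_operation).groups()[0]
print(tool)   # 'outcrop'
```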
@@ -383,8 +396,10 @@ def do_postprocess (gen, opt, callback):
        tool = 'upscale'
    elif opt.out_direction:
        tool = 'outpaint'
        opt.save_original  = True  # do not overwrite old image!
        opt.last_operation = f'postprocess:{tool}'
    elif opt.outcrop:
        tool = 'outcrop'
        opt.save_original  = True  # do not overwrite old image!
        opt.last_operation = f'postprocess:{tool}'
    try:
        gen.apply_postprocessor(
            image_path = file_path,
@@ -394,6 +409,7 @@ def do_postprocess (gen, opt, callback):
            save_original = opt.save_original,
            upscale       = opt.upscale,
            out_direction = opt.out_direction,
            outcrop       = opt.outcrop,
            callback      = callback,
            opt           = opt,
        )
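Each postprocessing flag maps to a tool name and stamps `opt.last_operation`, which the main loop parses afterwards to update the PNG metadata. A condensed sketch of that mapping; the `opt` stub and the exact branch guards are illustrative, only the assignments come from the hunks above:

```python
from types import SimpleNamespace

# Illustrative stub: only the flags consulted by do_postprocess() are present.
opt = SimpleNamespace(upscale=None, out_direction=None,
                      outcrop=['top', '64'],
                      save_original=False, last_operation=None)

if opt.upscale:
    tool = 'upscale'
elif opt.out_direction:
    tool = 'outpaint'
elif opt.outcrop:
    tool = 'outcrop'
    opt.save_original = True           # never overwrite the source image
    opt.last_operation = f'postprocess:{tool}'

print(tool, opt.last_operation)        # outcrop postprocess:outcrop
```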
@@ -401,9 +417,24 @@ def do_postprocess (gen, opt, callback):
        print(f'** {file_path}: file could not be read')
        return
    except (KeyError, AttributeError):
        print(f'** {file_path}: file has no metadata')
        print(traceback.format_exc(), file=sys.stderr)
        return
    return opt.last_operation

def add_postprocessing_to_metadata(opt, original_file, new_file, tool, command):
    original_file = original_file if os.path.exists(original_file) else os.path.join(opt.outdir, original_file)
    new_file      = new_file if os.path.exists(new_file) else os.path.join(opt.outdir, new_file)
    meta          = retrieve_metadata(original_file)['sd-metadata']
    img_data      = meta['image']
    pp            = img_data.get('postprocessing', []) or []
    pp.append(
        {
            'tool': tool,
            'dream_command': command,
        }
    )
    meta['image']['postprocessing'] = pp
    write_metadata(new_file, meta)

def prepare_image_metadata(
    opt,
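`add_postprocessing_to_metadata` appends one record per postprocessing pass to the image's `sd-metadata`. A rough sketch of the stored shape after an outcrop pass; the prompt and command strings are invented placeholders:

```python
# Illustrative only: shape of meta['image']['postprocessing'] after one pass.
meta = {
    'image': {
        'prompt': 'a fantastic alien landscape',
        'postprocessing': None,        # as written at generation time
    }
}

pp = meta['image'].get('postprocessing', []) or []
pp.append({
    'tool': 'outcrop',
    'dream_command': '000001.1234567890.png -c top 64',   # placeholder command text
})
meta['image']['postprocessing'] = pp
print(meta['image']['postprocessing'])
```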