From fccf809e3aabff80ea3be9c3b819327e636254f1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 18 Sep 2022 17:26:09 -0400 Subject: [PATCH 1/6] add ability to post-process images from the CLI - supports gfpgan, esrgan, codeformer and embiggen - To use: dream> !fix ./outputs/img-samples/000056.292144555.png -ft gfpgan -U2 -G0.8 dream> !fix ./outputs/img-samples/000056.292144555.png -ft codeformer -G 0.8 dream> !fix ./outputs/img-samples/000056.29214455.png -U4 dream> !fix ./outputs/img-samples/000056.292144555.png -embiggen 1.5 The first example invokes gfpgan to fix faces and esrgan to upscale. The second example invokes codeformer to fix faces, with no upscaling. The third example uses esrgan to upscale 4X. The fourth example runs embiggen to enlarge 1.5X. - This is very preliminary work. There are some anomalies to note: 1. The syntax is non-obvious. I would prefer something like: !fix esrgan,gfpgan !fix esrgan !fix embiggen,codeformer However, this will require refactoring the gfpgan and embiggen code. 2. Images generated using gfpgan, esrgan or codeformer are all named "xxxxxx.xxxxxx.postprocessed.png" and the original is saved. However, the prefix is a new one that is not related to the original. 3. Images generated using embiggen are named "xxxxx.xxxxxxx.png", and once again the prefix is new. I'm not sure whether the prefix should be aligned with the original file's prefix or not. Probably not, but opinions welcome. --- ldm/dream/args.py | 14 +++++-- ldm/dream/readline.py | 4 +- ldm/generate.py | 95 +++++++++++++++++++++++++++++++++++++++++- scripts/dream.py | 51 +++++++++++++++++++---- scripts/sd-metadata.py | 0 5 files changed, 148 insertions(+), 16 deletions(-) mode change 100644 => 100755 scripts/sd-metadata.py diff --git a/ldm/dream/args.py b/ldm/dream/args.py index 8a8a3a41b3..62799c2a11 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -397,7 +397,10 @@ class Args(object): # This creates the parser that processes commands on the dream> command line def _create_dream_cmd_parser(self): parser = argparse.ArgumentParser( - description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12' + description=""" + Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4 + Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer + """ ) render_group = parser.add_argument_group('General rendering') img2img_group = parser.add_argument_group('Image-to-image and inpainting') @@ -520,6 +523,7 @@ class Args(object): '-ft', '--facetool', type=str, + default='gfpgan', help='Select the face restoration AI to use: gfpgan, codeformer', ) postprocessing_group.add_argument( '-G', '--gfpgan_strength', type=float, help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.', - default=0, + default=0.0, ) postprocessing_group.add_argument( '-cf', @@ -674,8 +678,10 @@ def metadata_loads(metadata): images = metadata['sd-metadata']['images'] for image in images: # repack the prompt and variations - image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']]) - image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']]) + if 'prompt' in image: + image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']]) + if 'variations' in image: + image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']]) # fix a bit of semantic
drift here image['sampler_name']=image.pop('sampler') opt = Args() diff --git a/ldm/dream/readline.py b/ldm/dream/readline.py index da94f5a61f..42f4a3ac01 100644 --- a/ldm/dream/readline.py +++ b/ldm/dream/readline.py @@ -26,8 +26,8 @@ class Completer: '--init_color')): return self._path_completions(text, state, ('.png','.jpg','.jpeg')) - if buffer.strip().endswith('cd') or text.startswith(('.', '/')): - return self._path_completions(text, state, ()) + if buffer.strip().endswith('pp') or text.startswith(('.', '/')): + return self._path_completions(text, state, ('.png','.jpg','.jpeg')) response = None if state == 0: diff --git a/ldm/generate.py b/ldm/generate.py index a470648cdc..1c157d44ed 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -27,7 +27,8 @@ from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.ksampler import KSampler -from ldm.dream.pngwriter import PngWriter +from ldm.dream.pngwriter import PngWriter, retrieve_metadata +from ldm.dream.args import metadata_loads from ldm.dream.image_util import InitImageResizer from ldm.dream.devices import choose_torch_device from ldm.dream.conditioning import get_uc_and_c @@ -284,6 +285,7 @@ class Generate: strength = strength or self.strength self.seed = seed self.log_tokenization = log_tokenization + self.step_callback = step_callback with_variations = [] if with_variations is None else with_variations # will instantiate the model or return it from cache @@ -412,6 +414,97 @@ class Generate: ) return results + # this needs to be generalized to all sorts of postprocessors, but for now + # sufficient to support most use cases + def apply_postprocessor( + self, + image_path, + tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen' + gfpgan_strength = 0.0, + codeformer_fidelity = 0.75, + save_original = True, # to get new name + upscale = None, + callback = None, + opt = None, + ): + # retrieve the seed from the image; + # note that we will try both the new way and the old way, since not all files have the + # metadata (yet) + seed = None + image_metadata = None + prompt = None + try: + meta = retrieve_metadata(image_path) + args = metadata_loads(meta) + if len(args) > 1: + print("* Can't postprocess a grid") + return + seed = args[0].seed + prompt = args[0].prompt + print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') + except: + m = re.search('(\d+)\.png$',image_path) + if m: + seed = m.group(1) + + if not seed: + print('* Could not recover seed for image. Replacing with 42. This will not affect image quality') + seed = 42 + + # face fixers and esrgan take an Image, but embiggen takes a path + image = Image.open(image_path) + + # Note that we need to adopt a uniform API for the postprocessors. 
+ # This is completely ad hoc at the moment + if tool in ('gfpgan','codeformer','upscale'): + if tool == 'gfpgan': + facetool = 'gfpgan' + elif tool == 'codeformer': + facetool = 'codeformer' + elif tool == 'upscale': + facetool = 'gfpgan' # but won't be run + gfpgan_strength = 0 + return self.upscale_and_reconstruct( + [[image,seed]], + facetool = facetool, + strength = gfpgan_strength, + codeformer_fidelity = codeformer_fidelity, + save_original = save_original, + upscale = upscale, + image_callback = callback, + ) + + elif tool == 'embiggen': + # fetch the metadata from the image + generator = self._make_embiggen() + uc, c = get_uc_and_c( + prompt, model =self.model, + skip_normalize=opt.skip_normalize, + log_tokens =opt.log_tokenization + ) + # embiggen takes an image path (sigh) + generator.generate( + prompt, + sampler = self.sampler, + steps = opt.steps, + cfg_scale = opt.cfg_scale, + ddim_eta = self.ddim_eta, + conditioning= (uc, c), + init_img = image_path, # not the Image! (sigh) + init_image = image, # embiggen wants both! (sigh) + strength = opt.strength, + width = opt.width, + height = opt.height, + embiggen = opt.embiggen, + embiggen_tiles = opt.embiggen_tiles, + image_callback = callback, + ) + + else: + print(f'* postprocessing tool {tool} is not yet supported') + return None + + def _make_images(self, img_path, mask_path, width, height, fit=False): init_image = None init_mask = None diff --git a/scripts/dream.py b/scripts/dream.py index 857b5637aa..7cf038f795 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -111,6 +111,8 @@ def main_loop(gen, opt, infile): name_max = 255 while not done: + operation = 'generate' # default operation, alternative is 'postprocess' + try: command = get_next_command(infile) except EOFError: @@ -131,8 +133,14 @@ def main_loop(gen, opt, infile): if command.startswith( '!dream' ): # in case a stored prompt still contains the !dream command - command.replace('!dream','',1) + command = command.replace('!dream ','',1) + if command.startswith( '!fix' ): + command = command.replace('!fix ','',1) + operation = 'postprocess' + if opt.parse_cmd(command) is None: continue if len(opt.prompt) == 0: @@ -145,7 +153,7 @@ if not opt.height: opt.height = model_config.height - # retrieve previous value! + # retrieve previous value of init image if requested if opt.init_img is not None and re.match('^-\\d+$', opt.init_img): try: opt.init_img = last_results[int(opt.init_img)][0] @@ -156,7 +164,8 @@ opt.init_img = None continue - if opt.seed is not None and opt.seed < 0: # retrieve previous value!
+ # retrieve previous value of seed if requested + if opt.seed is not None and opt.seed < 0: try: opt.seed = last_results[opt.seed][1] print(f'>> Reusing previous seed {opt.seed}') @@ -255,12 +264,16 @@ def main_loop(gen, opt, infile): results.append([path, formatted_dream_prompt]) last_results.append([path, seed]) - catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts - gen.prompt2image( - image_callback=image_writer, - catch_interrupts=catch_ctrl_c, - **vars(opt) - ) + if operation == 'generate': + catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts + gen.prompt2image( + image_callback=image_writer, + catch_interrupts=catch_ctrl_c, + **vars(opt) + ) + elif operation == 'postprocess': + print(f'>> fixing {opt.prompt}') + do_postprocess(gen,opt,image_writer) if opt.grid and len(grid_images) > 0: grid_img = make_grid(list(grid_images.values())) @@ -298,7 +311,27 @@ def main_loop(gen, opt, infile): print('goodbye!') +def do_postprocess (gen, opt, callback): + file_path = opt.prompt # treat the prompt as the file pathname + if os.path.dirname(file_path) == '': # basename given + file_path = os.path.join(opt.outdir,file_path) + if not os.path.exists(file_path): + print(f'* file {file_path} does not exist') + return + tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale') + opt.save_original = True # do not overwrite old image! + return gen.apply_postprocessor( + image_path = opt.prompt, + tool = tool, + gfpgan_strength = opt.gfpgan_strength, + codeformer_fidelity = opt.codeformer_fidelity, + save_original = opt.save_original, + upscale = opt.upscale, + callback = callback, + opt = opt, + ) + def get_next_command(infile=None) -> str: # command string if infile is None: command = input('dream> ') diff --git a/scripts/sd-metadata.py b/scripts/sd-metadata.py old mode 100644 new mode 100755 From 7b0cbb34d618098b4072f14870937ee9eb4369a1 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Wed, 14 Sep 2022 05:17:14 +1200 Subject: [PATCH 2/6] GFPGAN and Real ESRGAN Implementation Refactor --- docs/features/UPSCALE.md | 10 +- ldm/dream/args.py | 17 +- ldm/dream/generator/embiggen.py | 217 ++++++++++++--------- ldm/dream/server.py | 8 +- ldm/generate.py | 234 +++++++++++++---------- ldm/gfpgan/gfpgan_tools.py | 168 ---------------- ldm/restoration/codeformer/codeformer.py | 12 +- ldm/restoration/gfpgan/gfpgan.py | 76 ++++++++ ldm/restoration/realesrgan/realesrgan.py | 102 ++++++++++ ldm/restoration/restoration.py | 34 ++++ scripts/dream.py | 34 +++- 11 files changed, 526 insertions(+), 386 deletions(-) delete mode 100644 ldm/gfpgan/gfpgan_tools.py create mode 100644 ldm/restoration/gfpgan/gfpgan.py create mode 100644 ldm/restoration/realesrgan/realesrgan.py create mode 100644 ldm/restoration/restoration.py diff --git a/docs/features/UPSCALE.md b/docs/features/UPSCALE.md index 28d85c1d71..259b569e88 100644 --- a/docs/features/UPSCALE.md +++ b/docs/features/UPSCALE.md @@ -2,10 +2,16 @@ title: Upscale --- +## **Intro** + +The script provides the ability to restore faces and upscale images. + +You can enable these features by passing `--restore` and `--esrgan` to your launch script to enable +face restoration modules and upscaling modules respectively. + ## **GFPGAN and Real-ESRGAN Support** -The script also provides the ability to do face restoration and upscaling with the help of GFPGAN -and Real-ESRGAN respectively.
+The default face restoration module is GFPGAN and the default upscaling module is ESRGAN. As of version 1.14, environment.yaml will install the Real-ESRGAN package into the standard install location for python packages, and will put GFPGAN into a subdirectory of "src" in the diff --git a/ldm/dream/args.py b/ldm/dream/args.py index db6d963645..f0feacad73 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -348,16 +348,19 @@ class Args(object): type=str, help='Path to a pre-trained embedding manager checkpoint - can only be set on command line', ) - # GFPGAN related args + # Restoration related args postprocessing_group.add_argument( - '--gfpgan_bg_upsampler', - type=str, - default='realesrgan', - help='Background upsampler. Default: realesrgan. Options: realesrgan, none.', - + '--restore', + action='store_true', + help='Enable Face Restoration', ) postprocessing_group.add_argument( - '--gfpgan_bg_tile', + '--esrgan', + action='store_true', + help='Enable Upscaling', + ) + postprocessing_group.add_argument( + '--esrgan_bg_tile', type=int, default=400, help='Tile size for background sampler, 0 for no tile during testing. Default: 400.', diff --git a/ldm/dream/generator/embiggen.py b/ldm/dream/generator/embiggen.py index cb9c029a66..e196e3005f 100644 --- a/ldm/dream/generator/embiggen.py +++ b/ldm/dream/generator/embiggen.py @@ -4,16 +4,17 @@ and generates with ldm.dream.generator.img2img ''' import torch -import numpy as np +import numpy as np from PIL import Image -from ldm.dream.generator.base import Generator -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.dream.generator.img2img import Img2Img +from ldm.dream.generator.base import Generator +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.dream.generator.img2img import Img2Img + class Embiggen(Generator): - def __init__(self,model): + def __init__(self, model): super().__init__(model) - self.init_latent = None + self.init_latent = None @torch.no_grad() def get_make_image( @@ -38,19 +39,20 @@ class Embiggen(Generator): Return value depends on the seed at the time you call it """ # Construct embiggen arg array, and sanity check arguments - if embiggen == None: # embiggen can also be called with just embiggen_tiles - embiggen = [1.0] # If not specified, assume no scaling - elif embiggen[0] < 0 : + if embiggen == None: # embiggen can also be called with just embiggen_tiles + embiggen = [1.0] # If not specified, assume no scaling + elif embiggen[0] < 0: embiggen[0] = 1.0 - print('>> Embiggen scaling factor cannot be negative, fell back to the default of 1.0 !') + print( + '>> Embiggen scaling factor cannot be negative, fell back to the default of 1.0 !') if len(embiggen) < 2: embiggen.append(0.75) - elif embiggen[1] > 1.0 or embiggen[1] < 0 : + elif embiggen[1] > 1.0 or embiggen[1] < 0: embiggen[1] = 0.75 print('>> Embiggen upscaling strength for ESRGAN must be between 0 and 1, fell back to the default of 0.75 !') if len(embiggen) < 3: embiggen.append(0.25) - elif embiggen[2] < 0 : + elif embiggen[2] < 0: embiggen[2] = 0.25 print('>> Overlap size for Embiggen must be a positive ratio between 0 and 1 OR a number of pixels, fell back to the default of 0.25 !') @@ -76,29 +78,30 @@ class Embiggen(Generator): if embiggen[0] != 1.0: initsuperwidth = round(initsuperwidth*embiggen[0]) initsuperheight = round(initsuperheight*embiggen[0]) - if embiggen[1] > 0: # No point in ESRGAN upscaling if strength is set zero - from ldm.gfpgan.gfpgan_tools import ( - real_esrgan_upscale, - ) - print(f'>> ESRGAN upscaling init image 
prior to cutting with Embiggen with strength {embiggen[1]}') if embiggen[0] > 2: - initsuperimage = real_esrgan_upscale( + initsuperimage = esrgan.process( initsuperimage, - embiggen[1], # upscale strength - 4, # upscale scale + embiggen[1], # upscale strength self.seed, + 4, # upscale scale ) else: - initsuperimage = real_esrgan_upscale( + initsuperimage = esrgan.process( initsuperimage, - embiggen[1], # upscale strength - 2, # upscale scale + embiggen[1], # upscale strength self.seed, + 2, # upscale scale ) # We could keep recursively re-running ESRGAN for a requested embiggen[0] larger than 4x # but from personal experience it doesn't greatly improve anything after 4x # Resize to target scaling factor resolution - initsuperimage = initsuperimage.resize((initsuperwidth, initsuperheight), Image.Resampling.LANCZOS) + initsuperimage = initsuperimage.resize( + (initsuperwidth, initsuperheight), Image.Resampling.LANCZOS) # Use width and height as tile widths and height # Determine buffer size in pixels @@ -121,28 +124,31 @@ class Embiggen(Generator): emb_tiles_x = 1 emb_tiles_y = 1 if (initsuperwidth - width) > 0: - emb_tiles_x = ceildiv(initsuperwidth - width, width - overlap_size_x) + 1 + emb_tiles_x = ceildiv(initsuperwidth - width, + width - overlap_size_x) + 1 if (initsuperheight - height) > 0: - emb_tiles_y = ceildiv(initsuperheight - height, height - overlap_size_y) + 1 + emb_tiles_y = ceildiv(initsuperheight - height, + height - overlap_size_y) + 1 # Sanity assert emb_tiles_x > 1 or emb_tiles_y > 1, f'ERROR: Based on the requested dimensions of {initsuperwidth}x{initsuperheight} and tiles of {width}x{height} you don\'t need to Embiggen! Check your arguments.'
# Prep alpha layers -------------- # https://stackoverflow.com/questions/69321734/how-to-create-different-transparency-like-gradient-with-python-pil # agradientL is Left-side transparent - agradientL = Image.linear_gradient('L').rotate(90).resize((overlap_size_x, height)) + agradientL = Image.linear_gradient('L').rotate( + 90).resize((overlap_size_x, height)) # agradientT is Top-side transparent agradientT = Image.linear_gradient('L').resize((width, overlap_size_y)) # radial corner is the left-top corner, made full circle then cut to just the left-top quadrant agradientC = Image.new('L', (256, 256)) for y in range(256): for x in range(256): - #Find distance to lower right corner (numpy takes arrays) + # Find distance to lower right corner (numpy takes arrays) distanceToLR = np.sqrt([(255 - x) ** 2 + (255 - y) ** 2])[0] - #Clamp values to max 255 + # Clamp values to max 255 if distanceToLR > 255: distanceToLR = 255 - #Place the pixel as invert of distance + # Place the pixel as invert of distance agradientC.putpixel((x, y), int(255 - distanceToLR)) # Create alpha layers default fully white @@ -154,59 +160,79 @@ class Embiggen(Generator): alphaLayerT.paste(agradientT, (0, 0)) alphaLayerLTC.paste(agradientL, (0, 0)) alphaLayerLTC.paste(agradientT, (0, 0)) - alphaLayerLTC.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerLTC.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) if embiggen_tiles: # Individual unconnected sides alphaLayerR = Image.new("L", (width, height), 255) - alphaLayerR.paste(agradientL.rotate(180), (width - overlap_size_x, 0)) + alphaLayerR.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) alphaLayerB = Image.new("L", (width, height), 255) - alphaLayerB.paste(agradientT.rotate(180), (0, height - overlap_size_y)) + alphaLayerB.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) alphaLayerTB = Image.new("L", (width, height), 255) alphaLayerTB.paste(agradientT, (0, 0)) - alphaLayerTB.paste(agradientT.rotate(180), (0, height - overlap_size_y)) + alphaLayerTB.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) alphaLayerLR = Image.new("L", (width, height), 255) alphaLayerLR.paste(agradientL, (0, 0)) - alphaLayerLR.paste(agradientL.rotate(180), (width - overlap_size_x, 0)) + alphaLayerLR.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) # Sides and corner Layers alphaLayerRBC = Image.new("L", (width, height), 255) - alphaLayerRBC.paste(agradientL.rotate(180), (width - overlap_size_x, 0)) - alphaLayerRBC.paste(agradientT.rotate(180), (0, height - overlap_size_y)) - alphaLayerRBC.paste(agradientC.rotate(180).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerRBC.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerRBC.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerRBC.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) alphaLayerLBC = Image.new("L", (width, height), 255) alphaLayerLBC.paste(agradientL, (0, 0)) - alphaLayerLBC.paste(agradientT.rotate(180), (0, height - overlap_size_y)) - alphaLayerLBC.paste(agradientC.rotate(90).resize((overlap_size_x, overlap_size_y)), (0, height - overlap_size_y)) + alphaLayerLBC.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerLBC.paste(agradientC.rotate(90).resize( + (overlap_size_x, overlap_size_y)), (0, height - overlap_size_y)) alphaLayerRTC = 
Image.new("L", (width, height), 255) - alphaLayerRTC.paste(agradientL.rotate(180), (width - overlap_size_x, 0)) + alphaLayerRTC.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) alphaLayerRTC.paste(agradientT, (0, 0)) - alphaLayerRTC.paste(agradientC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + alphaLayerRTC.paste(agradientC.rotate(270).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) # All but X layers alphaLayerABT = Image.new("L", (width, height), 255) alphaLayerABT.paste(alphaLayerLBC, (0, 0)) - alphaLayerABT.paste(agradientL.rotate(180), (width - overlap_size_x, 0)) - alphaLayerABT.paste(agradientC.rotate(180).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerABT.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerABT.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) alphaLayerABL = Image.new("L", (width, height), 255) alphaLayerABL.paste(alphaLayerRTC, (0, 0)) - alphaLayerABL.paste(agradientT.rotate(180), (0, height - overlap_size_y)) - alphaLayerABL.paste(agradientC.rotate(180).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerABL.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerABL.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) alphaLayerABR = Image.new("L", (width, height), 255) alphaLayerABR.paste(alphaLayerLBC, (0, 0)) alphaLayerABR.paste(agradientT, (0, 0)) - alphaLayerABR.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerABR.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) alphaLayerABB = Image.new("L", (width, height), 255) alphaLayerABB.paste(alphaLayerRTC, (0, 0)) alphaLayerABB.paste(agradientL, (0, 0)) - alphaLayerABB.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerABB.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) # All-around layer alphaLayerAA = Image.new("L", (width, height), 255) alphaLayerAA.paste(alphaLayerABT, (0, 0)) alphaLayerAA.paste(agradientT, (0, 0)) - alphaLayerAA.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0)) - alphaLayerAA.paste(agradientC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + alphaLayerAA.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerAA.paste(agradientC.rotate(270).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) # Clean up temporary gradients del agradientL @@ -218,7 +244,8 @@ class Embiggen(Generator): if embiggen_tiles: print(f'>> Making {len(embiggen_tiles)} Embiggen tiles...') else: - print(f'>> Making {(emb_tiles_x * emb_tiles_y)} Embiggen tiles ({emb_tiles_x}x{emb_tiles_y})...') + print( + f'>> Making {(emb_tiles_x * emb_tiles_y)} Embiggen tiles ({emb_tiles_x}x{emb_tiles_y})...') emb_tile_store = [] for tile in range(emb_tiles_x * emb_tiles_y): @@ -240,20 +267,23 @@ class Embiggen(Generator): top = round(emb_row_i * (height - overlap_size_y)) right = left + width bottom = top + height - + # Cropped image of above dimension (does not modify the original) newinitimage = initsuperimage.crop((left, top, right, bottom)) # DEBUG: # newinitimagepath = init_img[0:-4] + f'_emb_Ti{tile}.png' # newinitimage.save(newinitimagepath) - + if 
embiggen_tiles: - print(f'Making tile #{tile + 1} ({embiggen_tiles.index(tile) + 1} of {len(embiggen_tiles)} requested)') + print( + f'Making tile #{tile + 1} ({embiggen_tiles.index(tile) + 1} of {len(embiggen_tiles)} requested)') else: - print(f'Starting {tile + 1} of {(emb_tiles_x * emb_tiles_y)} tiles') + print( + f'Starting {tile + 1} of {(emb_tiles_x * emb_tiles_y)} tiles') # create a torch tensor from an Image - newinitimage = np.array(newinitimage).astype(np.float32) / 255.0 + newinitimage = np.array( + newinitimage).astype(np.float32) / 255.0 newinitimage = newinitimage[None].transpose(0, 3, 1, 2) newinitimage = torch.from_numpy(newinitimage) newinitimage = 2.0 * newinitimage - 1.0 @@ -261,33 +291,35 @@ class Embiggen(Generator): tile_results = gen_img2img.generate( prompt, - iterations = 1, - seed = self.seed, - sampler = sampler, - steps = steps, - cfg_scale = cfg_scale, - conditioning = conditioning, - ddim_eta = ddim_eta, - image_callback = None, # called only after the final image is generated - step_callback = step_callback, # called after each intermediate image is generated - width = width, - height = height, - init_img = init_img, # img2img doesn't need this, but it might in the future - init_image = newinitimage, # notice that init_image is different from init_img - mask_image = None, - strength = strength, + iterations=1, + seed=self.seed, + sampler=sampler, + steps=steps, + cfg_scale=cfg_scale, + conditioning=conditioning, + ddim_eta=ddim_eta, + image_callback=None, # called only after the final image is generated + step_callback=step_callback, # called after each intermediate image is generated + width=width, + height=height, + init_img=init_img, # img2img doesn't need this, but it might in the future + init_image=newinitimage, # notice that init_image is different from init_img + mask_image=None, + strength=strength, ) emb_tile_store.append(tile_results[0][0]) # DEBUG (but, also has other uses), worth saving if you want tiles without a transparency overlap to manually composite # emb_tile_store[-1].save(init_img[0:-4] + f'_emb_To{tile}.png') del newinitimage - + # Sanity check we have them all if len(emb_tile_store) == (emb_tiles_x * emb_tiles_y) or (embiggen_tiles != [] and len(emb_tile_store) == len(embiggen_tiles)): - outputsuperimage = Image.new("RGBA", (initsuperwidth, initsuperheight)) + outputsuperimage = Image.new( + "RGBA", (initsuperwidth, initsuperheight)) if embiggen_tiles: - outputsuperimage.alpha_composite(initsuperimage.convert('RGBA'), (0, 0)) + outputsuperimage.alpha_composite( + initsuperimage.convert('RGBA'), (0, 0)) for tile in range(emb_tiles_x * emb_tiles_y): if embiggen_tiles: if tile in embiggen_tiles: @@ -308,7 +340,8 @@ class Embiggen(Generator): if emb_column_i + 1 == emb_tiles_x: left = initsuperwidth - width else: - left = round(emb_column_i * (width - overlap_size_x)) + left = round(emb_column_i * + (width - overlap_size_x)) if emb_row_i + 1 == emb_tiles_y: top = initsuperheight - height else: @@ -319,33 +352,33 @@ class Embiggen(Generator): # top of image if emb_row_i == 0: if emb_column_i == 0: - if (tile+1) in embiggen_tiles: # Look-ahead right - if (tile+emb_tiles_x) not in embiggen_tiles: # Look-ahead down + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) not in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerB) # Otherwise do nothing on this tile - elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only 
intileimage.putalpha(alphaLayerR) else: intileimage.putalpha(alphaLayerRBC) elif emb_column_i == emb_tiles_x - 1: - if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerL) else: intileimage.putalpha(alphaLayerLBC) else: - if (tile+1) in embiggen_tiles: # Look-ahead right - if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerL) else: intileimage.putalpha(alphaLayerLBC) - elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only intileimage.putalpha(alphaLayerLR) else: intileimage.putalpha(alphaLayerABT) # bottom of image elif emb_row_i == emb_tiles_y - 1: if emb_column_i == 0: - if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+1) in embiggen_tiles: # Look-ahead right intileimage.putalpha(alphaLayerT) else: intileimage.putalpha(alphaLayerRTC) @@ -353,34 +386,34 @@ class Embiggen(Generator): # No tiles to look ahead to intileimage.putalpha(alphaLayerLTC) else: - if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+1) in embiggen_tiles: # Look-ahead right intileimage.putalpha(alphaLayerLTC) else: intileimage.putalpha(alphaLayerABB) # vertical middle of image else: if emb_column_i == 0: - if (tile+1) in embiggen_tiles: # Look-ahead right - if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerT) else: intileimage.putalpha(alphaLayerTB) - elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only intileimage.putalpha(alphaLayerRTC) else: intileimage.putalpha(alphaLayerABL) elif emb_column_i == emb_tiles_x - 1: - if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerLTC) else: intileimage.putalpha(alphaLayerABR) else: - if (tile+1) in embiggen_tiles: # Look-ahead right - if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down intileimage.putalpha(alphaLayerLTC) else: intileimage.putalpha(alphaLayerABR) - elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only intileimage.putalpha(alphaLayerABB) else: intileimage.putalpha(alphaLayerAA) @@ -400,4 +433,4 @@ class Embiggen(Generator): # after internal loops and patching up return Embiggen image return outputsuperimage # end of function declaration - return make_image \ No newline at end of file + return make_image diff --git a/ldm/dream/server.py b/ldm/dream/server.py index 9e37c070d1..03114ac9d2 100644 --- a/ldm/dream/server.py +++ b/ldm/dream/server.py @@ -37,6 +37,8 @@ def build_opt(post_data, seed, gfpgan_model_exists): setattr(opt, 'seed', None if int(post_data['seed']) == -1 else int(post_data['seed'])) setattr(opt, 'variation_amount', float(post_data['variation_amount']) if int(post_data['seed']) != -1 else 0) setattr(opt, 'with_variations', []) + setattr(opt, 'embiggen', None) + setattr(opt, 'embiggen_tiles', None) broken = False if int(post_data['seed']) != -1 and post_data['with_variations'] != 
'': @@ -80,12 +82,11 @@ class DreamServer(BaseHTTPRequestHandler): self.wfile.write(content.read()) elif self.path == "/config.js": # unfortunately this import can't be at the top level, since that would cause a circular import - from ldm.gfpgan.gfpgan_tools import gfpgan_model_exists self.send_response(200) self.send_header("Content-type", "application/javascript") self.end_headers() config = { - 'gfpgan_model_exists': gfpgan_model_exists + 'gfpgan_model_exists': self.gfpgan_model_exists } self.wfile.write(bytes("let config = " + json.dumps(config) + ";\n", "utf-8")) elif self.path == "/run_log.json": @@ -138,11 +139,10 @@ class DreamServer(BaseHTTPRequestHandler): self.end_headers() # unfortunately this import can't be at the top level, since that would cause a circular import - from ldm.gfpgan.gfpgan_tools import gfpgan_model_exists content_length = int(self.headers['Content-Length']) post_data = json.loads(self.rfile.read(content_length)) - opt = build_opt(post_data, self.model.seed, gfpgan_model_exists) + opt = build_opt(post_data, self.model.seed, self.gfpgan_model_exists) self.canceled.clear() # In order to handle upscaled images, the PngWriter needs to maintain state diff --git a/ldm/generate.py b/ldm/generate.py index a470648cdc..2bd53ac57b 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -23,14 +23,32 @@ from PIL import Image, ImageOps from torch import nn from pytorch_lightning import seed_everything, logging -from ldm.util import instantiate_from_config -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.plms import PLMSSampler +from ldm.util import instantiate_from_config +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.ksampler import KSampler -from ldm.dream.pngwriter import PngWriter -from ldm.dream.image_util import InitImageResizer -from ldm.dream.devices import choose_torch_device -from ldm.dream.conditioning import get_uc_and_c +from ldm.dream.pngwriter import PngWriter +from ldm.dream.image_util import InitImageResizer +from ldm.dream.devices import choose_torch_device +from ldm.dream.conditioning import get_uc_and_c + +def fix_func(orig): + if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): + def new_func(*args, **kw): + device = kw.get("device", "mps") + kw["device"]="cpu" + return orig(*args, **kw).to(device) + return new_func + return orig + +torch.rand = fix_func(torch.rand) +torch.rand_like = fix_func(torch.rand_like) +torch.randn = fix_func(torch.randn) +torch.randn_like = fix_func(torch.randn_like) +torch.randint = fix_func(torch.randint) +torch.randint_like = fix_func(torch.randint_like) +torch.bernoulli = fix_func(torch.bernoulli) +torch.multinomial = fix_func(torch.multinomial) def fix_func(orig): if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): @@ -133,6 +151,9 @@ class Generate: # these are deprecated; if present they override values in the conf file weights = None, config = None, + gfpgan=None, + codeformer=None, + esrgan=None ): models = OmegaConf.load(conf) mconfig = models[model] @@ -156,6 +177,9 @@ class Generate: self.generators = {} self.base_generator = None self.seed = None + self.gfpgan = gfpgan + self.codeformer = codeformer + self.esrgan = esrgan # Note that in previous versions, there was an option to pass the # device to Generate(). 
However the device was then ignored, so @@ -224,8 +248,8 @@ class Generate: strength = None, init_color = None, # these are specific to embiggen (which also relies on img2img args) - embiggen = None, - embiggen_tiles = None, + embiggen=None, + embiggen_tiles=None, # these are specific to GFPGAN/ESRGAN facetool = None, gfpgan_strength = 0, @@ -274,15 +298,15 @@ class Generate: write the prompt into the PNG metadata. """ # TODO: convert this into a getattr() loop - steps = steps or self.steps - width = width or self.width - height = height or self.height - seamless = seamless or self.seamless - cfg_scale = cfg_scale or self.cfg_scale - ddim_eta = ddim_eta or self.ddim_eta - iterations = iterations or self.iterations - strength = strength or self.strength - self.seed = seed + steps = steps or self.steps + width = width or self.width + height = height or self.height + seamless = seamless or self.seamless + cfg_scale = cfg_scale or self.cfg_scale + ddim_eta = ddim_eta or self.ddim_eta + iterations = iterations or self.iterations + strength = strength or self.strength + self.seed = seed self.log_tokenization = log_tokenization with_variations = [] if with_variations is None else with_variations @@ -292,16 +316,17 @@ class Generate: for m in model.modules(): if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): m.padding_mode = 'circular' if seamless else m._orig_padding_mode - + assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0' assert ( 0.0 < strength < 1.0 ), 'img2img and inpaint strength can only work with 0.0 < strength < 1.0' assert ( - 0.0 <= variation_amount <= 1.0 + 0.0 <= variation_amount <= 1.0 ), '-v --variation_amount must be in [0.0, 1.0]' assert ( - (embiggen == None and embiggen_tiles == None) or ((embiggen != None or embiggen_tiles != None) and init_img != None) + (embiggen == None and embiggen_tiles == None) or ( + (embiggen != None or embiggen_tiles != None) and init_img != None) ), 'Embiggen requires an init/input image to be specified' if len(with_variations) > 0 or variation_amount > 1.0: @@ -323,9 +348,9 @@ class Generate: if self._has_cuda(): torch.cuda.reset_peak_memory_stats() - results = list() - init_image = None - mask_image = None + results = list() + init_image = None + mask_image = None try: uc, c = get_uc_and_c( @@ -334,8 +359,9 @@ class Generate: log_tokens =self.log_tokenization ) - (init_image,mask_image) = self._make_images(init_img,init_mask, width, height, fit) - + (init_image, mask_image) = self._make_images( + init_img, init_mask, width, height, fit) + if (init_image is not None) and (mask_image is not None): generator = self._make_inpaint() elif (embiggen != None or embiggen_tiles != None): @@ -345,26 +371,27 @@ class Generate: else: generator = self._make_txt2img() - generator.set_variation(self.seed, variation_amount, with_variations) + generator.set_variation( + self.seed, variation_amount, with_variations) results = generator.generate( prompt, - iterations = iterations, - seed = self.seed, - sampler = self.sampler, - steps = steps, - cfg_scale = cfg_scale, - conditioning = (uc,c), - ddim_eta = ddim_eta, - image_callback = image_callback, # called after the final image is generated - step_callback = step_callback, # called after each intermediate image is generated - width = width, - height = height, - init_img = init_img, # embiggen needs to manipulate from the unmodified init_img - init_image = init_image, # notice that init_image is different from init_img - mask_image = mask_image, - strength = strength, - embiggen = embiggen, - embiggen_tiles = 
embiggen_tiles, + iterations=iterations, + seed=self.seed, + sampler=self.sampler, + steps=steps, + cfg_scale=cfg_scale, + conditioning=(uc, c), + ddim_eta=ddim_eta, + image_callback=image_callback, # called after the final image is generated + step_callback=step_callback, # called after each intermediate image is generated + width=width, + height=height, + init_img=init_img, # embiggen needs to manipulate from the unmodified init_img + init_image=init_image, # notice that init_image is different from init_img + mask_image=mask_image, + strength=strength, + embiggen=embiggen, + embiggen_tiles=embiggen_tiles, ) if init_color: @@ -393,7 +420,8 @@ class Generate: toc = time.time() print('>> Usage stats:') print( - f'>> {len(results)} image(s) generated in', '%4.2fs' % (toc - tic) + f'>> {len(results)} image(s) generated in', '%4.2fs' % ( + toc - tic) ) if self._has_cuda(): print( @@ -413,36 +441,42 @@ class Generate: return results def _make_images(self, img_path, mask_path, width, height, fit=False): - init_image = None - init_mask = None + init_image = None + init_mask = None if not img_path: - return None,None + return None, None - image = self._load_img(img_path, width, height, fit=fit) # this returns an Image - init_image = self._create_init_image(image) # this returns a torch tensor + image = self._load_img(img_path, width, height, + fit=fit) # this returns an Image + # this returns a torch tensor + init_image = self._create_init_image(image) - if self._has_transparency(image) and not mask_path: # if image has a transparent area and no mask was provided, then try to generate mask - print('>> Initial image has transparent areas. Will inpaint in these regions.') + # if image has a transparent area and no mask was provided, then try to generate mask + if self._has_transparency(image) and not mask_path: + print( + '>> Initial image has transparent areas. Will inpaint in these regions.') if self._check_for_erasure(image): print( '>> WARNING: Colors underneath the transparent region seem to have been erased.\n', '>> Inpainting will be suboptimal. Please preserve the colors when making\n', '>> a transparency mask, or provide mask explicitly using --init_mask (-M).' 
) - init_mask = self._create_init_mask(image) # this returns a torch tensor + # this returns a torch tensor + init_mask = self._create_init_mask(image) if mask_path: - mask_image = self._load_img(mask_path, width, height, fit=fit) # this returns an Image - init_mask = self._create_init_mask(mask_image) + mask_image = self._load_img( + mask_path, width, height, fit=fit) # this returns an Image + init_mask = self._create_init_mask(mask_image) - return init_image,init_mask + return init_image, init_mask def _make_img2img(self): if not self.generators.get('img2img'): from ldm.dream.generator.img2img import Img2Img self.generators['img2img'] = Img2Img(self.model) return self.generators['img2img'] - + def _make_embiggen(self): if not self.generators.get('embiggen'): from ldm.dream.generator.embiggen import Embiggen @@ -517,38 +551,26 @@ class Generate: codeformer_fidelity = 0.75, save_original = False, image_callback = None): - try: - if upscale is not None: - from ldm.gfpgan.gfpgan_tools import real_esrgan_upscale - if strength > 0: - if facetool == 'codeformer': - from ldm.restoration.codeformer.codeformer import CodeFormerRestoration - else: - from ldm.gfpgan.gfpgan_tools import run_gfpgan - except (ModuleNotFoundError, ImportError): - print(traceback.format_exc(), file=sys.stderr) - print('>> You may need to install the ESRGAN and/or GFPGAN modules') - return for r in image_list: image, seed = r try: if upscale is not None: - if len(upscale) < 2: - upscale.append(0.75) - image = real_esrgan_upscale( - image, - upscale[1], - int(upscale[0]), - seed, - ) - if strength > 0: - if facetool == 'codeformer': - image = CodeFormerRestoration().process(image=image, strength=strength, device=self.device, seed=seed, fidelity=codeformer_fidelity) + if self.esrgan is not None: + if len(upscale) < 2: + upscale.append(0.75) + image = self.esrgan.process( + image, upscale[1], seed, int(upscale[0])) else: - image = run_gfpgan( - image, strength, seed, 1 - ) + print(">> ESRGAN is disabled. Image not upscaled.") + if strength > 0: + if self.gfpgan is not None and self.codeformer is not None: + if facetool == 'codeformer': + image = self.codeformer.process(image=image, strength=strength, device=self.device, seed=seed, fidelity=codeformer_fidelity) + else: + image = self.gfpgan.process(image, strength, seed) + else: + print(">> Face Restoration is disabled.") except Exception as e: print( f'>> Error running RealESRGAN or GFPGAN. 
Your image was not upscaled.\n{e}' @@ -560,10 +582,10 @@ class Generate: r[0] = image # to help WebGUI - front end to generator util function - def sample_to_image(self,samples): + def sample_to_image(self, samples): return self._sample_to_image(samples) - def _sample_to_image(self,samples): + def _sample_to_image(self, samples): if not self.base_generator: from ldm.dream.generator import Generator self.base_generator = Generator(self.model) @@ -606,7 +628,7 @@ class Generate: # for usage statistics device_type = choose_torch_device() if device_type == 'cuda': - torch.cuda.reset_peak_memory_stats() + torch.cuda.reset_peak_memory_stats() tic = time.time() # this does the work @@ -657,12 +679,12 @@ class Generate: f'>> loaded input image of size {image.width}x{image.height} from {path}' ) if fit: - image = self._fit_image(image,(width,height)) + image = self._fit_image(image, (width, height)) else: image = self._squeeze_image(image) return image - def _create_init_image(self,image): + def _create_init_image(self, image): image = image.convert('RGB') # print( # f'>> DEBUG: writing the image to img.png' @@ -671,7 +693,7 @@ class Generate: image = np.array(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) - image = 2.0 * image - 1.0 + image = 2.0 * image - 1.0 return image.to(self.device) def _create_init_mask(self, image): @@ -680,7 +702,8 @@ class Generate: image = image.convert('RGB') # BUG: We need to use the model's downsample factor rather than hardcoding "8" from ldm.dream.generator.base import downsampling - image = image.resize((image.width//downsampling, image.height//downsampling), resample=Image.Resampling.LANCZOS) + image = image.resize((image.width//downsampling, image.height // + downsampling), resample=Image.Resampling.LANCZOS) # print( # f'>> DEBUG: writing the mask to mask.png' # ) @@ -702,7 +725,7 @@ class Generate: mask = ImageOps.invert(mask) return mask - def _has_transparency(self,image): + def _has_transparency(self, image): if image.info.get("transparency", None) is not None: return True if image.mode == "P": @@ -716,11 +739,10 @@ class Generate: return True return False - - def _check_for_erasure(self,image): + def _check_for_erasure(self, image): width, height = image.size - pixdata = image.load() - colored = 0 + pixdata = image.load() + colored = 0 for y in range(height): for x in range(width): if pixdata[x, y][3] == 0: @@ -730,28 +752,28 @@ class Generate: colored += 1 return colored == 0 - def _squeeze_image(self,image): - x,y,resize_needed = self._resolution_check(image.width,image.height) + def _squeeze_image(self, image): + x, y, resize_needed = self._resolution_check(image.width, image.height) if resize_needed: - return InitImageResizer(image).resize(x,y) + return InitImageResizer(image).resize(x, y) return image - - def _fit_image(self,image,max_dimensions): - w,h = max_dimensions + def _fit_image(self, image, max_dimensions): + w, h = max_dimensions print( f'>> image will be resized to fit inside a box {w}x{h} in size.' 
) if image.width > image.height: - h = None # by setting h to none, we tell InitImageResizer to fit into the width and calculate height + h = None # by setting h to none, we tell InitImageResizer to fit into the width and calculate height elif image.height > image.width: - w = None # ditto for w + w = None # ditto for w else: pass - image = InitImageResizer(image).resize(w,h) # note that InitImageResizer does the multiple of 64 truncation internally + # note that InitImageResizer does the multiple of 64 truncation internally + image = InitImageResizer(image).resize(w, h) print( f'>> after adjusting image dimensions to be multiples of 64, init image is {image.width}x{image.height}' - ) + ) return image def _resolution_check(self, width, height, log=False): @@ -765,7 +787,7 @@ class Generate: f'>> Provided width and height must be multiples of 64. Auto-resizing to {w}x{h}' ) height = h - width = w + width = w resize_needed = True if (width * height) > (self.width * self.height): diff --git a/ldm/gfpgan/gfpgan_tools.py b/ldm/gfpgan/gfpgan_tools.py deleted file mode 100644 index 3adfc907a4..0000000000 --- a/ldm/gfpgan/gfpgan_tools.py +++ /dev/null @@ -1,168 +0,0 @@ -import torch -import warnings -import os -import sys -import numpy as np - -from PIL import Image -#from scripts.dream import create_argv_parser -from ldm.dream.args import Args - -opt = Args() -opt.parse_args() -model_path = os.path.join(opt.gfpgan_dir, opt.gfpgan_model_path) -gfpgan_model_exists = os.path.isfile(model_path) - -def run_gfpgan(image, strength, seed, upsampler_scale=4): - print(f'>> GFPGAN - Restoring Faces for image seed:{seed}') - gfpgan = None - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=DeprecationWarning) - warnings.filterwarnings('ignore', category=UserWarning) - - try: - if not gfpgan_model_exists: - raise Exception('GFPGAN model not found at path ' + model_path) - - sys.path.append(os.path.abspath(opt.gfpgan_dir)) - from gfpgan import GFPGANer - - bg_upsampler = _load_gfpgan_bg_upsampler( - opt.gfpgan_bg_upsampler, upsampler_scale, opt.gfpgan_bg_tile - ) - - gfpgan = GFPGANer( - model_path=model_path, - upscale=upsampler_scale, - arch='clean', - channel_multiplier=2, - bg_upsampler=bg_upsampler, - ) - except Exception: - import traceback - - print('>> Error loading GFPGAN:', file=sys.stderr) - print(traceback.format_exc(), file=sys.stderr) - - if gfpgan is None: - print( - f'>> WARNING: GFPGAN not initialized.' - ) - print( - f'>> Download https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth to {model_path}, \nor change GFPGAN directory with --gfpgan_dir.' 
- ) - return image - - image = image.convert('RGB') - - cropped_faces, restored_faces, restored_img = gfpgan.enhance( - np.array(image, dtype=np.uint8), - has_aligned=False, - only_center_face=False, - paste_back=True, - ) - res = Image.fromarray(restored_img) - - if strength < 1.0: - # Resize the image to the new image if the sizes have changed - if restored_img.size != image.size: - image = image.resize(res.size) - res = Image.blend(image, res, strength) - - if torch.cuda.is_available(): - torch.cuda.empty_cache() - gfpgan = None - - return res - - -def _load_gfpgan_bg_upsampler(bg_upsampler, upsampler_scale, bg_tile=400): - if bg_upsampler == 'realesrgan': - if not torch.cuda.is_available(): # CPU or MPS on M1 - use_half_precision = False - else: - use_half_precision = True - - model_path = { - 2: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth', - 4: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth', - } - - if upsampler_scale not in model_path: - return None - - from basicsr.archs.rrdbnet_arch import RRDBNet - from realesrgan import RealESRGANer - - if upsampler_scale == 4: - model = RRDBNet( - num_in_ch=3, - num_out_ch=3, - num_feat=64, - num_block=23, - num_grow_ch=32, - scale=4, - ) - if upsampler_scale == 2: - model = RRDBNet( - num_in_ch=3, - num_out_ch=3, - num_feat=64, - num_block=23, - num_grow_ch=32, - scale=2, - ) - - bg_upsampler = RealESRGANer( - scale=upsampler_scale, - model_path=model_path[upsampler_scale], - model=model, - tile=bg_tile, - tile_pad=10, - pre_pad=0, - half=use_half_precision, - ) - else: - bg_upsampler = None - - return bg_upsampler - - -def real_esrgan_upscale(image, strength, upsampler_scale, seed): - print( - f'>> Real-ESRGAN Upscaling seed:{seed} : scale:{upsampler_scale}x' - ) - - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', category=DeprecationWarning) - warnings.filterwarnings('ignore', category=UserWarning) - - try: - upsampler = _load_gfpgan_bg_upsampler( - opt.gfpgan_bg_upsampler, upsampler_scale, opt.gfpgan_bg_tile - ) - except Exception: - import traceback - - print('>> Error loading Real-ESRGAN:', file=sys.stderr) - print(traceback.format_exc(), file=sys.stderr) - - output, img_mode = upsampler.enhance( - np.array(image, dtype=np.uint8), - outscale=upsampler_scale, - alpha_upsampler=opt.gfpgan_bg_upsampler, - ) - - res = Image.fromarray(output) - - if strength < 1.0: - # Resize the image to the new image if the sizes have changed - if output.size != image.size: - image = image.resize(res.size) - res = Image.blend(image, res, strength) - - if torch.cuda.is_available(): - torch.cuda.empty_cache() - upsampler = None - - return res diff --git a/ldm/restoration/codeformer/codeformer.py b/ldm/restoration/codeformer/codeformer.py index ff81085793..f725ef9144 100644 --- a/ldm/restoration/codeformer/codeformer.py +++ b/ldm/restoration/codeformer/codeformer.py @@ -2,12 +2,20 @@ import os import torch import numpy as np import warnings +import sys pretrained_model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth' class CodeFormerRestoration(): - def __init__(self) -> None: - pass + def __init__(self, + codeformer_dir='ldm/restoration/codeformer', + codeformer_model_path='weights/codeformer.pth') -> None: + self.model_path = os.path.join(codeformer_dir, codeformer_model_path) + self.codeformer_model_exists = os.path.isfile(self.model_path) + + if not self.codeformer_model_exists: + print('## NOT FOUND: CodeFormer model 
not found at ' + self.model_path) + sys.path.append(os.path.abspath(codeformer_dir)) def process(self, image, strength, device, seed=None, fidelity=0.75): if seed is not None: diff --git a/ldm/restoration/gfpgan/gfpgan.py b/ldm/restoration/gfpgan/gfpgan.py new file mode 100644 index 0000000000..643d1e9559 --- /dev/null +++ b/ldm/restoration/gfpgan/gfpgan.py @@ -0,0 +1,76 @@ +import torch +import warnings +import os +import sys +import numpy as np + +from PIL import Image + + +class GFPGAN(): + def __init__( + self, + gfpgan_dir='src/gfpgan', + gfpgan_model_path='experiments/pretrained_models/GFPGANv1.3.pth') -> None: + + self.model_path = os.path.join(gfpgan_dir, gfpgan_model_path) + self.gfpgan_model_exists = os.path.isfile(self.model_path) + + if not self.gfpgan_model_exists: + raise Exception( + 'GFPGAN model not found at path ' + self.model_path) + sys.path.append(os.path.abspath(gfpgan_dir)) + + def model_exists(self): + return os.path.isfile(self.model_path) + + def process(self, image, strength: float, seed: str = None): + if seed is not None: + print(f'>> GFPGAN - Restoring Faces for image seed:{seed}') + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + warnings.filterwarnings('ignore', category=UserWarning) + try: + from gfpgan import GFPGANer + self.gfpgan = GFPGANer( + model_path=self.model_path, + upscale=1, + arch='clean', + channel_multiplier=2, + bg_upsampler=None, + ) + except Exception: + import traceback + print('>> Error loading GFPGAN:', file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + if self.gfpgan is None: + print( + f'>> WARNING: GFPGAN not initialized.' + ) + print( + f'>> Download https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth to {self.model_path}, \nor change GFPGAN directory with --gfpgan_dir.' 
+ ) + + image = image.convert('RGB') + + _, _, restored_img = self.gfpgan.enhance( + np.array(image, dtype=np.uint8), + has_aligned=False, + only_center_face=False, + paste_back=True, + ) + res = Image.fromarray(restored_img) + + if strength < 1.0: + # Resize the image to the new image if the sizes have changed + if restored_img.size != image.size: + image = image.resize(res.size) + res = Image.blend(image, res, strength) + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + self.gfpgan = None + + return res diff --git a/ldm/restoration/realesrgan/realesrgan.py b/ldm/restoration/realesrgan/realesrgan.py new file mode 100644 index 0000000000..9823a2cbf4 --- /dev/null +++ b/ldm/restoration/realesrgan/realesrgan.py @@ -0,0 +1,102 @@ +import torch +import warnings +import numpy as np + +from PIL import Image + + +class ESRGAN(): + def __init__(self, bg_tile_size=400) -> None: + self.bg_tile_size = bg_tile_size + + if not torch.cuda.is_available(): # CPU or MPS on M1 + use_half_precision = False + else: + use_half_precision = True + + def load_esrgan_bg_upsampler(self, upsampler_scale): + if not torch.cuda.is_available(): # CPU or MPS on M1 + use_half_precision = False + else: + use_half_precision = True + + model_path = { + 2: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth', + 4: 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth', + } + + if upsampler_scale not in model_path: + return None + else: + from basicsr.archs.rrdbnet_arch import RRDBNet + from realesrgan import RealESRGANer + + if upsampler_scale == 4: + model = RRDBNet( + num_in_ch=3, + num_out_ch=3, + num_feat=64, + num_block=23, + num_grow_ch=32, + scale=4, + ) + if upsampler_scale == 2: + model = RRDBNet( + num_in_ch=3, + num_out_ch=3, + num_feat=64, + num_block=23, + num_grow_ch=32, + scale=2, + ) + + bg_upsampler = RealESRGANer( + scale=upsampler_scale, + model_path=model_path[upsampler_scale], + model=model, + tile=self.bg_tile_size, + tile_pad=10, + pre_pad=0, + half=use_half_precision, + ) + + return bg_upsampler + + def process(self, image, strength: float, seed: str = None, upsampler_scale: int = 2): + if seed is not None: + print( + f'>> Real-ESRGAN Upscaling seed:{seed} : scale:{upsampler_scale}x' + ) + + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=DeprecationWarning) + warnings.filterwarnings('ignore', category=UserWarning) + + try: + upsampler = self.load_esrgan_bg_upsampler(upsampler_scale) + except Exception: + import traceback + import sys + + print('>> Error loading Real-ESRGAN:', file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) + + output, _ = upsampler.enhance( + np.array(image, dtype=np.uint8), + outscale=upsampler_scale, + alpha_upsampler='realesrgan', + ) + + res = Image.fromarray(output) + + if strength < 1.0: + # Resize the image to the new image if the sizes have changed + if output.size != image.size: + image = image.resize(res.size) + res = Image.blend(image, res, strength) + + if torch.cuda.is_available(): + torch.cuda.empty_cache() + upsampler = None + + return res diff --git a/ldm/restoration/restoration.py b/ldm/restoration/restoration.py new file mode 100644 index 0000000000..d9caebd4fa --- /dev/null +++ b/ldm/restoration/restoration.py @@ -0,0 +1,34 @@ +class Restoration(): + def __init__(self, gfpgan_dir='./src/gfpgan', gfpgan_model_path='experiments/pretrained_models/GFPGANv1.3.pth', esrgan_bg_tile=400) -> None: + self.gfpgan_dir = gfpgan_dir + 
self.gfpgan_model_path = gfpgan_model_path + self.esrgan_bg_tile = esrgan_bg_tile + + def load_face_restore_models(self): + # Load GFPGAN + gfpgan = self.load_gfpgan() + if gfpgan.gfpgan_model_exists: + print('>> GFPGAN Initialized') + + # Load CodeFormer + codeformer = self.load_codeformer() + if codeformer.codeformer_model_exists: + print('>> CodeFormer Initialized') + + return gfpgan, codeformer + + # Face Restore Models + def load_gfpgan(self): + from ldm.restoration.gfpgan.gfpgan import GFPGAN + return GFPGAN(self.gfpgan_dir, self.gfpgan_model_path) + + def load_codeformer(self): + from ldm.restoration.codeformer.codeformer import CodeFormerRestoration + return CodeFormerRestoration() + + # Upscale Models + def load_ersgan(self): + from ldm.restoration.realesrgan.realesrgan import ESRGAN + esrgan = ESRGAN(self.esrgan_bg_tile) + print('>> ESRGAN Initialized') + return esrgan; \ No newline at end of file diff --git a/scripts/dream.py b/scripts/dream.py index 857b5637aa..dcc54aa15f 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -42,7 +42,25 @@ def main(): import transformers transformers.logging.set_verbosity_error() - # creating a simple Generate object with a handful of + # Loading Face Restoration and ESRGAN Modules + try: + gfpgan, codeformer, esrgan = None, None, None + from ldm.restoration.restoration import Restoration + restoration = Restoration(opt.gfpgan_dir, opt.gfpgan_model_path, opt.esrgan_bg_tile) + if opt.restore: + gfpgan, codeformer = restoration.load_face_restore_models() + else: + print('>> Face Restoration Disabled') + if opt.esrgan: + esrgan = restoration.load_ersgan() + else: + print('>> ESRGAN Disabled') + except (ModuleNotFoundError, ImportError): + import traceback + print(traceback.format_exc(), file=sys.stderr) + print('>> You may need to install the ESRGAN and/or GFPGAN modules') + + # creating a simple text2image object with a handful of # defaults passed on the command line. # additional parameters will be added (or overridden) during # the user input loop @@ -53,6 +71,9 @@ def main(): sampler_name = opt.sampler_name, embedding_path = opt.embedding_path, full_precision = opt.full_precision, + gfpgan=gfpgan, + codeformer=codeformer, + esrgan=esrgan ) except (FileNotFoundError, IOError, KeyError) as e: print(f'{e}.
Aborting.') @@ -89,7 +110,7 @@ def main(): # web server loops forever if opt.web: - dream_server_loop(gen, opt.host, opt.port, opt.outdir) + dream_server_loop(gen, opt.host, opt.port, opt.outdir, gfpgan) sys.exit(0) main_loop(gen, opt, infile) @@ -312,7 +333,7 @@ def get_next_command(infile=None) -> str: # command string print(f'#{command}') return command -def dream_server_loop(gen, host, port, outdir): +def dream_server_loop(gen, host, port, outdir, gfpgan): print('\n* --web was specified, starting web server...') # Change working directory to the stable-diffusion directory os.chdir( @@ -322,6 +343,10 @@ def dream_server_loop(gen, host, port, outdir): # Start server DreamServer.model = gen # misnomer in DreamServer - this is not the model you are looking for DreamServer.outdir = outdir + DreamServer.gfpgan_model_exists = False + if gfpgan is not None: + DreamServer.gfpgan_model_exists = gfpgan.gfpgan_model_exists + dream_server = ThreadingDreamServer((host, port)) print(">> Started Stable Diffusion dream server!") if host == '0.0.0.0': @@ -345,8 +370,7 @@ def write_log_message(results, log_path): log_lines = [f'{path}: {prompt}\n' for path, prompt in results] for l in log_lines: output_cntr += 1 - print(f'[{output_cntr}] {l}',end='') - + print(f'[{output_cntr}] {l}', end='') with open(log_path, 'a', encoding='utf-8') as file: file.writelines(log_lines) From f816526d0d56d20fc3e801d007a28e0e739da6db Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 18 Sep 2022 17:26:09 -0400 Subject: [PATCH 3/6] add ability to post-process images from the CLI - supports gfpgan, esrgan, codeformer and embiggen - To use: dream> !fix ./outputs/img-samples/000056.292144555.png -ft gfpgan -U2 -G0.8 dream> !fix ./outputs/img-samples/000056.292144555.png -ft codeformer -G 0.8 dream> !fix ./outputs/img-samples/000056.29214455.png -U4 dream> !fix ./outputs/img-samples/000056.292144555.png -embiggen 1.5 The first example invokes gfpgan to fix faces and esrgan to upscale. The second example invokes codeformer to fix faces, with no upscaling. The third example uses esrgan to upscale 4X. The fourth example runs embiggen to enlarge 1.5X. - This is very preliminary work. There are some anomalies to note: 1. The syntax is non-obvious. I would prefer something like: !fix esrgan,gfpgan !fix esrgan !fix embiggen,codeformer However, this will require refactoring the gfpgan and embiggen code. 2. Images generated using gfpgan, esrgan or codeformer all are named "xxxxxx.xxxxxx.postprocessed.png" and the original is saved. However, the prefix is a new one that is not related to the original. 3. Images generated using embiggen are named "xxxxx.xxxxxxx.png", and once again the prefix is new. I'm not sure whether the prefix should be aligned with the original file's prefix or not. Probably not, but opinions welcome.
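For clarity, here is a minimal sketch of the dispatch behind this syntax, condensed from the do_postprocess() hunk in scripts/dream.py below. The tool-selection rule and the option names are the ones in this series; route_fix_command is an illustrative wrapper name, and this is a sketch of the flow rather than the code in the diff:

```python
# Sketch of how a dream> "!fix <file> ..." command is routed (condensed from
# the do_postprocess() hunk in scripts/dream.py below; route_fix_command is
# an illustrative name, not part of this patch).
def route_fix_command(gen, opt, command):
    command = command.replace('!fix ', '', 1)  # strip the !fix verb
    if opt.parse_cmd(command) is None:         # reuse the dream> argument parser
        return
    # -G > 0 selects a face tool (-ft gfpgan|codeformer); -embiggen selects
    # embiggen; otherwise a bare -U falls through to plain upscaling.
    if opt.gfpgan_strength > 0:
        tool = opt.facetool
    elif opt.embiggen:
        tool = 'embiggen'
    else:
        tool = 'upscale'
    return gen.apply_postprocessor(
        image_path=opt.prompt,                 # the "prompt" is the file path
        tool=tool,
        gfpgan_strength=opt.gfpgan_strength,
        codeformer_fidelity=opt.codeformer_fidelity,
        upscale=opt.upscale,
        opt=opt,
    )
```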
--- ldm/dream/args.py | 14 +++++-- ldm/dream/readline.py | 4 +- ldm/generate.py | 92 ++++++++++++++++++++++++++++++++++++++++++ scripts/dream.py | 51 ++++++++++++++++++----- scripts/sd-metadata.py | 0 5 files changed, 146 insertions(+), 15 deletions(-) mode change 100644 => 100755 scripts/sd-metadata.py diff --git a/ldm/dream/args.py b/ldm/dream/args.py index f0feacad73..424dadc1a4 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -400,7 +400,10 @@ class Args(object): # This creates the parser that processes commands on the dream> command line def _create_dream_cmd_parser(self): parser = argparse.ArgumentParser( - description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12' + description=""" + Generate example: dream> a fantastic alien landscape -W576 -H512 -s60 -n4 + Postprocess example: dream> !pp 0000045.4829112.png -G1 -U4 -ft codeformer + """ ) render_group = parser.add_argument_group('General rendering') img2img_group = parser.add_argument_group('Image-to-image and inpainting') @@ -523,6 +526,7 @@ class Args(object): '-ft', '--facetool', type=str, + default='gfpgan', help='Select the face restoration AI to use: gfpgan, codeformer', ) postprocessing_group.add_argument( @@ -530,7 +534,7 @@ class Args(object): '--gfpgan_strength', type=float, help='The strength at which to apply the GFPGAN model to the result, in order to improve faces.', - default=0, + default=0.0, ) postprocessing_group.add_argument( '-cf', @@ -690,8 +694,10 @@ def metadata_loads(metadata): images = [metadata['sd-metadata']['image']] for image in images: # repack the prompt and variations - image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']]) - image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']]) + if 'prompt' in image: + image['prompt'] = ','.join([':'.join([x['prompt'], str(x['weight'])]) for x in image['prompt']]) + if 'variations' in image: + image['variations'] = ','.join([':'.join([str(x['seed']),str(x['weight'])]) for x in image['variations']]) # fix a bit of semantic drift here image['sampler_name']=image.pop('sampler') opt = Args() diff --git a/ldm/dream/readline.py b/ldm/dream/readline.py index da94f5a61f..42f4a3ac01 100644 --- a/ldm/dream/readline.py +++ b/ldm/dream/readline.py @@ -26,8 +26,8 @@ class Completer: '--init_color')): return self._path_completions(text, state, ('.png','.jpg','.jpeg')) - if buffer.strip().endswith('cd') or text.startswith(('.', '/')): - return self._path_completions(text, state, ()) + if buffer.strip().endswith('pp') or text.startswith(('.', '/')): + return self._path_completions(text, state, ('.png','.jpg','.jpeg')) response = None if state == 0: diff --git a/ldm/generate.py b/ldm/generate.py index 2bd53ac57b..80a6c4c4be 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -308,6 +308,7 @@ class Generate: strength = strength or self.strength self.seed = seed self.log_tokenization = log_tokenization + self.step_callback = step_callback with_variations = [] if with_variations is None else with_variations # will instantiate the model or return it from cache @@ -440,6 +441,97 @@ class Generate: ) return results + # this needs to be generalized to all sorts of postprocessors, but for now + # sufficient to support most use cases + def apply_postprocessor( + self, + image_path, + tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', or 'embiggen' + gfpgan_strength = 0.0, + codeformer_fidelity = 0.75, + save_original = True, # to get new name + upscale = 
None, + callback = None, + opt = None, + ): + # retrieve the seed from the image; + # note that we will try both the new way and the old way, since not all files have the + # metadata (yet) + seed = None + image_metadata = None + prompt = None + try: + meta = retrieve_metadata(image_path) + args = metadata_loads(meta) + if len(args) > 1: + print("* Can't postprocess a grid") + return + seed = args[0].seed + prompt = args[0].prompt + print(f'>> retrieved seed {seed} and prompt "{prompt}" from {image_path}') + except: + m = re.search('(\d+)\.png$',image_path) + if m: + seed = m.group(1) + + if not seed: + print('* Could not recover seed for image. Replacing with 42. This will not affect image quality') + seed = 42 + + # face fixers and esrgan take an Image, but embiggen takes a path + image = Image.open(image_path) + + # Note that we need to adopt a uniform API for the postprocessors. + # This is completely ad hoc ATM + if tool in ('gfpgan','codeformer','upscale'): + if tool == 'gfpgan': + facetool = 'gfpgan' + elif tool == 'codeformer': + facetool = 'codeformer' + elif tool == 'upscale': + facetool = 'gfpgan' # but won't be run + gfpgan_strength = 0 + return self.upscale_and_reconstruct( + [[image,seed]], + facetool = facetool, + strength = gfpgan_strength, + codeformer_fidelity = codeformer_fidelity, + save_original = save_original, + upscale = upscale, + image_callback = callback, + ) + + elif tool == 'embiggen': + # fetch the metadata from the image + generator = self._make_embiggen() + uc, c = get_uc_and_c( + prompt, model =self.model, + skip_normalize=opt.skip_normalize, + log_tokens =opt.log_tokenization + ) + # embiggen takes an image path (sigh) + generator.generate( + prompt, + sampler = self.sampler, + steps = opt.steps, + cfg_scale = opt.cfg_scale, + ddim_eta = self.ddim_eta, + conditioning= (uc, c), + init_img = image_path, # not the Image! (sigh) + init_image = image, # embiggen wants both! (sigh) + strength = opt.strength, + width = opt.width, + height = opt.height, + embiggen = opt.embiggen, + embiggen_tiles = opt.embiggen_tiles, + image_callback = callback, + ) + + else: + print(f'* postprocessing tool {tool} is not yet supported') + return None + + def _make_images(self, img_path, mask_path, width, height, fit=False): init_image = None init_mask = None diff --git a/scripts/dream.py b/scripts/dream.py index dcc54aa15f..989db58f9f 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -132,6 +132,8 @@ def main_loop(gen, opt, infile): name_max = 255 while not done: + operation = 'generate' # default operation, alternative is 'postprocess' + try: command = get_next_command(infile) except EOFError: @@ -152,8 +154,14 @@ def main_loop(gen, opt, infile): if command.startswith( '!dream' ): # in case a stored prompt still contains the !dream command - command.replace('!dream','',1) + command = command.replace('!dream ','',1) + if command.startswith( + '!fix' + ): + command = command.replace('!fix ','',1) + operation = 'postprocess' + if opt.parse_cmd(command) is None: continue if len(opt.prompt) == 0: @@ -166,7 +174,7 @@ def main_loop(gen, opt, infile): if not opt.height: opt.height = model_config.height - # retrieve previous value! + # retrieve previous value of init image if requested if opt.init_img is not None and re.match('^-\\d+$', opt.init_img): try: opt.init_img = last_results[int(opt.init_img)][0] @@ -177,7 +185,8 @@ def main_loop(gen, opt, infile): opt.init_img = None continue - if opt.seed is not None and opt.seed < 0: # retrieve previous value!
+ # retrieve previous value of seed if requested + if opt.seed is not None and opt.seed < 0: try: opt.seed = last_results[opt.seed][1] print(f'>> Reusing previous seed {opt.seed}') @@ -276,12 +285,16 @@ def main_loop(gen, opt, infile): results.append([path, formatted_dream_prompt]) last_results.append([path, seed]) - catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts - gen.prompt2image( - image_callback=image_writer, - catch_interrupts=catch_ctrl_c, - **vars(opt) - ) + if operation == 'generate': + catch_ctrl_c = infile is None # if running interactively, we catch keyboard interrupts + gen.prompt2image( + image_callback=image_writer, + catch_interrupts=catch_ctrl_c, + **vars(opt) + ) + elif operation == 'postprocess': + print(f'>> fixing {opt.prompt}') + do_postprocess(gen,opt,image_writer) if opt.grid and len(grid_images) > 0: grid_img = make_grid(list(grid_images.values())) @@ -319,7 +332,27 @@ def main_loop(gen, opt, infile): print('goodbye!') +def do_postprocess (gen, opt, callback): + file_path = opt.prompt # treat the prompt as the file pathname + if os.path.dirname(file_path) == '': # basename given + file_path = os.path.join(opt.outdir,file_path) + if not os.path.exists(file_path): + print(f'* file {file_path} does not exist') + return + tool = opt.facetool if opt.gfpgan_strength > 0 else ('embiggen' if opt.embiggen else 'upscale') + opt.save_original = True # do not overwrite old image! + return gen.apply_postprocessor( + image_path = opt.prompt, + tool = tool, + gfpgan_strength = opt.gfpgan_strength, + codeformer_fidelity = opt.codeformer_fidelity, + save_original = opt.save_original, + upscale = opt.upscale, + callback = callback, + opt = opt, + ) + def get_next_command(infile=None) -> str: # command string if infile is None: command = input('dream> ') diff --git a/scripts/sd-metadata.py b/scripts/sd-metadata.py old mode 100644 new mode 100755 From c14bdcb8fd89aa4946ebe9fba2e631bf36aaa787 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 19 Sep 2022 13:59:43 -0400 Subject: [PATCH 4/6] combine PRs #690 and #683 --- ldm/dream/args.py | 14 ++++++++------ ldm/dream/generator/embiggen.py | 2 +- .../restoration.py => dream/restoration/base.py} | 8 ++++---- .../codeformer => dream/restoration}/codeformer.py | 8 ++++---- ldm/dream/restoration/codeformer/weights/README | 3 +++ .../restoration}/codeformer_arch.py | 4 ++-- .../gfpgan => dream/restoration}/gfpgan.py | 0 .../realesrgan => dream/restoration}/realesrgan.py | 0 .../codeformer => dream/restoration}/vqgan_arch.py | 0 ldm/generate.py | 3 ++- ldm/restoration/codeformer/weights/.gitkeep | 0 scripts/dream.py | 6 +++--- scripts/preload_models.py | 4 ++-- 13 files changed, 29 insertions(+), 23 deletions(-) rename ldm/{restoration/restoration.py => dream/restoration/base.py} (81%) rename ldm/{restoration/codeformer => dream/restoration}/codeformer.py (93%) create mode 100644 ldm/dream/restoration/codeformer/weights/README rename ldm/{restoration/codeformer => dream/restoration}/codeformer_arch.py (99%) rename ldm/{restoration/gfpgan => dream/restoration}/gfpgan.py (100%) rename ldm/{restoration/realesrgan => dream/restoration}/realesrgan.py (100%) rename ldm/{restoration/codeformer => dream/restoration}/vqgan_arch.py (100%) delete mode 100644 ldm/restoration/codeformer/weights/.gitkeep diff --git a/ldm/dream/args.py b/ldm/dream/args.py index 424dadc1a4..50bfe357db 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -350,14 +350,16 @@ class Args(object): ) # Restoration related args
postprocessing_group.add_argument( - '--restore', - action='store_true', - help='Enable Face Restoration', + '--no_restore', + dest='restore', + action='store_false', + help='Disable face restoration with GFPGAN or codeformer', ) postprocessing_group.add_argument( - '--esrgan', - action='store_true', - help='Enable Upscaling', + '--no_upscale', + dest='esrgan', + action='store_false', + help='Disable upscaling with ESRGAN', ) postprocessing_group.add_argument( '--esrgan_bg_tile', diff --git a/ldm/dream/generator/embiggen.py b/ldm/dream/generator/embiggen.py index e196e3005f..69046db4dd 100644 --- a/ldm/dream/generator/embiggen.py +++ b/ldm/dream/generator/embiggen.py @@ -79,7 +79,7 @@ class Embiggen(Generator): initsuperwidth = round(initsuperwidth*embiggen[0]) initsuperheight = round(initsuperheight*embiggen[0]) if embiggen[1] > 0: # No point in ESRGAN upscaling if strength is set zero - from ldm.restoration.realesrgan import ESRGAN + from ldm.dream.restoration.realesrgan import ESRGAN esrgan = ESRGAN() print( f'>> ESRGAN upscaling init image prior to cutting with Embiggen with strength {embiggen[1]}') diff --git a/ldm/restoration/restoration.py b/ldm/dream/restoration/base.py similarity index 81% rename from ldm/restoration/restoration.py rename to ldm/dream/restoration/base.py index d9caebd4fa..539301d802 100644 --- a/ldm/restoration/restoration.py +++ b/ldm/dream/restoration/base.py @@ -19,16 +19,16 @@ class Restoration(): # Face Restore Models def load_gfpgan(self): - from ldm.restoration.gfpgan.gfpgan import GFPGAN + from ldm.dream.restoration.gfpgan import GFPGAN return GFPGAN(self.gfpgan_dir, self.gfpgan_model_path) def load_codeformer(self): - from ldm.restoration.codeformer.codeformer import CodeFormerRestoration + from ldm.dream.restoration.codeformer import CodeFormerRestoration return CodeFormerRestoration() # Upscale Models def load_ersgan(self): - from ldm.restoration.realesrgan.realesrgan import ESRGAN + from ldm.dream.restoration.realesrgan import ESRGAN esrgan = ESRGAN(self.esrgan_bg_tile) print('>> ESRGAN Initialized') - return esrgan; \ No newline at end of file + return esrgan; diff --git a/ldm/restoration/codeformer/codeformer.py b/ldm/dream/restoration/codeformer.py similarity index 93% rename from ldm/restoration/codeformer/codeformer.py rename to ldm/dream/restoration/codeformer.py index f725ef9144..78648865aa 100644 --- a/ldm/restoration/codeformer/codeformer.py +++ b/ldm/dream/restoration/codeformer.py @@ -8,7 +8,7 @@ pretrained_model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v class CodeFormerRestoration(): def __init__(self, - codeformer_dir='ldm/restoration/codeformer', + codeformer_dir='ldm/dream/restoration/codeformer', codeformer_model_path='weights/codeformer.pth') -> None: self.model_path = os.path.join(codeformer_dir, codeformer_model_path) self.codeformer_model_exists = os.path.isfile(self.model_path) @@ -27,7 +27,7 @@ class CodeFormerRestoration(): from basicsr.utils.download_util import load_file_from_url from basicsr.utils import img2tensor, tensor2img from facexlib.utils.face_restoration_helper import FaceRestoreHelper - from ldm.restoration.codeformer.codeformer_arch import CodeFormer + from ldm.dream.restoration.codeformer_arch import CodeFormer from torchvision.transforms.functional import normalize from PIL import Image @@ -35,7 +35,7 @@ class CodeFormerRestoration(): cf = cf_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(device) - checkpoint_path = 
load_file_from_url(url=pretrained_model_url, model_dir=os.path.abspath('ldm/restoration/codeformer/weights'), progress=True) + checkpoint_path = load_file_from_url(url=pretrained_model_url, model_dir=os.path.abspath('ldm/dream/restoration/codeformer/weights'), progress=True) checkpoint = torch.load(checkpoint_path)['params_ema'] cf.load_state_dict(checkpoint) cf.eval() @@ -81,4 +81,4 @@ class CodeFormerRestoration(): cf = None - return res \ No newline at end of file + return res diff --git a/ldm/dream/restoration/codeformer/weights/README b/ldm/dream/restoration/codeformer/weights/README new file mode 100644 index 0000000000..1cc40dc026 --- /dev/null +++ b/ldm/dream/restoration/codeformer/weights/README @@ -0,0 +1,3 @@ +To use codeformer face reconstruction, you will need to copy +https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth +into this directory. diff --git a/ldm/restoration/codeformer/codeformer_arch.py b/ldm/dream/restoration/codeformer_arch.py similarity index 99% rename from ldm/restoration/codeformer/codeformer_arch.py rename to ldm/dream/restoration/codeformer_arch.py index 082d0d5eca..cf6c5c91be 100644 --- a/ldm/restoration/codeformer/codeformer_arch.py +++ b/ldm/dream/restoration/codeformer_arch.py @@ -5,7 +5,7 @@ from torch import nn, Tensor import torch.nn.functional as F from typing import Optional, List -from ldm.restoration.codeformer.vqgan_arch import * +from ldm.dream.restoration.vqgan_arch import * from basicsr.utils import get_root_logger from basicsr.utils.registry import ARCH_REGISTRY @@ -273,4 +273,4 @@ class CodeFormer(VQAutoEncoder): x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w) out = x # logits doesn't need softmax before cross_entropy loss - return out, logits, lq_feat \ No newline at end of file + return out, logits, lq_feat diff --git a/ldm/restoration/gfpgan/gfpgan.py b/ldm/dream/restoration/gfpgan.py similarity index 100% rename from ldm/restoration/gfpgan/gfpgan.py rename to ldm/dream/restoration/gfpgan.py diff --git a/ldm/restoration/realesrgan/realesrgan.py b/ldm/dream/restoration/realesrgan.py similarity index 100% rename from ldm/restoration/realesrgan/realesrgan.py rename to ldm/dream/restoration/realesrgan.py diff --git a/ldm/restoration/codeformer/vqgan_arch.py b/ldm/dream/restoration/vqgan_arch.py similarity index 100% rename from ldm/restoration/codeformer/vqgan_arch.py rename to ldm/dream/restoration/vqgan_arch.py diff --git a/ldm/generate.py b/ldm/generate.py index 80a6c4c4be..d3ccc8bb98 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -27,7 +27,8 @@ from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.ksampler import KSampler -from ldm.dream.pngwriter import PngWriter +from ldm.dream.pngwriter import PngWriter, retrieve_metadata +from ldm.dream.args import metadata_loads from ldm.dream.image_util import InitImageResizer from ldm.dream.devices import choose_torch_device from ldm.dream.conditioning import get_uc_and_c diff --git a/ldm/restoration/codeformer/weights/.gitkeep b/ldm/restoration/codeformer/weights/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scripts/dream.py b/scripts/dream.py index 989db58f9f..a74a3409a5 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -45,16 +45,16 @@ def main(): # Loading Face Restoration and ESRGAN Modules try: gfpgan, codeformer, esrgan = None, None, None - from ldm.restoration.restoration import 
Restoration + from ldm.dream.restoration import Restoration restoration = Restoration(opt.gfpgan_dir, opt.gfpgan_model_path, opt.esrgan_bg_tile) if opt.restore: gfpgan, codeformer = restoration.load_face_restore_models() else: - print('>> Face Restoration Disabled') + print('>> Face restoration disabled') if opt.esrgan: esrgan = restoration.load_ersgan() else: - print('>> ESRGAN Disabled') + print('>> Upscaling disabled') except (ModuleNotFoundError, ImportError): import traceback print(traceback.format_exc(), file=sys.stderr) diff --git a/scripts/preload_models.py b/scripts/preload_models.py index 6c082d80e9..35bcea6ce1 100644 --- a/scripts/preload_models.py +++ b/scripts/preload_models.py @@ -103,11 +103,11 @@ print('preloading CodeFormer model file...') try: import urllib.request model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth' - model_dest = 'ldm/restoration/codeformer/weights/codeformer.pth' + model_dest = 'ldm/dream/restoration/codeformer/weights/codeformer.pth' if not os.path.exists(model_dest): print('downloading codeformer model file...') os.makedirs(os.path.dirname(model_dest), exist_ok=True) - urllib.request.urlretrieve(model_path,model_dest) + urllib.request.urlretrieve(model_url,model_dest) except Exception: import traceback print('Error loading CodeFormer:') From 23af057e5c5c96d2a971e5b3e68f9da8ea47e5f8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 19 Sep 2022 14:54:52 -0400 Subject: [PATCH 5/6] tweaks to make postprocess fixing work better - modify strength of embiggen to reduce tiling ghosts - normalize naming of postprocessed files (could improve more to avoid name collisions) - move restoration modules under ldm.dream --- docs/features/EMBIGGEN.md | 1 - ldm/dream/generator/embiggen.py | 3 +++ ldm/generate.py | 2 ++ scripts/dream.py | 24 ++++++++++++++++++++---- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/features/EMBIGGEN.md b/docs/features/EMBIGGEN.md index 70f35fe758..c879102160 100644 --- a/docs/features/EMBIGGEN.md +++ b/docs/features/EMBIGGEN.md @@ -55,7 +55,6 @@ and it can also be less than one if the init_img is too big. Esrgan_strength defaults to 0.75, and the overlap_ratio defaults to 0.25, both are optional. - Unlike Img2Img, the `--width` (`-W`) and `--height` (`-H`) arguments do not control the size of the image as a whole, but the size of the tiles used to Embiggen the image. diff --git a/ldm/dream/generator/embiggen.py b/ldm/dream/generator/embiggen.py index 69046db4dd..b65957c386 100644 --- a/ldm/dream/generator/embiggen.py +++ b/ldm/dream/generator/embiggen.py @@ -62,6 +62,9 @@ class Embiggen(Generator): embiggen_tiles = list(map(lambda n: n-1, embiggen_tiles)) embiggen_tiles.sort() + if strength >= 0.5: + print(f'* WARNING: Embiggen may produce mirror motifs if the strength (-f) is too high (currently {strength}). 
Try values between 0.35-0.45.') + # Prep img2img generator, since we wrap over it gen_img2img = Img2Img(self.model) diff --git a/ldm/generate.py b/ldm/generate.py index d3ccc8bb98..0823c9d661 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -510,6 +510,8 @@ class Generate: skip_normalize=opt.skip_normalize, log_tokens =opt.log_tokenization ) + opt.strength = 0.40 + print(f'>> Setting img2img strength to {opt.strength} for happy embiggening') # embiggen takes a image path (sigh) generator.generate( prompt, diff --git a/scripts/dream.py b/scripts/dream.py index a74a3409a5..8b2dbbc52c 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -243,9 +243,9 @@ def main_loop(gen, opt, infile): # Here is where the images are actually generated! last_results = [] try: - file_writer = PngWriter(current_outdir) - prefix = file_writer.unique_prefix() - results = [] # list of filename, prompt pairs + file_writer = PngWriter(current_outdir) + prefix = file_writer.unique_prefix() + results = [] # list of filename, prompt pairs grid_images = dict() # seed -> Image, only used if `opt.grid` prior_variations = opt.with_variations or [] first_seed = opt.seed @@ -257,7 +257,9 @@ def main_loop(gen, opt, infile): if opt.grid: grid_images[seed] = image else: - if upscaled and opt.save_original: + if operation == 'postprocess': + filename = choose_postprocess_name(opt.prompt) + elif upscaled and opt.save_original: filename = f'{prefix}.{seed}.postprocessed.png' else: filename = f'{prefix}.{seed}.png' @@ -268,6 +270,8 @@ def main_loop(gen, opt, infile): formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed) elif len(prior_variations) > 0: formatted_dream_prompt = opt.dream_prompt_str(seed=first_seed) + elif operation == 'postprocess': + formatted_dream_prompt = '!fix '+opt.dream_prompt_str(seed=seed) else: formatted_dream_prompt = opt.dream_prompt_str(seed=seed) path = file_writer.save_image_and_prompt_to_png( @@ -353,6 +357,18 @@ def do_postprocess (gen, opt, callback): opt = opt, ) +def choose_postprocess_name(original_filename): + basename,_ = os.path.splitext(os.path.basename(original_filename)) + if re.search('\d+\.\d+$',basename): + return f'{basename}.fixed.png' + match = re.search('(\d+\.\d+)\.fixed(-(\d+))?$',basename) + if match: + counter = match.group(3) or 0 + return '{prefix}-{counter:02d}.png'.format(prefix=match.group(1), counter=int(counter)+1) + else: + return f'{basename}.fixed.png' + + def get_next_command(infile=None) -> str: # command string if infile is None: command = input('dream> ') From 43c9288534dadc5de83a3b8e7e9179aa4959ce4e Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 20 Sep 2022 23:01:21 -0400 Subject: [PATCH 6/6] Update UPSCALE.md --- docs/features/UPSCALE.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/docs/features/UPSCALE.md b/docs/features/UPSCALE.md index db5649ecdb..d90e637488 100644 --- a/docs/features/UPSCALE.md +++ b/docs/features/UPSCALE.md @@ -2,15 +2,13 @@ title: Upscale --- -## **Intro** +## Intro The script provides the ability to restore faces and upscale. You can apply these operations at the time you generate the images, or at any time to a previously-generated PNG file, using -the [!fix](#Fixing Previously-Generated Images) command. +the [!fix](#fixing-previously-generated-images) command. -# :material-image-size-select-large: Upscale - -## **Face Fixing** +## Face Fixing The default face restoration module is GFPGAN. The default upscale is Real-ESRGAN. 
For an alternative face restoration module, see [CodeFormer Support] below. @@ -46,11 +44,11 @@ other GFPGAN related boot arguments if you wish to customize further._ may run `python3 scripts/preload_models.py` after you have installed GFPGAN and all its dependencies. -## **Usage** +## Usage You will now have access to two new prompt arguments. -### **Upscaling** +### Upscaling `-U : <upscaling_factor> <upscaling_strength>` @@ -64,7 +62,7 @@ retain some of those for natural looking results, we recommend using values betw If you do not explicitly specify an upscaling_strength, it will default to 0.75. -### **Face Restoration** +### Face Restoration `-G : <gfpgan_strength>` @@ -81,7 +79,7 @@ When you use either `-U` or `-G`, the final result you get is upscaled or face m to save the original Stable Diffusion generation, you can use the `-save_orig` prompt argument to save the original unaffected version too. -### **Example Usage** +### Example Usage ```bash dream> superman dancing with a panda bear -U 2 0.6 -G 0.4 @@ -121,13 +119,13 @@ saving it to `ldm/restoration/codeformer/weights` folder. You can use the `-ft` prompt argument to swap between CodeFormer and the default GFPGAN. The above-mentioned `-G` prompt argument will allow you to control the strength of the restoration effect. -### **Usage:** +### Usage: The following command will perform face restoration with CodeFormer instead of the default gfpgan. `<prompt> -G 0.8 -ft codeformer` -**Other Options:** +### Other Options: - `-cf` - cf or CodeFormer Fidelity takes values between `0` and `1`. 0 produces high quality results but low accuracy and 1 produces lower quality results but higher accuracy to your original @@ -157,7 +155,7 @@ dream> !fix ./outputs/img-samples/000044.2945021133.png -G 0.8 -U 2 A new file named `000044.2945021133.fixed.png` will be created in the output directory. Note that the `!fix` command does not replace the original file, unlike the behavior at generate time. -**Disabling:** +### Disabling: If, for some reason, you do not wish to load the GFPGAN and/or ESRGAN libraries, you can disable them on the dream.py command line with the `--no_restore` and `--no_upscale` options, respectively.
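For completeness, here is a hedged sketch of driving these postprocessors from Python rather than from the dream> prompt. apply_postprocessor() and its keyword arguments come from the ldm/generate.py changes in this series; the bare Generate() construction and the specific argument values are illustrative assumptions, not tested settings:

```python
# Hedged sketch: calling the postprocessors added in this series directly.
# apply_postprocessor() and its keywords come from ldm/generate.py above;
# the Generate() setup is abbreviated and the values are illustrative only.
from ldm.generate import Generate

gen = Generate()  # assumes default weights/config, as in scripts/dream.py
gen.apply_postprocessor(
    'outputs/img-samples/000044.2945021133.png',
    tool='codeformer',         # or 'gfpgan', 'upscale', 'embiggen'
    gfpgan_strength=0.8,       # equivalent to -G 0.8
    codeformer_fidelity=0.75,  # equivalent to -cf 0.75
    upscale=[2, 0.9],          # equivalent to -U 2 0.9 (assumed list form)
    save_original=True,        # keep the source image untouched
)
```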