From 5711b6d611ed7fb892ade5e45ebf17850d3abb0d Mon Sep 17 00:00:00 2001
From: Sean McLellan <sean@baristalabs.io>
Date: Thu, 25 Aug 2022 22:57:30 -0400
Subject: [PATCH 1/4] Add optional GFPGAN support

---
 README.md        | 30 +++++++++++++++++++
 ldm/simplet2i.py | 31 ++++++++++++++++++--
 scripts/dream.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 2af4a2a53a..6260032d3f 100644
--- a/README.md
+++ b/README.md
@@ -80,6 +80,36 @@ You may also pass a -v<count> option to generate count variants on the original
 passing the first generated image back into img2img the requested number of times. It generates interesting
 variants.
 
+## GFPGAN Support
+
+This script also provides the ability to invoke GFPGAN after image generation. Doing so will enhance faces
+and optionally upscale the image to a higher resolution.
+
+To use the ability, clone the [GFPGAN repository](https://github.com/TencentARC/GFPGAN) and follow their
+installation instructions. By default, we expect GFPGAN to be installed in a 'gfpgan' sibling directory.
+
+You may also want to install Real-ESRGAN, if you want to enhance non-face regions in the image by installing
+the pip Real-ESRGAN package.
+```
+pip install realesrgan
+
+```
+
+Now, you can run this script by adding the --gfpgan option. Any issues with GFPGAN will be reported on initialization.
+
+When generating prompts, add a -G or --gfpgan_strenth option to control the strength of the GFPGAN enhancement.
+0.0 is no enhancement, 1.0 is maximum enhancement.
+
+So for instance, to apply the maximum strength:
+~~~~
+dream> a man wearing a pineapple hat -G 1
+~~~~
+
+That's it!
+
+There's also a bunch of options to control GFPGAN settings when starting the script for different configs that you can
+read about in the help text. This will let you control where GFPGAN is installed, if upsampling is enapled, the upsampler to use and the model path.
+
 ## Barebones Web Server
 
 As of version 1.10, this distribution comes with a bare bones web server (see screenshot). To use it,
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 0ec3d60d98..ab40330e43 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -132,7 +132,8 @@ The vast majority of these arguments default to reasonable values.
                  strength=0.75, # default in scripts/img2img.py
                  embedding_path=None,
                  latent_diffusion_weights=False,  # just to keep track of this parameter when regenerating prompt
-                 device='cuda'
+                 device='cuda',
+                 gfpgan=None,
     ):
         self.batch_size      = batch_size
         self.iterations = iterations
@@ -154,6 +155,7 @@ The vast majority of these arguments default to reasonable values.
         self.sampler    = None
         self.latent_diffusion_weights=latent_diffusion_weights
         self.device = device
+        self.gfpgan = gfpgan
         if seed is None:
             self.seed = self._new_seed()
         else:
@@ -199,6 +201,7 @@ The vast majority of these arguments default to reasonable values.
                      # these are specific to img2img
                      init_img=None,
                      strength=None,
+                     gfpgan_strength=None,
                      variants=None,
                      **args):   # eat up additional cruft
         '''
@@ -214,6 +217,7 @@ The vast majority of these arguments default to reasonable values.
            cfg_scale                       // how strongly the prompt influences the image (7.5) (must be >1)
            init_img                        // path to an initial image - its dimensions override width and height
            strength                        // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
+           gfpgan_strength                 // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
            ddim_eta                        // image randomness (eta=0.0 means the same seed always produces the same image)
            variants                        // if >0, the 1st generated image will be passed back to img2img to generate the requested number of variants
            callback                        // a function or method that will be called each time an image is generated
@@ -260,7 +264,8 @@ The vast majority of these arguments default to reasonable values.
                                     batch_size=batch_size,iterations=iterations,
                                     steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta,
                                     skip_normalize=skip_normalize,
-                                    init_img=init_img,strength=strength,variants=variants,
+                                    init_img=init_img,strength=strength,
+                                    gfpgan_strength=gfpgan_strength,variants=variants,
                                     callback=image_callback)
         else:
             results = self._txt2img(prompt,
@@ -268,6 +273,7 @@ The vast majority of these arguments default to reasonable values.
                                     batch_size=batch_size,iterations=iterations,
                                     steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta,
                                     skip_normalize=skip_normalize,
+                                    gfpgan_strength=gfpgan_strength,
                                     width=width,height=height,
                                     callback=image_callback)
         toc  = time.time()
@@ -280,6 +286,7 @@ The vast majority of these arguments default to reasonable values.
                  batch_size,iterations,
                  steps,seed,cfg_scale,ddim_eta,
                  skip_normalize,
+                 gfpgan_strength,
                  width,height,
                  callback):    # the callback is called each time a new Image is generated
         """
@@ -335,6 +342,8 @@ The vast majority of these arguments default to reasonable values.
                         for x_sample in x_samples_ddim:
                             x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                             image = Image.fromarray(x_sample.astype(np.uint8))
+                            if gfpgan_strength > 0:
+                                image = self._run_gfpgan(image, gfpgan_strength)
                             images.append([image,seed])
                             if callback is not None:
                                 callback(image,seed)
@@ -354,6 +363,7 @@ The vast majority of these arguments default to reasonable values.
                  batch_size,iterations,
                  steps,seed,cfg_scale,ddim_eta,
                  skip_normalize,
+                 gfpgan_strength,
                  init_img,strength,variants,
                  callback):
         """
@@ -419,6 +429,8 @@ The vast majority of these arguments default to reasonable values.
                         for x_sample in x_samples:
                             x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
                             image = Image.fromarray(x_sample.astype(np.uint8))
+                            if gfpgan_strength > 0:
+                                image = self._run_gfpgan(image, gfpgan_strength)
                             images.append([image,seed])
                             if callback is not None:
                                 callback(image,seed)
@@ -549,3 +561,18 @@ The vast majority of these arguments default to reasonable values.
                     weights.append(1.0)
                 remaining = 0
         return prompts, weights
+
+    def _run_gfpgan(self, image, strength):
+        """Return `image` face-enhanced by self.gfpgan, blended with the original when strength < 1.0."""
+        if self.gfpgan is None:
+            print("GFPGAN not initialized, it must be loaded via the --gfpgan argument")
+            return image
+        image = image.convert("RGB")
+        # NOTE(review): GFPGAN's own inference script passes cv2 (BGR) arrays to enhance(); this is RGB - confirm.
+        _, _, restored_img = self.gfpgan.enhance(np.array(image, dtype=np.uint8), has_aligned=False, only_center_face=False, paste_back=True)
+        res = Image.fromarray(restored_img)
+        if strength < 1.0:
+            if image.size != res.size:  # GFPGAN may upscale, but Image.blend() requires equal sizes
+                image = image.resize(res.size)
+            res = Image.blend(image, res, strength)
+        return res
diff --git a/scripts/dream.py b/scripts/dream.py
index 24dac5b927..c49340d655 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -6,6 +6,7 @@ import shlex
 import os
 import sys
 import copy
+
 from ldm.dream_util import Completer,PngWriter,PromptFormatter
 
 debugging = False
@@ -68,6 +69,28 @@ def main():
 
     # preload the model
     t2i.load_model()
+
+    # load GFPGAN if requested
+    if opt.use_gfpgan:
+        print("\n* --gfpgan was specified, loading gfpgan...")
+        try:
+            model_path = os.path.join(opt.gfpgan_dir, opt.gfpgan_model_path)
+            if not os.path.isfile(model_path):
+                raise Exception("GFPGAN model not found at path "+model_path)
+
+            sys.path.append(os.path.abspath(opt.gfpgan_dir))
+            from gfpgan import GFPGANer
+
+            bg_upsampler = None
+            if opt.gfpgan_bg_upsampler is not None:
+                bg_upsampler = load_gfpgan_bg_upsampler(opt.gfpgan_bg_upsampler, opt.gfpgan_bg_tile)
+
+            t2i.gfpgan = GFPGANer(model_path=model_path, upscale=opt.gfpgan_upscale, arch='clean', channel_multiplier=2, bg_upsampler=bg_upsampler)
+        except Exception:
+            import traceback
+            print("Error loading GFPGAN:", file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
+
     print("\n* Initialization done! Awaiting your command (-h for help, 'q' to quit, 'cd' to change output dir, 'pwd' to print output dir)...")
 
     log_path   = os.path.join(opt.outdir,'dream_log.txt')
@@ -183,6 +206,32 @@ def main_loop(t2i,outdir,parser,log,infile):
 
     print("goodbye!")
 
+def load_gfpgan_bg_upsampler(bg_upsampler, bg_tile=400):
+    """Return a RealESRGANer for GFPGAN background upsampling, or None.
+
+    None is returned when `bg_upsampler` is not 'realesrgan', or (with a
+    warning) when CUDA is unavailable. `bg_tile` is passed through as `tile`.
+    """
+    import torch
+
+    if bg_upsampler != 'realesrgan':
+        return None
+
+    if not torch.cuda.is_available():  # CPU
+        import warnings
+        warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. '
+                      'If you really want to use it, please modify the corresponding codes.')
+        return None
+
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from realesrgan import RealESRGANer
+
+    weights_url = 'https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth'
+    rrdb_net = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
+    # half=True: need to set False in CPU mode
+    return RealESRGANer(scale=2, model_path=weights_url, model=rrdb_net,
+                        tile=bg_tile, tile_pad=10, pre_pad=0, half=True)
+
 # variant generation is going to be superseded by a generalized
 # "prompt-morph" functionality
 # def generate_variants(t2i,outdir,opt,previous_gens):
@@ -261,6 +310,31 @@ def create_argv_parser():
                         type=str,
                         default="cuda",
                         help="device to run stable diffusion on. defaults to cuda `torch.cuda.current_device()` if avalible")
+    # GFPGAN related args
+    parser.add_argument('--gfpgan',
+                        dest='use_gfpgan',
+                        action='store_true',
+                        help="load gfpgan for use in the dreambot. Note: Enabling GFPGAN will require more GPU memory")
+    parser.add_argument("--gfpgan_upscale",
+                        type=int,
+                        default=2,
+                        help="The final upsampling scale of the image. Default: 2. Only used if --gfpgan is specified")
+    parser.add_argument("--gfpgan_bg_upsampler",
+                        type=str,
+                        default='realesrgan',
+                        help="Background upsampler. Default: None. Options: realesrgan, none. Only used if --gfpgan is specified")
+    parser.add_argument("--gfpgan_bg_tile",
+                        type=int,
+                        default=400,
+                        help="Tile size for background sampler, 0 for no tile during testing. Default: 400. Only used if --gfpgan is specified")
+    parser.add_argument("--gfpgan_model_path",
+                        type=str,
+                        default='experiments/pretrained_models/GFPGANv1.3.pth',
+                        help="indicates the path to the GFPGAN model, relative to --gfpgan_dir. Only used if --gfpgan is specified")
+    parser.add_argument("--gfpgan_dir",
+                        type=str,
+                        default='../gfpgan',
+                        help="indicates the directory containing the GFPGAN code. Only used if --gfpgan is specified")
     return parser
                         
     
@@ -278,6 +352,7 @@ def create_cmd_parser():
     parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
     parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
     parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
+    parser.add_argument('-G','--gfpgan_strength', default=0.5, type=float, help="The strength at which to apply the GFPGAN model to the result, in order to improve faces.")
 # variants is going to be superseded by a generalized "prompt-morph" function
 #    parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
     parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")

From 60ed00432837bc393f205321750788f91f3aa4b0 Mon Sep 17 00:00:00 2001
From: Sean McLellan <sean@baristalabs.io>
Date: Thu, 25 Aug 2022 23:31:08 -0400
Subject: [PATCH 2/4] Update readme, fix defaults for case-sensitive fs's

---
 README.md        | 14 ++++++++++++--
 scripts/dream.py |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 28063203c8..f3cf91877f 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ This script also provides the ability to invoke GFPGAN after image generation. D
 and optionally upscale the image to a higher resolution.
 
 To use the ability, clone the [GFPGAN repository](https://github.com/TencentARC/GFPGAN) and follow their
-installation instructions. By default, we expect GFPGAN to be installed in a 'gfpgan' sibling directory.
+installation instructions. By default, we expect GFPGAN to be installed in a 'GFPGAN' sibling directory.
 
 You may also want to install Real-ESRGAN, if you want to enhance non-face regions in the image by installing
 the pip Real-ESRGAN package.
@@ -97,6 +97,15 @@ pip install realesrgan
 
 Now, you can run this script by adding the --gfpgan option. Any issues with GFPGAN will be reported on initialization.
 
+~~~~
+(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --gfpgan
+* Initializing, be patient...
+(...more initialization messages...)
+* --gfpgan was specified, loading gfpgan...
+(...even more initialization messages...)
+* Initialization done! Awaiting your command...
+~~~~
+
 When generating prompts, add a -G or --gfpgan_strenth option to control the strength of the GFPGAN enhancement.
 0.0 is no enhancement, 1.0 is maximum enhancement.
 
@@ -115,7 +124,8 @@ That's it!
 There's also a bunch of options to control GFPGAN settings when starting the script for different configs that you can
 read about in the help text. This will let you control where GFPGAN is installed, if upsampling is enapled, the upsampler to use and the model path.
 
-Note that loading GFPGAN consumes additional GPU memory, additionaly, a couple of seconds will be tacked on when generating your images.
+Note that loading GFPGAN consumes additional GPU memory, but hey, 3090s with 24Gi of VRAM are cheap now *cough*.
+Additionally, a couple of seconds will be tacked on when generating your images, but hey, it's worth it.
 
 ## Barebones Web Server
 
diff --git a/scripts/dream.py b/scripts/dream.py
index 66980bf1f1..0e888bf99c 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -333,7 +333,7 @@ def create_argv_parser():
                         help="indicates the path to the GFPGAN model, relative to --gfpgan_dir. Only used if --gfpgan is specified")
     parser.add_argument("--gfpgan_dir",
                         type=str,
-                        default='../gfpgan',
+                        default='../GFPGAN',
                         help="indicates the directory containing the GFPGAN code. Only used if --gfpgan is specified")
     return parser
                         

From 3a30a8f2d246c0f7934064233de271b00adb3477 Mon Sep 17 00:00:00 2001
From: Sean McLellan <sean@baristalabs.io>
Date: Thu, 25 Aug 2022 23:39:03 -0400
Subject: [PATCH 3/4] Fix not being able to disable bgupscaler; update readme

---
 README.md        | 2 +-
 scripts/dream.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f3cf91877f..179be569b2 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ and optionally upscale the image to a higher resolution.
 To use the ability, clone the [GFPGAN repository](https://github.com/TencentARC/GFPGAN) and follow their
 installation instructions. By default, we expect GFPGAN to be installed in a 'GFPGAN' sibling directory.
 
-You may also want to install Real-ESRGAN, if you want to enhance non-face regions in the image by installing
+You may also want to install Real-ESRGAN, if you want to enhance non-face regions in the image, by installing
 the pip Real-ESRGAN package.
 ```
 pip install realesrgan
diff --git a/scripts/dream.py b/scripts/dream.py
index 0e888bf99c..e2825f8142 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -82,7 +82,7 @@ def main():
             from gfpgan import GFPGANer
 
             bg_upsampler = None
-            if opt.gfpgan_bg_upsampler is not None:
+            if opt.gfpgan_bg_upsampler == 'realesrgan':
                 bg_upsampler = load_gfpgan_bg_upsampler(opt.gfpgan_bg_upsampler, opt.gfpgan_bg_tile)
 
             t2i.gfpgan = GFPGANer(model_path=model_path, upscale=opt.gfpgan_upscale, arch='clean', channel_multiplier=2, bg_upsampler=bg_upsampler)

From cb86b9ae6e849cbe4609a62eaf686388eda9b2df Mon Sep 17 00:00:00 2001
From: Sean McLellan <sean@baristalabs.io>
Date: Thu, 25 Aug 2022 23:48:35 -0400
Subject: [PATCH 4/4] Remove the redundancy, better logging

---
 ldm/simplet2i.py | 4 ++--
 scripts/dream.py | 4 +---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index f0713081c8..1cc199c119 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -399,8 +399,8 @@ The vast majority of these arguments default to reasonable values.
             try:
                 if gfpgan_strength > 0:
                     image = self._run_gfpgan(image, gfpgan_strength)
-            except Exception:
-                print(f"Error running GFPGAN - Your image was not enhanced.")
+            except Exception as e:
+                print(f"Error running GFPGAN - Your image was not enhanced.\n{e}")
             images.append(image)
         return images
 
diff --git a/scripts/dream.py b/scripts/dream.py
index e2825f8142..4def627519 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -81,9 +81,7 @@ def main():
             sys.path.append(os.path.abspath(opt.gfpgan_dir))
             from gfpgan import GFPGANer
 
-            bg_upsampler = None
-            if opt.gfpgan_bg_upsampler == 'realesrgan':
-                bg_upsampler = load_gfpgan_bg_upsampler(opt.gfpgan_bg_upsampler, opt.gfpgan_bg_tile)
+            bg_upsampler = load_gfpgan_bg_upsampler(opt.gfpgan_bg_upsampler, opt.gfpgan_bg_tile)
 
             t2i.gfpgan = GFPGANer(model_path=model_path, upscale=opt.gfpgan_upscale, arch='clean', channel_multiplier=2, bg_upsampler=bg_upsampler)
         except Exception: