Add optional GFPGAN support

This commit is contained in:
Sean McLellan 2022-08-25 22:57:30 -04:00
parent 0eba55ddbc
commit 5711b6d611
3 changed files with 134 additions and 2 deletions

View File

@ -80,6 +80,36 @@ You may also pass a -v<count> option to generate count variants on the original
passing the first generated image back into img2img the requested number of times. It generates interesting
variants.
## GFPGAN Support
This script also provides the ability to invoke GFPGAN after image generation. Doing so will enhance faces
and optionally upscale the image to a higher resolution.
To use the ability, clone the [GFPGAN repository](https://github.com/TencentARC/GFPGAN) and follow their
installation instructions. By default, we expect GFPGAN to be installed in a 'gfpgan' sibling directory.
If you also want to enhance the non-face regions of the image, install Real-ESRGAN as well. It is available
as a pip package:
```
pip install realesrgan
```
Now, you can run this script by adding the --gfpgan option. Any issues with GFPGAN will be reported on initialization.
When generating prompts, add a -G or --gfpgan_strength option to control the strength of the GFPGAN enhancement.
0.0 is no enhancement, 1.0 is maximum enhancement.
So for instance, to apply the maximum strength:
~~~~
dream> a man wearing a pineapple hat -G 1
~~~~
That's it!
There are also several options for controlling GFPGAN settings when starting the script; you can
read about them in the help text. They let you control where GFPGAN is installed, whether upsampling is enabled, which upsampler to use, and the model path.
## Barebones Web Server
As of version 1.10, this distribution comes with a bare bones web server (see screenshot). To use it,

View File

@ -132,7 +132,8 @@ The vast majority of these arguments default to reasonable values.
strength=0.75, # default in scripts/img2img.py
embedding_path=None,
latent_diffusion_weights=False, # just to keep track of this parameter when regenerating prompt
device='cuda'
device='cuda',
gfpgan=None,
):
self.batch_size = batch_size
self.iterations = iterations
@ -154,6 +155,7 @@ The vast majority of these arguments default to reasonable values.
self.sampler = None
self.latent_diffusion_weights=latent_diffusion_weights
self.device = device
self.gfpgan = gfpgan
if seed is None:
self.seed = self._new_seed()
else:
@ -199,6 +201,7 @@ The vast majority of these arguments default to reasonable values.
# these are specific to img2img
init_img=None,
strength=None,
gfpgan_strength=None,
variants=None,
**args): # eat up additional cruft
'''
@ -214,6 +217,7 @@ The vast majority of these arguments default to reasonable values.
cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
init_img // path to an initial image - its dimensions override width and height
strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image)
variants // if >0, the 1st generated image will be passed back to img2img to generate the requested number of variants
callback // a function or method that will be called each time an image is generated
@ -260,7 +264,8 @@ The vast majority of these arguments default to reasonable values.
batch_size=batch_size,iterations=iterations,
steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta,
skip_normalize=skip_normalize,
init_img=init_img,strength=strength,variants=variants,
init_img=init_img,strength=strength,
gfpgan_strength=gfpgan_strength,variants=variants,
callback=image_callback)
else:
results = self._txt2img(prompt,
@ -268,6 +273,7 @@ The vast majority of these arguments default to reasonable values.
batch_size=batch_size,iterations=iterations,
steps=steps,seed=seed,cfg_scale=cfg_scale,ddim_eta=ddim_eta,
skip_normalize=skip_normalize,
gfpgan_strength=gfpgan_strength,
width=width,height=height,
callback=image_callback)
toc = time.time()
@ -280,6 +286,7 @@ The vast majority of these arguments default to reasonable values.
batch_size,iterations,
steps,seed,cfg_scale,ddim_eta,
skip_normalize,
gfpgan_strength,
width,height,
callback): # the callback is called each time a new Image is generated
"""
@ -335,6 +342,8 @@ The vast majority of these arguments default to reasonable values.
for x_sample in x_samples_ddim:
x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
image = Image.fromarray(x_sample.astype(np.uint8))
# gfpgan_strength is None when the caller did not request face restoration;
# test for that first because `None > 0` raises TypeError in Python 3
if gfpgan_strength is not None and gfpgan_strength > 0:
    image = self._run_gfpgan(image, gfpgan_strength)
images.append([image,seed])
if callback is not None:
callback(image,seed)
@ -354,6 +363,7 @@ The vast majority of these arguments default to reasonable values.
batch_size,iterations,
steps,seed,cfg_scale,ddim_eta,
skip_normalize,
gfpgan_strength,
init_img,strength,variants,
callback):
"""
@ -419,6 +429,8 @@ The vast majority of these arguments default to reasonable values.
for x_sample in x_samples:
x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
image = Image.fromarray(x_sample.astype(np.uint8))
# gfpgan_strength is None when the caller did not request face restoration;
# test for that first because `None > 0` raises TypeError in Python 3
if gfpgan_strength is not None and gfpgan_strength > 0:
    image = self._run_gfpgan(image, gfpgan_strength)
images.append([image,seed])
if callback is not None:
callback(image,seed)
@ -549,3 +561,18 @@ The vast majority of these arguments default to reasonable values.
weights.append(1.0)
remaining = 0
return prompts, weights
def _run_gfpgan(self, image, strength):
    """
    Restore faces in a generated image with GFPGAN and blend the result
    with the original.

    image    // PIL Image to enhance
    strength // blend weight of the restored image; values >= 1.0 return
                the GFPGAN output unblended, smaller values mix it with
                the original via Image.blend

    Returns a PIL Image. If self.gfpgan was never loaded, a message is
    printed and the input image is returned untouched. When GFPGAN was
    configured to upscale, the returned image is larger than the input.
    """
    if self.gfpgan is None:
        print("GFPGAN not initialized, it must be loaded via the --gfpgan argument")
        return image

    image = image.convert("RGB")

    # GFPGAN follows the OpenCV convention and expects BGR channel order,
    # so reverse the channel axis (made contiguous for the OpenCV calls inside)
    bgr_pixels = np.ascontiguousarray(np.array(image, dtype=np.uint8)[..., ::-1])
    _, _, restored_bgr = self.gfpgan.enhance(bgr_pixels, has_aligned=False, only_center_face=False, paste_back=True)

    # back to RGB for PIL
    res = Image.fromarray(np.ascontiguousarray(restored_bgr[..., ::-1]))

    if strength < 1.0:
        # Image.blend rejects images of different sizes; when GFPGAN upscaled
        # the output, scale the original up to match before mixing
        if res.size != image.size:
            image = image.resize(res.size)
        res = Image.blend(image, res, strength)

    return res

View File

@ -6,6 +6,7 @@ import shlex
import os
import sys
import copy
from ldm.dream_util import Completer,PngWriter,PromptFormatter
debugging = False
@ -68,6 +69,28 @@ def main():
# preload the model
t2i.load_model()
# load GFPGAN if requested
if opt.use_gfpgan:
print("\n* --gfpgan was specified, loading gfpgan...")
try:
model_path = os.path.join(opt.gfpgan_dir, opt.gfpgan_model_path)
if not os.path.isfile(model_path):
raise Exception("GFPGAN model not found at path "+model_path)
sys.path.append(os.path.abspath(opt.gfpgan_dir))
from gfpgan import GFPGANer
bg_upsampler = None
if opt.gfpgan_bg_upsampler is not None:
bg_upsampler = load_gfpgan_bg_upsampler(opt.gfpgan_bg_upsampler, opt.gfpgan_bg_tile)
t2i.gfpgan = GFPGANer(model_path=model_path, upscale=opt.gfpgan_upscale, arch='clean', channel_multiplier=2, bg_upsampler=bg_upsampler)
except Exception:
import traceback
print("Error loading GFPGAN:", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
print("\n* Initialization done! Awaiting your command (-h for help, 'q' to quit, 'cd' to change output dir, 'pwd' to print output dir)...")
log_path = os.path.join(opt.outdir,'dream_log.txt')
@ -183,6 +206,32 @@ def main_loop(t2i,outdir,parser,log,infile):
print("goodbye!")
def load_gfpgan_bg_upsampler(bg_upsampler, bg_tile=400):
    """
    Create the background upsampler handed to GFPGAN.

    bg_upsampler // name of the upsampler; only 'realesrgan' is recognized
    bg_tile      // tile size forwarded to RealESRGANer

    Returns a RealESRGANer instance when 'realesrgan' is requested and CUDA
    is available. Returns None for any other name, or (after emitting a
    warning) when running without CUDA.
    """
    import torch

    wants_realesrgan = bg_upsampler == 'realesrgan'
    if not wants_realesrgan:
        return None

    if not torch.cuda.is_available():  # CPU
        import warnings
        warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. '
                      'If you really want to use it, please modify the corresponding codes.')
        return None

    # heavyweight optional dependencies, imported only once they are actually needed
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from realesrgan import RealESRGANer

    rrdb_net = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
    esrgan_options = dict(
        scale=2,
        model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
        model=rrdb_net,
        tile=bg_tile,
        tile_pad=10,
        pre_pad=0,
        half=True,  # need to set False in CPU mode
    )
    return RealESRGANer(**esrgan_options)
# variant generation is going to be superseded by a generalized
# "prompt-morph" functionality
# def generate_variants(t2i,outdir,opt,previous_gens):
@ -261,6 +310,31 @@ def create_argv_parser():
type=str,
default="cuda",
help="device to run stable diffusion on. defaults to cuda `torch.cuda.current_device()` if avalible")
# GFPGAN related args
parser.add_argument('--gfpgan',
dest='use_gfpgan',
action='store_true',
help="load gfpgan for use in the dreambot. Note: Enabling GFPGAN will require more GPU memory")
parser.add_argument("--gfpgan_upscale",
type=int,
default=2,
help="The final upsampling scale of the image. Default: 2. Only used if --gfpgan is specified")
# any value other than 'realesrgan' (e.g. 'none') disables background upsampling
parser.add_argument("--gfpgan_bg_upsampler",
                    type=str,
                    default='realesrgan',
                    help="Background upsampler. Default: realesrgan. Options: realesrgan, none. Only used if --gfpgan is specified")
parser.add_argument("--gfpgan_bg_tile",
type=int,
default=400,
help="Tile size for background sampler, 0 for no tile during testing. Default: 400. Only used if --gfpgan is specified")
parser.add_argument("--gfpgan_model_path",
type=str,
default='experiments/pretrained_models/GFPGANv1.3.pth',
help="indicates the path to the GFPGAN model, relative to --gfpgan_dir. Only used if --gfpgan is specified")
parser.add_argument("--gfpgan_dir",
type=str,
default='../gfpgan',
help="indicates the directory containing the GFPGAN code. Only used if --gfpgan is specified")
return parser
@ -278,6 +352,7 @@ def create_cmd_parser():
parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
parser.add_argument('-G','--gfpgan_strength', default=0.5, type=float, help="The strength at which to apply the GFPGAN model to the result, in order to improve faces.")
# variants is going to be superseded by a generalized "prompt-morph" function
# parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")