diff --git a/README.md b/README.md index 17215f9ea0..6726a7eca4 100644 --- a/README.md +++ b/README.md @@ -80,10 +80,69 @@ You may also pass a -v option to generate count variants on the original passing the first generated image back into img2img the requested number of times. It generates interesting variants. +## GFPGAN Support + +This script also provides the ability to invoke GFPGAN after image +generation. Doing so will enhance faces and optionally upscale the +image to a higher resolution. + +To use the ability, clone the [GFPGAN +repository](https://github.com/TencentARC/GFPGAN) and follow their +installation instructions. By default, we expect GFPGAN to be +installed in a 'GFPGAN' sibling directory. Be sure that the "ldm" +conda environment is active as you install GFPGAN. + +You may also want to install Real-ESRGAN, if you want to enhance +non-face regions in the image, by installing the pip Real-ESRGAN +package. + +``` +pip install realesrgan + +``` + +Now, you can run this script by adding the --gfpgan option. Any issues +with GFPGAN will be reported on initialization. + +~~~~ +(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --gfpgan +* Initializing, be patient... +(...more initialization messages...) +* --gfpgan was specified, loading gfpgan... +(...even more initialization messages...) +* Initialization done! Awaiting your command... +~~~~ + +When generating prompts, add a -G or --gfpgan_strength option to +control the strength of the GFPGAN enhancement. 0.0 is no +enhancement, 1.0 is maximum enhancement. + +So for instance, to apply the maximum strength: +~~~~ +dream> a man wearing a pineapple hat -G 1 +~~~~ + +This also works with img2img: +~~~ +dream> a man wearing a pineapple hat -I path/to/your/file.png -G 1 +~~~ + +That's it! + +There's also a bunch of options to control GFPGAN settings when +starting the script for different configs that you can read about in +the help text. 
This will let you control where GFPGAN is installed, whether +upsampling is enabled, the upsampler to use and the model path. + +Note that loading GFPGAN consumes additional GPU memory, but hey, +3090s with 24Gi of VRAM are cheap now *cough*. Additionally, a couple +of seconds will be tacked on when generating your images, but hey, +it's worth it. + ## Barebones Web Server -As of version 1.10, this distribution comes with a bare bones web server (see screenshot). To use it, -run the command: +As of version 1.10, this distribution comes with a bare bones web +server (see screenshot). To use it, run the command: ~~~~ (ldm) ~/stable-diffusion$ python3 scripts/dream_web.py diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 80a0194957..0de3a33237 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -132,7 +132,8 @@ The vast majority of these arguments default to reasonable values. strength=0.75, # default in scripts/img2img.py embedding_path=None, latent_diffusion_weights=False, # just to keep track of this parameter when regenerating prompt - device='cuda' + device='cuda', + gfpgan=None, ): self.batch_size = batch_size self.iterations = iterations @@ -154,6 +155,7 @@ The vast majority of these arguments default to reasonable values. self.sampler = None self.latent_diffusion_weights=latent_diffusion_weights self.device = device + self.gfpgan = gfpgan if seed is None: self.seed = self._new_seed() else: @@ -199,6 +201,7 @@ The vast majority of these arguments default to reasonable values. # these are specific to img2img init_img=None, strength=None, + gfpgan_strength=None, variants=None, **args): # eat up additional cruft ''' @@ -214,6 +217,7 @@ The vast majority of these arguments default to reasonable values. cfg_scale // how strongly the prompt influences the image (7.5) (must be >1) init_img // path to an initial image - its dimensions override width and height strength // strength for noising/unnoising init_img. 
0.0 preserves image exactly, 1.0 replaces it completely + gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image) variants // if >0, the 1st generated image will be passed back to img2img to generate the requested number of variants image_callback // a function or method that will be called each time an image is generated @@ -262,6 +266,7 @@ The vast majority of these arguments default to reasonable values. batch_size=batch_size, steps=steps,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, + gfpgan_strength=gfpgan_strength, init_img=init_img,strength=strength) else: images_iterator = self._txt2img(prompt, @@ -269,6 +274,7 @@ The vast majority of these arguments default to reasonable values. batch_size=batch_size, steps=steps,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, + gfpgan_strength=gfpgan_strength, width=width,height=height) with scope(self.device.type), self.model.ema_scope(): @@ -299,11 +305,13 @@ The vast majority of these arguments default to reasonable values. batch_size, steps,cfg_scale,ddim_eta, skip_normalize, + gfpgan_strength, width,height): """ An infinite iterator of images from the prompt. """ + sampler = self.sampler while True: @@ -317,7 +325,7 @@ The vast majority of these arguments default to reasonable values. unconditional_guidance_scale=cfg_scale, unconditional_conditioning=uc, eta=ddim_eta) - yield self._samples_to_images(samples) + yield self._samples_to_images(samples, gfpgan_strength=gfpgan_strength) @torch.no_grad() def _img2img(self, @@ -326,6 +334,7 @@ The vast majority of these arguments default to reasonable values. 
def _samples_to_images(self, samples, gfpgan_strength=0):
    """
    Decode a batch of latent samples into PIL images, optionally running
    each image through GFPGAN.

    samples         // latent batch passed to self.model.decode_first_stage()
    gfpgan_strength // strength handed to self._run_gfpgan(); None or any
                    // value <= 0 skips GFPGAN entirely

    Returns a list holding one PIL image per decoded sample. If GFPGAN
    raises, the error is printed and the unenhanced image is kept.
    """
    x_samples = self.model.decode_first_stage(samples)
    # shift/scale by (x + 1) / 2 and clamp everything into [0, 1]
    x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
    images = list()
    for x_sample in x_samples:
        # channels-first -> channels-last, scaled to 0..255 for PIL
        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
        image = Image.fromarray(x_sample.astype(np.uint8))
        try:
            # Callers may forward gfpgan_strength=None when the option was
            # not supplied. `None > 0` raises TypeError, which previously
            # surfaced as a misleading "Error running GFPGAN" message for
            # every generated image, so None is treated as "disabled".
            if gfpgan_strength is not None and gfpgan_strength > 0:
                image = self._run_gfpgan(image, gfpgan_strength)
        except Exception as e:
            # best effort: a GFPGAN failure must not cost the user the image
            print(f"Error running GFPGAN - Your image was not enhanced.\n{e}")
        images.append(image)
    return images
def _run_gfpgan(self, image, strength):
    """
    Enhance a PIL image with GFPGAN and blend the result with the original.

    image    // PIL image to enhance
    strength // blend weight of the GFPGAN result: values >= 1.0 return
             // the GFPGAN output as-is, smaller values mix it with the
             // input through Image.blend()

    Returns the enhanced PIL image, or the input image untouched when
    self.gfpgan is None. The result has the dimensions of the GFPGAN
    output, which can differ from the input's if GFPGAN upscales.
    """
    if self.gfpgan is None:
        print("GFPGAN not initialized, it must be loaded via the --gfpgan argument")
        return image

    image = image.convert("RGB")

    # GFPGANer.enhance() follows the OpenCV convention of BGR channel
    # order, so flip the channel axis on the way in and again on the way
    # out. ascontiguousarray() avoids handing negative-stride views on.
    bgr_input = np.ascontiguousarray(np.array(image, dtype=np.uint8)[..., ::-1])
    _, _, restored_img = self.gfpgan.enhance(
        bgr_input,
        has_aligned=False,
        only_center_face=False,
        paste_back=True,
    )
    res = Image.fromarray(np.ascontiguousarray(restored_img[..., ::-1]))

    if strength < 1.0:
        # Image.blend() raises ValueError unless both images share the
        # same size; GFPGAN may have upscaled, so match the original to
        # the restored image before mixing them.
        if image.size != res.size:
            image = image.resize(res.size)
        res = Image.blend(image, res, strength)

    return res
def load_gfpgan_bg_upsampler(bg_upsampler, bg_tile=400):
    """
    Build the background upsampler object that is handed to GFPGANer.

    bg_upsampler // upsampler name: 'realesrgan' (case-insensitive), or
                 // None / 'none' / '' for no background upsampling
    bg_tile      // tile size passed to RealESRGANer

    Returns a RealESRGANer instance, or None when no upsampler was
    requested, when the name is not recognized, or when CUDA is not
    available (a warning is issued in the last two cases).
    """
    import warnings

    name = '' if bg_upsampler is None else str(bg_upsampler).strip().lower()

    if name != 'realesrgan':
        if name not in ('', 'none'):
            # previously any unknown value silently disabled upsampling
            warnings.warn(f'Unrecognized GFPGAN background upsampler {bg_upsampler!r}; '
                          'background upsampling is disabled. Supported values: realesrgan, none.')
        return None

    # torch is only needed on the RealESRGAN path, so import it lazily
    import torch

    if not torch.cuda.is_available():  # CPU
        warnings.warn('The unoptimized RealESRGAN is slow on CPU. We do not use it. '
                      'If you really want to use it, please modify the corresponding codes.')
        return None

    from basicsr.archs.rrdbnet_arch import RRDBNet
    from realesrgan import RealESRGANer

    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
    return RealESRGANer(
        scale=2,
        model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
        model=model,
        tile=bg_tile,
        tile_pad=10,
        pre_pad=0,
        half=True)  # need to set False in CPU mode
Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_bg_tile", + type=int, + default=400, + help="Tile size for background sampler, 0 for no tile during testing. Default: 400. Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_model_path", + type=str, + default='experiments/pretrained_models/GFPGANv1.3.pth', + help="indicates the path to the GFPGAN model, relative to --gfpgan_dir. Only used if --gfpgan is specified") + parser.add_argument("--gfpgan_dir", + type=str, + default='../GFPGAN', + help="indicates the directory containing the GFPGAN code. Only used if --gfpgan is specified") return parser @@ -278,6 +354,7 @@ def create_cmd_parser(): parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)") parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)") parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely") + parser.add_argument('-G','--gfpgan_strength', default=0.5, type=float, help="The strength at which to apply the GFPGAN model to the result, in order to improve faces.") # variants is going to be superseded by a generalized "prompt-morph" function # parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants") parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization") diff --git a/scripts/preload_models.py b/scripts/preload_models.py index 19138c287b..d7538c82b8 100755 --- a/scripts/preload_models.py +++ b/scripts/preload_models.py @@ -5,6 +5,8 @@ # two machines must share a common .cache directory. 
import sys import transformers +import os +import warnings transformers.logging.set_verbosity_error() @@ -15,15 +17,15 @@ tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") print("...success") # this will download requirements for Kornia -print("preloading Kornia requirements (ignore the warnings)...") -import kornia +print("preloading Kornia requirements (ignore the deprecation warnings)...") +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import kornia print("...success") -# doesn't work - probably wrong logger -# logging.getLogger('transformers.tokenization_utils').setLevel(logging.ERROR) version='openai/clip-vit-large-patch14' -print('preloading CLIP model (Ignore the warnings)...') +print('preloading CLIP model (Ignore the deprecation warnings)...') sys.stdout.flush() import clip from transformers import CLIPTokenizer, CLIPTextModel @@ -31,4 +33,28 @@ tokenizer =CLIPTokenizer.from_pretrained(version) transformer=CLIPTextModel.from_pretrained(version) print('\n\n...success') +# In the event that the user has installed GFPGAN and also elected to use +# RealESRGAN, this will attempt to download the model needed by RealESRGANer +gfpgan = False +try: + from realesrgan import RealESRGANer + gfpgan = True +except ModuleNotFoundError: + pass + +if gfpgan: + print("Loading models from RealESRGAN and facexlib") + try: + from basicsr.archs.rrdbnet_arch import RRDBNet + from facexlib.utils.face_restoration_helper import FaceRestoreHelper + RealESRGANer(scale=2, + model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth', + model=RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)) + FaceRestoreHelper(1,det_model='retinaface_resnet50') + print("...success") + except Exception: + import traceback + print("Error loading GFPGAN:") + print(traceback.format_exc()) +