diff --git a/README.md b/README.md index 6726a7eca4..8d108733ea 100644 --- a/README.md +++ b/README.md @@ -101,8 +101,15 @@ pip install realesrgan ``` -Now, you can run this script by adding the --gfpgan option. Any issues -with GFPGAN will be reported on initialization. +Users whose GPU machines are isolated from the Internet (e.g. on a +University cluster) should be aware that the first time you run +dream.py with GFPGAN turned on, it will try to download model files +from the Internet. To rectify this, you may run `python3 +scripts/preload_models.py` after you have installed GFPGAN and all its +dependencies. + +Now, you can run this script by adding the **--gfpgan** option. Any +issues with GFPGAN will be reported on initialization. ~~~~ (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --gfpgan @@ -132,12 +139,18 @@ That's it! There's also a bunch of options to control GFPGAN settings when starting the script for different configs that you can read about in the help text. This will let you control where GFPGAN is installed, if -upsampling is enapled, the upsampler to use and the model path. +upsampling is enabled, the upsampler to use and the model path. -Note that loading GFPGAN consumes additional GPU memory, but hey, -3090s with 24Gi of VRAM are cheap now *cough*. Additionally, a couple -of seconds will be tacked on when generating your images, but hey, -it's worth it. +By default, images will be upscaled by 2-fold, meaning that the old +Stable Diffusion default size of 512x512 will now be a glorious +detailed 1024x1024. The extent of upscaling is set when you run the +script, and can't be changed while it's running. However, at any time +you may specify **-G0** to turn off upscaling and facial enhancement +for that image or set of images. + +Note that loading GFPGAN consumes additional GPU memory, and will add +a few seconds to image generation. However, if you can afford a 3090 +with 24GiB of VRAM, the results are well worth it. 
## Barebones Web Server @@ -175,6 +188,34 @@ Then pass this file's name to dream.py when you invoke it: (ldm) ~/stable-diffusion$ python3 scripts/dream.py --from_file="path/to/prompts.txt" ~~~~ +## Shortcut for reusing seeds from the previous command + +Since it is so common to reuse seeds while refining a prompt, there is +now a shortcut as of version 1.11. Provide a **-S** (or **--seed**) +switch of -1 to use the seed of the most recent image generated. If +you produced multiple images with the **-n** switch, then you can go +back further using -2, -3, etc. up to the first image generated by the +previous command. Sorry, but you can't go back further than one +command. + +Here's an example of using this to do a quick refinement. It also +illustrates using the new **-G** switch to turn on upscaling and +face enhancement (see previous section): + +~~~~ +dream> a cute child playing hopscotch -G0.5 +[...] +outputs/img-samples/000039.3498014304.png: "a cute child playing hopscotch" -s50 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304 + +# I wonder what it will look like if I bump up the steps and set facial enhancement to full strength? +dream> a cute child playing hopscotch -G1.0 -s100 -S -1 +reusing previous seed 3498014304 +[...] +outputs/img-samples/000040.3498014304.png: "a cute child playing hopscotch" -G1.0 -s100 -b1 -W512 -H512 -C7.5 -mk_lms -S3498014304 +~~~~ + + + ## Weighted Prompts You may weight different sections of the prompt to tell the sampler to attach different levels of @@ -257,7 +298,11 @@ repository and associated paper for details and limitations. ## Changes - * v1.11 (pending) + * v1.11 (26 August 2022) + * NEW FEATURE: Support upscaling and face enhancement using the GFPGAN module. (kudos to [Oceanswave](https://github.com/Oceanswave)) + * You can now specify a seed of -1 to use the previous image's seed, -2 to use the seed for the image generated before that, etc. 
+ Seed memory only extends back to the previous command, but will work on all images generated with the -n# switch. + * Variant generation support temporarily disabled pending more general solution. * Created a feature branch named **yunsaki-morphing-dream** which adds experimental support for iteratively modifying the prompt and its parameters. Please see[ Pull Request #86](https://github.com/lstein/stable-diffusion/pull/86) for a synopsis of how this works. Note that when this feature is eventually added to the main branch, it will may be modified @@ -268,7 +313,8 @@ repository and associated paper for details and limitations. * v1.09 (24 August 2022) * A new -v option allows you to generate multiple variants of an initial image - in img2img mode. (kudos to [Oceanswave](https://github.com/Oceanswave). [See this discussion in the PR for examples and details on use](https://github.com/lstein/stable-diffusion/pull/71#issuecomment-1226700810)) + in img2img mode. (kudos to [Oceanswave](https://github.com/Oceanswave). 
[ + See this discussion in the PR for examples and details on use](https://github.com/lstein/stable-diffusion/pull/71#issuecomment-1226700810)) * Added ability to personalize text to image generation (kudos to [Oceanswave](https://github.com/Oceanswave) and [nicolai256](https://github.com/nicolai256)) * Enabled all of the samplers from k_diffusion diff --git a/ldm/dream/pngwriter.py b/ldm/dream/pngwriter.py index aca7b47c21..ecbbbd4ff7 100644 --- a/ldm/dream/pngwriter.py +++ b/ldm/dream/pngwriter.py @@ -109,6 +109,8 @@ class PromptFormatter(): switches.append(f'-I{opt.init_img}') if opt.strength and opt.init_img is not None: switches.append(f'-f{opt.strength or t2i.strength}') + if opt.gfpgan_strength: + switches.append(f'-G{opt.gfpgan_strength}') if t2i.full_precision: switches.append('-F') return ' '.join(switches) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 0de3a33237..f1f88bba5e 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -266,7 +266,6 @@ The vast majority of these arguments default to reasonable values. batch_size=batch_size, steps=steps,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, - gfpgan_strength=gfpgan_strength, init_img=init_img,strength=strength) else: images_iterator = self._txt2img(prompt, @@ -274,7 +273,6 @@ The vast majority of these arguments default to reasonable values. batch_size=batch_size, steps=steps,cfg_scale=cfg_scale,ddim_eta=ddim_eta, skip_normalize=skip_normalize, - gfpgan_strength=gfpgan_strength, width=width,height=height) with scope(self.device.type), self.model.ema_scope(): @@ -282,6 +280,11 @@ The vast majority of these arguments default to reasonable values. 
seed_everything(seed) iter_images = next(images_iterator) for image in iter_images: + try: + if gfpgan_strength > 0: + image = self._run_gfpgan(image, gfpgan_strength) + except Exception as e: + print(f"Error running GFPGAN - Your image was not enhanced.\n{e}") results.append([image, seed]) if image_callback is not None: image_callback(image,seed) @@ -305,7 +308,6 @@ The vast majority of these arguments default to reasonable values. batch_size, steps,cfg_scale,ddim_eta, skip_normalize, - gfpgan_strength, width,height): """ An infinite iterator of images from the prompt. @@ -325,7 +327,7 @@ The vast majority of these arguments default to reasonable values. unconditional_guidance_scale=cfg_scale, unconditional_conditioning=uc, eta=ddim_eta) - yield self._samples_to_images(samples, gfpgan_strength=gfpgan_strength) + yield self._samples_to_images(samples) @torch.no_grad() def _img2img(self, @@ -334,7 +336,6 @@ The vast majority of these arguments default to reasonable values. batch_size, steps,cfg_scale,ddim_eta, skip_normalize, - gfpgan_strength, init_img,strength): """ An infinite iterator of images from the prompt and the initial image @@ -365,7 +366,7 @@ The vast majority of these arguments default to reasonable values. # decode it samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale, unconditional_conditioning=uc,) - yield self._samples_to_images(samples, gfpgan_strength) + yield self._samples_to_images(samples) # TODO: does this actually need to run every loop? does anything in it vary by random seed? def _get_uc_and_c(self, prompt, batch_size, skip_normalize): @@ -389,18 +390,13 @@ The vast majority of these arguments default to reasonable values. 
c = self.model.get_learned_conditioning(batch_size * [prompt]) return (uc, c) - def _samples_to_images(self, samples, gfpgan_strength=0): + def _samples_to_images(self, samples): x_samples = self.model.decode_first_stage(samples) x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0) images = list() for x_sample in x_samples: x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c') image = Image.fromarray(x_sample.astype(np.uint8)) - try: - if gfpgan_strength > 0: - image = self._run_gfpgan(image, gfpgan_strength) - except Exception as e: - print(f"Error running GFPGAN - Your image was not enhanced.\n{e}") images.append(image) return images @@ -533,6 +529,9 @@ The vast majority of these arguments default to reasonable values. res = Image.fromarray(restored_img) if strength < 1.0: + # Resize the image to the new image if the sizes have changed + if restored_img.size != image.size: + image = image.resize(res.size) res = Image.blend(image, res, strength) return res diff --git a/scripts/dream.py b/scripts/dream.py index 1aaae59cb0..6a17656593 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -106,7 +106,8 @@ def main(): def main_loop(t2i,outdir,parser,log,infile): ''' prompt/read/execute loop ''' - done = False + done = False + last_seeds = [] while not done: try: @@ -176,7 +177,14 @@ def main_loop(t2i,outdir,parser,log,infile): if len(opt.prompt)==0: print("Try again with a prompt!") continue - + if opt.seed is not None and opt.seed<0: # retrieve previous value! 
+ try: + opt.seed = last_seeds[opt.seed] + print(f"reusing previous seed {opt.seed}") + except IndexError: + print(f"No previous seed at position {opt.seed} found") + opt.seed = None + normalized_prompt = PromptFormatter(t2i,opt).normalize_prompt() individual_images = not opt.grid @@ -195,6 +203,8 @@ def main_loop(t2i,outdir,parser,log,infile): metadata_prompt = f'{normalized_prompt} -S{results[0][1]}' file_writer.save_image_and_prompt_to_png(grid_img,metadata_prompt,filename) + last_seeds = [r[1] for r in results] + except AssertionError as e: print(e) continue @@ -344,7 +354,7 @@ def create_cmd_parser(): parser = argparse.ArgumentParser(description='Example: dream> a fantastic alien landscape -W1024 -H960 -s100 -n12') parser.add_argument('prompt') parser.add_argument('-s','--steps',type=int,help="number of steps") - parser.add_argument('-S','--seed',type=int,help="image seed") + parser.add_argument('-S','--seed',type=int,help="image seed; a +ve integer, or use -1 for the previous seed, -2 for the one before that, etc") parser.add_argument('-n','--iterations',type=int,default=1,help="number of samplings to perform (slower, but will provide seeds for individual images)") parser.add_argument('-b','--batch_size',type=int,default=1,help="number of images to produce per sampling (will not provide seeds for individual images!)") parser.add_argument('-W','--width',type=int,help="image width, multiple of 64")