diff --git a/docs/features/CLI.md b/docs/features/CLI.md
index 530d659c64..2073c67f34 100644
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@@ -146,6 +146,7 @@ Here are the dream> command that apply to txt2img:
 | --cfg_scale | -C | 7.5 | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
 | --seed | -S | None | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
 | --sampler | -A| k_lms | Sampler to use. Use -h to get list of available samplers. |
+| --hires_fix | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
 | --grid | -g | False | Turn on grid mode to return a single image combining all the images generated by this prompt |
 | --individual | -i | True | Turn off grid mode (deprecated; leave off --grid instead) |
 | --outdir | -o | outputs/img_samples | Temporarily change the location of these images |
diff --git a/ldm/dream/args.py b/ldm/dream/args.py
index 4399965dc9..ba0cc90992 100644
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@@ -581,6 +581,12 @@ class Args(object):
             type=str,
             help='Directory to save generated images and a log of prompts and seeds',
         )
+        render_group.add_argument(
+            '--hires_fix',
+            action='store_true',
+            dest='hires_fix',
+            help='Create hires image using img2img to prevent duplicated objects'
+        )
         img2img_group.add_argument(
             '-I',
             '--init_img',
diff --git a/ldm/dream/generator/txt2img2img.py b/ldm/dream/generator/txt2img2img.py
new file mode 100644
index 0000000000..502a2bdca3
--- /dev/null
+++ b/ldm/dream/generator/txt2img2img.py
@@ -0,0 +1,126 @@
+'''
+ldm.dream.generator.txt2img2img inherits from ldm.dream.generator
+'''
+
+import torch
+import numpy as np
+import math
+from ldm.dream.generator.base import Generator
+from ldm.models.diffusion.ddim import DDIMSampler
+
+
+class Txt2Img2Img(Generator):
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
+        self.init_latent = None    # for get_noise()
+
+    @torch.no_grad()
+    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
+                       conditioning,width,height,strength,step_callback=None,**kwargs):
+        """
+        Returns a function returning an image derived from the prompt and the initial image
+        Return value depends on the seed at the time you call it
+        kwargs are 'width' and 'height'
+        """
+        uc, c = conditioning
+
+        @torch.no_grad()
+        def make_image(x_T):
+
+            trained_square = 512 * 512
+            actual_square = width * height
+            scale = math.sqrt(trained_square / actual_square)
+
+            init_width = math.ceil(scale * width / 64) * 64
+            init_height = math.ceil(scale * height / 64) * 64
+
+            shape = [
+                self.latent_channels,
+                init_height // self.downsampling_factor,
+                init_width // self.downsampling_factor,
+            ]
+
+            x = self.get_noise(init_width, init_height)
+
+            if self.free_gpu_mem and self.model.model.device != self.model.device:
+                self.model.model.to(self.model.device)
+
+            samples, _ = sampler.sample(
+                batch_size = 1,
+                S = steps,
+                x_T = x,
+                conditioning = c,
+                shape = shape,
+                verbose = False,
+                unconditional_guidance_scale = cfg_scale,
+                unconditional_conditioning = uc,
+                eta = ddim_eta,
+                img_callback = step_callback
+            )
+
+            print(
+                f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height}"
+            )
+
+            # resizing
+            samples = torch.nn.functional.interpolate(
+                samples,
+                size=(height // self.downsampling_factor, width // self.downsampling_factor),
+                mode="bilinear"
+            )
+
+            t_enc = int(strength * steps)
+
+            x = None
+
+            # Other samplers not supported yet, so ignore previous sampler
+            if not isinstance(sampler,DDIMSampler):
+                print(
+                    f"\n>> Sampler '{sampler.__class__.__name__}' is not yet supported for img2img. Using DDIM sampler"
+                )
+                img_sampler = DDIMSampler(self.model, device=self.model.device)
+                img_sampler.make_schedule(
+                    ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
+                )
+            else:
+                img_sampler = sampler
+
+            z_enc = img_sampler.stochastic_encode(
+                samples,
+                torch.tensor([t_enc]).to(self.model.device),
+                noise=x_T
+            )
+
+            # decode it
+            samples = img_sampler.decode(
+                z_enc,
+                c,
+                t_enc,
+                img_callback = step_callback,
+                unconditional_guidance_scale=cfg_scale,
+                unconditional_conditioning=uc,
+            )
+
+            if self.free_gpu_mem:
+                self.model.model.to("cpu")
+
+            return self.sample_to_image(samples)
+
+        return make_image
+
+
+    # returns a tensor filled with random numbers from a normal distribution
+    def get_noise(self,width,height):
+        device = self.model.device
+        if device.type == 'mps':
+            return torch.randn([1,
+                                self.latent_channels,
+                                height // self.downsampling_factor,
+                                width // self.downsampling_factor],
+                               device='cpu').to(device)
+        else:
+            return torch.randn([1,
+                                self.latent_channels,
+                                height // self.downsampling_factor,
+                                width // self.downsampling_factor],
+                               device=device)
diff --git a/ldm/dream/readline.py b/ldm/dream/readline.py
index 271464a29e..f75c597eeb 100644
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@@ -46,6 +46,7 @@ COMMANDS = (
     '-save_orig','--save_original',
     '--skip_normalize','-x',
     '--log_tokenization','-t',
+    '--hires_fix',
     '!fix','!fetch','!history',
     )
 IMG_PATH_COMMANDS = (
diff --git a/ldm/generate.py b/ldm/generate.py
index da4b44664a..dd8c1c1390 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -289,6 +289,7 @@ class Generate:
             upscale = None,
             # Set this True to handle KeyboardInterrupt internally
             catch_interrupts = False,
+            hires_fix = False,
             **args,
     ):   # eat up additional cruft
         """
@@ -411,6 +412,8 @@ class Generate:
                 generator = self._make_embiggen()
             elif init_image is not None:
                 generator = self._make_img2img()
+            elif hires_fix:
+                generator = self._make_txt2img2img()
             else:
                 generator = self._make_txt2img()
 
@@ -670,6 +673,13 @@ class Generate:
         self.generators['txt2img'].free_gpu_mem = self.free_gpu_mem
         return self.generators['txt2img']
 
+    def _make_txt2img2img(self):
+        if not self.generators.get('txt2img2'):
+            from ldm.dream.generator.txt2img2img import Txt2Img2Img
+            self.generators['txt2img2'] = Txt2Img2Img(self.model, self.precision)
+            self.generators['txt2img2'].free_gpu_mem = self.free_gpu_mem
+        return self.generators['txt2img2']
+
     def _make_inpaint(self):
         if not self.generators.get('inpaint'):
             from ldm.dream.generator.inpaint import Inpaint
diff --git a/scripts/dream.py b/scripts/dream.py
index 7a9b26a995..84dca75e8f 100644
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -201,9 +201,7 @@ def main_loop(gen, opt, infile):
                 oldargs = metadata_from_png(opt.init_img)
                 opt.prompt = oldargs.prompt
                 print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
-            except AttributeError:
-                pass
-            except KeyError:
+            except (OSError, AttributeError, KeyError):
                 pass
 
         if len(opt.prompt) == 0:
@@ -279,9 +277,6 @@ def main_loop(gen, opt, infile):
        prefix = file_writer.unique_prefix()
 
        def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
-           print(f'DEBUG:upscaled={upscaled}, first_seed={first_seed}, use_prefix={use_prefix}')
-
-
            # note the seed is the seed of the current image
            # the first_seed is the original seed that noise is added to
            # when the -v switch is used to generate variations
@@ -379,9 +374,6 @@ def do_postprocess (gen, opt, callback):
     file_path = opt.prompt     # treat the prompt as the file pathname
     if os.path.dirname(file_path) == '': #basename given
         file_path = os.path.join(opt.outdir,file_path)
-    if not os.path.exists(file_path):
-        print(f'* file {file_path} does not exist')
-        return
 
     tool=None
     if opt.gfpgan_strength > 0:
@@ -394,17 +386,24 @@ def do_postprocess (gen, opt, callback):
         tool = 'outpaint'
     opt.save_original = True # do not overwrite old image!
     opt.last_operation = f'postprocess:{tool}'
-    gen.apply_postprocessor(
-        image_path = file_path,
-        tool = tool,
-        gfpgan_strength = opt.gfpgan_strength,
-        codeformer_fidelity = opt.codeformer_fidelity,
-        save_original = opt.save_original,
-        upscale = opt.upscale,
-        out_direction = opt.out_direction,
-        callback = callback,
-        opt = opt,
+    try:
+        gen.apply_postprocessor(
+            image_path = file_path,
+            tool = tool,
+            gfpgan_strength = opt.gfpgan_strength,
+            codeformer_fidelity = opt.codeformer_fidelity,
+            save_original = opt.save_original,
+            upscale = opt.upscale,
+            out_direction = opt.out_direction,
+            callback = callback,
+            opt = opt,
         )
+    except OSError:
+        print(f'** {file_path}: file could not be read')
+        return
+    except (KeyError, AttributeError):
+        print(f'** {file_path}: file has no metadata')
+        return
     return opt.last_operation
 
 def prepare_image_metadata(
@@ -521,8 +520,11 @@ def retrieve_dream_command(opt,file_path,completer):
         path = file_path
     try:
         cmd = dream_cmd_from_png(path)
-    except FileNotFoundError:
-        print(f'** {path}: file not found')
+    except OSError:
+        print(f'** {path}: file could not be read')
+        return
+    except (KeyError, AttributeError):
+        print(f'** {path}: file has no metadata')
         return
     completer.set_line(cmd)
 
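For reference, a sketch of how the new switch is used once this patch is applied. The prompt text and dimensions below are only illustrative, and -W/-H are the CLI's existing width/height switches:

dream> "a snowy mountain village at dusk" -W 1024 -H 768 --hires_fix

Without --hires_fix, a 1024x768 request is sampled directly at that size, which is where the duplicated-subject artefacts described in CLI.md tend to appear.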
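A minimal sketch of the size calculation make_image() performs for the first, low-resolution pass, assuming the model's native 512x512 training resolution. first_pass_size is a hypothetical helper used only for illustration; the patch does this arithmetic inline:

import math

def first_pass_size(width, height, trained_edge=512):
    # shrink the requested area back to roughly the training area,
    # rounding each edge up to a multiple of 64
    scale = math.sqrt((trained_edge * trained_edge) / (width * height))
    init_width = math.ceil(scale * width / 64) * 64
    init_height = math.ceil(scale * height / 64) * 64
    return init_width, init_height

print(first_pass_size(1024, 768))   # (640, 448): the first pass stays near 512x512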
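The hand-off between the two passes happens in latent space: the first-pass result is bilinearly interpolated up to the latent size of the requested image, then re-noised via stochastic_encode and decoded by the DDIM img2img pass, with strength controlling how much noise is added. A self-contained sketch of just the resize step, using a dummy tensor and assuming latent_channels == 4 and downsampling_factor == 8:

import torch

# stand-in for the first-pass output: 1 image, 4 latent channels,
# a 640x448 first pass -> 80x56 latents
samples = torch.randn(1, 4, 448 // 8, 640 // 8)

# upscale to the latent size of the requested 1024x768 image, as make_image() does
samples = torch.nn.functional.interpolate(
    samples,
    size=(768 // 8, 1024 // 8),
    mode="bilinear",
)
print(samples.shape)   # torch.Size([1, 4, 96, 128])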