Hi res mode fix duplicates with img2img scaling

Add message about interpolation size. Fix crash if sampler not set to DDIM; change parameter name to hires_fix. Hi-res mode: fix duplicates with img2img scaling.
2024-08-30 20:32:17 +00:00 · 2022-09-30 00:58:06 +02:00 · 2022-09-30 00:58:06 +02:00 · 0c354eccaa
commit 0c354eccaa
parent 33162355be
3 changed files with 142 additions and 0 deletions
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@ -569,6 +569,12 @@ class Args(object):
            type=str,
            help='Directory to save generated images and a log of prompts and seeds',
        )
        render_group.add_argument(
            '--hires_fix',
            action='store_true',
            dest='hires_fix',
            help='Create hires image using img2img to prevent dupes'
        )
        img2img_group.add_argument(
            '-I',
            '--init_img',
--- a/ldm/dream/generator/txt2img2img.py
+++ b/ldm/dream/generator/txt2img2img.py
@ -0,0 +1,126 @@
 '''
 ldm.dream.generator.txt2img2img inherits from ldm.dream.generator
 '''
 import torch
 import numpy as  np
 import math
 from ldm.dream.generator.base  import Generator
 from ldm.models.diffusion.ddim import DDIMSampler
 class Txt2Img2Img(Generator):
    """
    Two-pass generator: latents are sampled at an intermediate size whose
    area is scaled to 512*512, resized to the requested size, and then run
    through the sampler's stochastic_encode()/decode() pair.
    """
    def __init__(self, model, precision):
        super().__init__(model, precision)
        # NOTE(review): nothing in this class reads init_latent -- confirm
        # whether the base class relies on it before removing.
        self.init_latent = None

    @torch.no_grad()
    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
                       conditioning,width,height,strength,step_callback=None,**kwargs):
        """
        Build and return make_image(x_T), a closure producing an image for
        the given conditioning at width x height.

        conditioning is unpacked as (unconditional, conditional).
        The closure samples at an intermediate size (area scaled to 512*512,
        each side rounded up to a multiple of 64), bilinearly resizes the
        resulting latents to the requested size, then calls
        stochastic_encode() at step int(strength * steps) followed by
        decode().  If the supplied sampler is not a DDIMSampler, a fresh
        DDIMSampler is used for that second stage.
        prompt and any extra kwargs are accepted but not used here.
        """
        uncond, cond = conditioning

        @torch.no_grad()
        def make_image(x_T):
            # Pick first-pass dimensions so that their area is about 512*512,
            # rounding each side up to the next multiple of 64.
            reference_area = 512 * 512
            requested_area = width * height
            scale = math.sqrt(reference_area / requested_area)
            init_width = math.ceil(scale * width / 64) * 64
            init_height = math.ceil(scale * height / 64) * 64

            latent_shape = [
                self.latent_channels,
                init_height // self.downsampling_factor,
                init_width // self.downsampling_factor,
            ]
            # Fresh noise for the first pass; x_T is only used in the
            # stochastic_encode() call further down.
            first_pass_noise = self.get_noise(init_width, init_height)

            if self.free_gpu_mem:
                if self.model.model.device != self.model.device:
                    self.model.model.to(self.model.device)

            samples, _ = sampler.sample(
                batch_size=1,
                S=steps,
                x_T=first_pass_noise,
                conditioning=cond,
                shape=latent_shape,
                verbose=False,
                unconditional_guidance_scale=cfg_scale,
                unconditional_conditioning=uncond,
                eta=ddim_eta,
                img_callback=step_callback,
            )

            print(f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height}")

            # Resize the first-pass latents to the latent size of the
            # requested image.
            target_latent_size = (
                height // self.downsampling_factor,
                width // self.downsampling_factor,
            )
            samples = torch.nn.functional.interpolate(
                samples, size=target_latent_size, mode="bilinear"
            )

            encode_step = int(strength * steps)

            # Only DDIM is used for the second stage; any other sampler is
            # replaced by a newly scheduled DDIMSampler.
            if isinstance(sampler, DDIMSampler):
                img_sampler = sampler
            else:
                print(
                    f"\n>> Sampler '{sampler.__class__.__name__}' is not yet supported for img2img. Using DDIM sampler"
                )
                img_sampler = DDIMSampler(self.model, device=self.model.device)
                img_sampler.make_schedule(
                    ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
                )

            step_tensor = torch.tensor([encode_step]).to(self.model.device)
            encoded = img_sampler.stochastic_encode(samples, step_tensor, noise=x_T)

            # Decode the encoded latents.
            samples = img_sampler.decode(
                encoded,
                cond,
                encode_step,
                img_callback=step_callback,
                unconditional_guidance_scale=cfg_scale,
                unconditional_conditioning=uncond,
            )

            if self.free_gpu_mem:
                self.model.model.to("cpu")

            return self.sample_to_image(samples)

        return make_image

    def get_noise(self,width,height):
        """
        Return a tensor of normally distributed random numbers shaped
        [1, latent_channels, height // downsampling_factor,
        width // downsampling_factor] on the model's device.
        """
        device = self.model.device
        noise_shape = [
            1,
            self.latent_channels,
            height // self.downsampling_factor,
            width // self.downsampling_factor,
        ]
        if device.type == 'mps':
            # On 'mps' the noise is drawn on the CPU and then moved over.
            return torch.randn(noise_shape, device='cpu').to(device)
        return torch.randn(noise_shape, device=device)
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -287,6 +287,7 @@ class Generate:
            upscale          = None,
            # Set this True to handle KeyboardInterrupt internally
            catch_interrupts = False,
            hires_fix        = False,
            **args,
    ):   # eat up additional cruft
        """
@ -403,6 +404,8 @@ class Generate:
                generator = self._make_embiggen()
            elif init_image is not None:
                generator = self._make_img2img()
            elif hires_fix:
                generator = self._make_txt2img2img()
            else:
                generator = self._make_txt2img()
@ -660,6 +663,13 @@ class Generate:
            self.generators['txt2img'].free_gpu_mem = self.free_gpu_mem
        return self.generators['txt2img']
    def _make_txt2img2img(self):
        if not self.generators.get('txt2img2'):
            from ldm.dream.generator.txt2img2img import Txt2Img2Img
            self.generators['txt2img2'] = Txt2Img2Img(self.model, self.precision)
            self.generators['txt2img2'].free_gpu_mem = self.free_gpu_mem
        return self.generators['txt2img2']
    def _make_inpaint(self):
        if not self.generators.get('inpaint'):
            from ldm.dream.generator.inpaint import Inpaint