Merge branch 'development' into development

2024-08-30 20:32:17 +00:00 · 2022-10-01 19:26:46 -04:00
parent 2d1c3d7b0b 72834ad16c
commit e246e7c8b9
6 changed files with 167 additions and 21 deletions
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@ -146,6 +146,7 @@ Here are the dream> command that apply to txt2img:
 | --cfg_scale <float>| -C<float> | 7.5                 | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
 | --seed <int>       | -S<int>   | None                | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
 | --sampler <sampler>| -A<sampler>| k_lms              | Sampler to use. Use -h to get list of available samplers. |
+| --hires_fix        |           | False               | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution. |
 | --grid             | -g        | False               | Turn on grid mode to return a single image combining all the images generated by this prompt |
 | --individual       | -i        | True                | Turn off grid mode (deprecated; leave off --grid instead) |
 | --outdir <path>    |  -o<path> | outputs/img_samples  | Temporarily change the location of these images |
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@ -581,6 +581,12 @@ class Args(object):
            type=str,
            help='Directory to save generated images and a log of prompts and seeds',
        )
+        render_group.add_argument(
+            '--hires_fix',
+            action='store_true',
+            dest='hires_fix',
+            help='Create hires image using img2img to prevent duplicated objects'
+        )
        img2img_group.add_argument(
            '-I',
            '--init_img',
--- a/ldm/dream/generator/txt2img2img.py
+++ b/ldm/dream/generator/txt2img2img.py
@ -0,0 +1,126 @@
+'''
+ldm.dream.generator.txt2img2img inherits from ldm.dream.generator
+'''
+
+import torch
+import numpy as  np
+import math
+from ldm.dream.generator.base  import Generator
+from ldm.models.diffusion.ddim import DDIMSampler
+
+
+class Txt2Img2Img(Generator):
+    """
+    Two-pass generator: samples latents at a reduced size, resizes them to
+    the requested size, then runs an img2img-style encode/decode pass over
+    the resized latents.
+    """
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
+        # NOTE(review): get_noise() in this class does not read init_latent;
+        # presumably kept for the base class -- confirm
+        self.init_latent = None    # for get_noise()
+
+    @torch.no_grad()
+    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
+                       conditioning,width,height,strength,step_callback=None,**kwargs):
+        """
+        Returns a function that renders an image in two passes: a first
+        sampling pass at a reduced size, followed by an img2img-style
+        encode/decode pass at the requested width x height.
+        Return value depends on the seed at the time you call it.
+
+        conditioning  -- pair unpacked as (uc, c) and handed to the samplers
+        width, height -- requested output size
+        strength      -- fraction of `steps` used by the second pass
+                         (t_enc = int(strength * steps))
+        step_callback -- forwarded to both passes as img_callback
+        `prompt` and any extra kwargs are accepted but not used here.
+        """
+        uc, c   = conditioning
+
+        @torch.no_grad()
+        def make_image(x_T):           
+            
+            # Scale the requested size so that its area is about 512*512
+            # while keeping the aspect ratio.
+            # NOTE(review): 512 is presumably the model's training
+            # resolution (going by the variable name) -- confirm
+            trained_square = 512 * 512
+            actual_square = width * height
+            scale = math.sqrt(trained_square / actual_square)
+
+            # round each first-pass dimension up to a multiple of 64
+            init_width = math.ceil(scale * width / 64) * 64
+            init_height = math.ceil(scale * height / 64) * 64
+            
+            # latent shape for the first pass
+            shape = [
+                self.latent_channels,
+                init_height // self.downsampling_factor,
+                init_width // self.downsampling_factor,
+            ]
+            
+            # fresh noise sized for the first pass; the caller's x_T is
+            # only used later, as the noise for stochastic_encode()
+            x = self.get_noise(init_width, init_height)
+            
+            # bring the inner model onto the main device if it is elsewhere
+            if self.free_gpu_mem and self.model.model.device != self.model.device:
+                self.model.model.to(self.model.device)
+
+            # first pass: sample at the reduced size
+            samples, _ = sampler.sample(
+                batch_size                   = 1,
+                S                            = steps,
+                x_T                          = x,
+                conditioning                 = c,
+                shape                        = shape,
+                verbose                      = False,
+                unconditional_guidance_scale = cfg_scale,
+                unconditional_conditioning   = uc,
+                eta                          = ddim_eta,
+                img_callback                 = step_callback
+            )
+            
+            print(
+                  f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height}"
+                 )
+            
+            # resizing: bring the first-pass latents to the latent
+            # dimensions of the requested width x height
+            samples = torch.nn.functional.interpolate(
+                samples, 
+                size=(height // self.downsampling_factor, width // self.downsampling_factor), 
+                mode="bilinear"
+            )
+
+            # number of steps used by the second pass
+            t_enc = int(strength * steps)
+
+            # NOTE(review): x is not read again below; presumably cleared to
+            # drop the reference to the first-pass noise -- confirm
+            x = None
+
+            # Other samplers not supported yet, so ignore previous sampler
+            if not isinstance(sampler,DDIMSampler):
+                print(
+                    f"\n>> Sampler '{sampler.__class__.__name__}' is not yet supported for img2img. Using DDIM sampler"
+                )
+                img_sampler = DDIMSampler(self.model, device=self.model.device)
+                img_sampler.make_schedule(
+                    ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
+                )
+            else:
+                img_sampler = sampler
+            
+            # second pass, step 1: encode the resized latents to step t_enc,
+            # using the caller-supplied x_T as the noise
+            # (assumes x_T matches the full-size latent shape -- TODO confirm)
+            z_enc = img_sampler.stochastic_encode(
+                samples,
+                torch.tensor([t_enc]).to(self.model.device),
+                noise=x_T
+            )
+
+            # decode it
+            samples = img_sampler.decode(
+                z_enc,
+                c,
+                t_enc,
+                img_callback = step_callback,
+                unconditional_guidance_scale=cfg_scale,
+                unconditional_conditioning=uc,
+            )
+
+            # with free_gpu_mem set, move the inner model to the CPU afterwards
+            if self.free_gpu_mem:
+                self.model.model.to("cpu")
+
+            return self.sample_to_image(samples)
+
+        return make_image
+
+
+    # returns a tensor filled with random numbers from a normal distribution
+    # shape: [1, latent_channels, height // downsampling_factor,
+    #         width // downsampling_factor]
+    # on 'mps' devices the tensor is created on the CPU and then moved to the
+    # device; on any other device it is created there directly
+    def get_noise(self,width,height):
+        device      = self.model.device
+        if device.type == 'mps':
+            return torch.randn([1,
+                                self.latent_channels,
+                                height // self.downsampling_factor,
+                                width  // self.downsampling_factor],
+                                device='cpu').to(device)
+        else:
+            return torch.randn([1,
+                                self.latent_channels,
+                                height // self.downsampling_factor,
+                                width  // self.downsampling_factor],
+                                device=device)
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@ -46,6 +46,7 @@ COMMANDS = (
    '-save_orig','--save_original',
    '--skip_normalize','-x',
    '--log_tokenization','-t',
+    '--hires_fix',
    '!fix','!fetch','!history',
    )
 IMG_PATH_COMMANDS = (
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -289,6 +289,7 @@ class Generate:
            upscale          = None,
            # Set this True to handle KeyboardInterrupt internally
            catch_interrupts = False,
+            hires_fix        = False,
            **args,
    ):   # eat up additional cruft
        """
@ -411,6 +412,8 @@ class Generate:
                generator = self._make_embiggen()
            elif init_image is not None:
                generator = self._make_img2img()
+            elif hires_fix:
+                generator = self._make_txt2img2img()
            else:
                generator = self._make_txt2img()

@ -670,6 +673,13 @@ class Generate:
            self.generators['txt2img'].free_gpu_mem = self.free_gpu_mem
        return self.generators['txt2img']

+    def _make_txt2img2img(self):
+        """Return the cached Txt2Img2Img generator, building it on first use."""
+        key = 'txt2img2'
+        cached = self.generators.get(key)
+        if cached:
+            return cached
+        # imported lazily, only when this generator is first requested
+        from ldm.dream.generator.txt2img2img import Txt2Img2Img
+        generator = Txt2Img2Img(self.model, self.precision)
+        generator.free_gpu_mem = self.free_gpu_mem
+        self.generators[key] = generator
+        return generator
+
    def _make_inpaint(self):
        if not self.generators.get('inpaint'):
            from ldm.dream.generator.inpaint import Inpaint
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -201,9 +201,7 @@ def main_loop(gen, opt, infile):
                    oldargs    = metadata_from_png(opt.init_img)
                    opt.prompt = oldargs.prompt
                    print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
-            except AttributeError:
-                pass
-            except KeyError:
+            except (OSError, AttributeError, KeyError):
                pass

        if len(opt.prompt) == 0:
@ -279,9 +277,6 @@ def main_loop(gen, opt, infile):
            prefix = file_writer.unique_prefix()

            def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
-                print(f'DEBUG:upscaled={upscaled}, first_seed={first_seed}, use_prefix={use_prefix}')
-
-                
                # note the seed is the seed of the current image
                # the first_seed is the original seed that noise is added to
                # when the -v switch is used to generate variations
@ -379,9 +374,6 @@ def do_postprocess (gen, opt, callback):
    file_path = opt.prompt     # treat the prompt as the file pathname
    if os.path.dirname(file_path) == '': #basename given
        file_path = os.path.join(opt.outdir,file_path)
-    if not os.path.exists(file_path):
-        print(f'* file {file_path} does not exist')
-        return

    tool=None
    if opt.gfpgan_strength > 0:
@ -394,17 +386,24 @@ def do_postprocess (gen, opt, callback):
        tool = 'outpaint'
    opt.save_original = True # do not overwrite old image!
    opt.last_operation    = f'postprocess:{tool}'
-    gen.apply_postprocessor(
-        image_path      = file_path,
-        tool            = tool,
-        gfpgan_strength = opt.gfpgan_strength,
-        codeformer_fidelity = opt.codeformer_fidelity,
-        save_original       = opt.save_original,
-        upscale             = opt.upscale,
-        out_direction       = opt.out_direction,
-        callback            = callback,
-        opt                 = opt,
+    try:
+        gen.apply_postprocessor(
+            image_path      = file_path,
+            tool            = tool,
+            gfpgan_strength = opt.gfpgan_strength,
+            codeformer_fidelity = opt.codeformer_fidelity,
+            save_original       = opt.save_original,
+            upscale             = opt.upscale,
+            out_direction       = opt.out_direction,
+            callback            = callback,
+            opt                 = opt,
        )
+    except OSError:
+        print(f'** {file_path}: file could not be read')
+        return
+    except (KeyError, AttributeError):
+        print(f'** {file_path}: file has no metadata')
+        return
    return opt.last_operation
    
 def prepare_image_metadata(
@ -521,8 +520,11 @@ def retrieve_dream_command(opt,file_path,completer):
        path = file_path
    try:
        cmd = dream_cmd_from_png(path)
-    except FileNotFoundError:
-        print(f'** {path}: file not found')
+    except OSError:
+        print(f'** {path}: file could not be read')
+        return
+    except (KeyError, AttributeError):
+        print(f'** {path}: file has no metadata')
        return
    completer.set_line(cmd)