Merge branch 'development' into development

2024-08-30 20:32:17 +00:00 · 2022-10-01 19:26:46 -04:00 · 2022-10-01 19:26:46 -04:00 · e246e7c8b9
commit e246e7c8b9
parent 2d1c3d7b0b 72834ad16c
6 changed files with 167 additions and 21 deletions
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@ -146,6 +146,7 @@ Here are the dream> command that apply to txt2img:
 | --cfg_scale <float>| -C<float> | 7.5                 | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
 | --seed <int>       | -S<int>   | None                | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
 | --sampler <sampler>| -A<sampler>| k_lms              | Sampler to use. Use -h to get list of available samplers. |
 | --hires_fix        |           |                     | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
 | --grid             | -g        | False               | Turn on grid mode to return a single image combining all the images generated by this prompt |
 | --individual       | -i        | True                | Turn off grid mode (deprecated; leave off --grid instead) |
 | --outdir <path>    |  -o<path> | outputs/img_samples  | Temporarily change the location of these images |
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@ -581,6 +581,12 @@ class Args(object):
            type=str,
            help='Directory to save generated images and a log of prompts and seeds',
        )
        render_group.add_argument(
            '--hires_fix',
            action='store_true',
            dest='hires_fix',
            help='Create hires image using img2img to prevent duplicated objects'
        )
        img2img_group.add_argument(
            '-I',
            '--init_img',
--- a/ldm/dream/generator/txt2img2img.py
+++ b/ldm/dream/generator/txt2img2img.py
@ -0,0 +1,126 @@
 '''
 ldm.dream.generator.txt2img2img inherits from ldm.dream.generator
 '''
 import torch
 import numpy as  np
 import math
 from ldm.dream.generator.base  import Generator
 from ldm.models.diffusion.ddim import DDIMSampler
 class Txt2Img2Img(Generator):
    """
    Two-pass text-to-image generator.

    The first pass samples latents at a reduced size; those latents are then
    resized to the requested size and refined with an img2img-style
    encode/decode pass.
    """
    def __init__(self, model, precision):
        super().__init__(model, precision)
        self.init_latent = None    # for get_noise()
    @torch.no_grad()
    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
                       conditioning,width,height,strength,step_callback=None,**kwargs):
        """
        Returns a function returning an image derived from the prompt.
        Return value depends on the seed at the time you call it.

        sampler       -- sampler used for the first (reduced-size) pass; it is
                         reused for the second pass only if it is a DDIMSampler
        steps         -- number of sampling steps for the first pass; also used
                         to derive the number of encode/decode steps
        cfg_scale     -- unconditional guidance scale passed to both passes
        ddim_eta      -- eta passed to the sampler(s)
        conditioning  -- pair (unconditional, conditional) conditioning
        width, height -- size of the final image
        strength      -- fraction of `steps` used by the second pass
                         (t_enc = int(strength * steps))
        step_callback -- optional callback forwarded as img_callback
        `prompt` and any extra kwargs are accepted but not used here.
        """
        uc, c   = conditioning
        @torch.no_grad()
        def make_image(x_T):
            # Pick a first-pass size whose area is about 512*512 while keeping
            # the requested aspect ratio; each side is rounded up to a multiple
            # of 64.  (512x512 is presumably the model's training size -- confirm.)
            trained_square = 512 * 512
            actual_square = width * height
            scale = math.sqrt(trained_square / actual_square)
            init_width = math.ceil(scale * width / 64) * 64
            init_height = math.ceil(scale * height / 64) * 64
            shape = [
                self.latent_channels,
                init_height // self.downsampling_factor,
                init_width // self.downsampling_factor,
            ]
            # The first pass draws its own noise at the reduced size; x_T is
            # only consumed by the second pass below.
            first_pass_noise = self.get_noise(init_width, init_height)
            if self.free_gpu_mem and self.model.model.device != self.model.device:
                self.model.model.to(self.model.device)
            samples, _ = sampler.sample(
                batch_size                   = 1,
                S                            = steps,
                x_T                          = first_pass_noise,
                conditioning                 = c,
                shape                        = shape,
                verbose                      = False,
                unconditional_guidance_scale = cfg_scale,
                unconditional_conditioning   = uc,
                eta                          = ddim_eta,
                img_callback                 = step_callback
            )
            print(
                  f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height}"
                 )
            # resize the first-pass latents to the latent size of the final image
            samples = torch.nn.functional.interpolate(
                samples,
                size=(height // self.downsampling_factor, width // self.downsampling_factor),
                mode="bilinear"
            )
            t_enc = int(strength * steps)
            # Other samplers not supported yet, so ignore previous sampler
            if isinstance(sampler,DDIMSampler):
                img_sampler = sampler
            else:
                print(
                    f"\n>> Sampler '{sampler.__class__.__name__}' is not yet supported for img2img. Using DDIM sampler"
                )
                img_sampler = DDIMSampler(self.model, device=self.model.device)
                img_sampler.make_schedule(
                    ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
                )
            # encode the resized latents, using the caller-supplied x_T as noise
            z_enc = img_sampler.stochastic_encode(
                samples,
                torch.tensor([t_enc]).to(self.model.device),
                noise=x_T
            )
            # decode it
            samples = img_sampler.decode(
                z_enc,
                c,
                t_enc,
                img_callback = step_callback,
                unconditional_guidance_scale=cfg_scale,
                unconditional_conditioning=uc,
            )
            if self.free_gpu_mem:
                self.model.model.to("cpu")
            return self.sample_to_image(samples)
        return make_image
    # returns a tensor filled with random numbers from a normal distribution
    def get_noise(self,width,height):
        """
        Return a [1, latent_channels, height // f, width // f] tensor of
        normally distributed noise on the model's device, where f is
        self.downsampling_factor.
        """
        device      = self.model.device
        on_mps      = device.type == 'mps'
        noise_shape = [1,
                       self.latent_channels,
                       height // self.downsampling_factor,
                       width  // self.downsampling_factor]
        if on_mps:
            # On 'mps' the noise is drawn on the CPU and then moved over
            # (presumably to work around randn behaviour on mps -- confirm).
            return torch.randn(noise_shape, device='cpu').to(device)
        return torch.randn(noise_shape, device=device)
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@ -46,6 +46,7 @@ COMMANDS = (
    '-save_orig','--save_original',
    '--skip_normalize','-x',
    '--log_tokenization','-t',
    '--hires_fix',
    '!fix','!fetch','!history',
    )
 IMG_PATH_COMMANDS = (
--- a/ldm/generate.py
+++ b/ldm/generate.py
@ -289,6 +289,7 @@ class Generate:
            upscale          = None,
            # Set this True to handle KeyboardInterrupt internally
            catch_interrupts = False,
            hires_fix        = False,
            **args,
    ):   # eat up additional cruft
        """
@ -411,6 +412,8 @@ class Generate:
                generator = self._make_embiggen()
            elif init_image is not None:
                generator = self._make_img2img()
            elif hires_fix:
                generator = self._make_txt2img2img()
            else:
                generator = self._make_txt2img()
@ -670,6 +673,13 @@ class Generate:
            self.generators['txt2img'].free_gpu_mem = self.free_gpu_mem
        return self.generators['txt2img']
    def _make_txt2img2img(self):
        if not self.generators.get('txt2img2'):
            from ldm.dream.generator.txt2img2img import Txt2Img2Img
            self.generators['txt2img2'] = Txt2Img2Img(self.model, self.precision)
            self.generators['txt2img2'].free_gpu_mem = self.free_gpu_mem
        return self.generators['txt2img2']
    def _make_inpaint(self):
        if not self.generators.get('inpaint'):
            from ldm.dream.generator.inpaint import Inpaint
--- a/scripts/dream.py
+++ b/scripts/dream.py
@ -201,9 +201,7 @@ def main_loop(gen, opt, infile):
                    oldargs    = metadata_from_png(opt.init_img)
                    opt.prompt = oldargs.prompt
                    print(f'>> Retrieved old prompt "{opt.prompt}" from {opt.init_img}')
-            except AttributeError:
+            except (OSError, AttributeError, KeyError):
                pass
            except KeyError:
                pass
        if len(opt.prompt) == 0:
@ -279,9 +277,6 @@ def main_loop(gen, opt, infile):
            prefix = file_writer.unique_prefix()
            def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None):
                print(f'DEBUG:upscaled={upscaled}, first_seed={first_seed}, use_prefix={use_prefix}')
                # note the seed is the seed of the current image
                # the first_seed is the original seed that noise is added to
                # when the -v switch is used to generate variations
@ -379,9 +374,6 @@ def do_postprocess (gen, opt, callback):
    file_path = opt.prompt     # treat the prompt as the file pathname
    if os.path.dirname(file_path) == '': #basename given
        file_path = os.path.join(opt.outdir,file_path)
    if not os.path.exists(file_path):
        print(f'* file {file_path} does not exist')
        return
    tool=None
    if opt.gfpgan_strength > 0:
@ -394,17 +386,24 @@ def do_postprocess (gen, opt, callback):
        tool = 'outpaint'
    opt.save_original = True # do not overwrite old image!
    opt.last_operation    = f'postprocess:{tool}'
-    gen.apply_postprocessor(
+    try:
-        image_path      = file_path,
+        gen.apply_postprocessor(
-        tool            = tool,
+            image_path      = file_path,
-        gfpgan_strength = opt.gfpgan_strength,
+            tool            = tool,
-        codeformer_fidelity = opt.codeformer_fidelity,
+            gfpgan_strength = opt.gfpgan_strength,
-        save_original       = opt.save_original,
+            codeformer_fidelity = opt.codeformer_fidelity,
-        upscale             = opt.upscale,
+            save_original       = opt.save_original,
-        out_direction       = opt.out_direction,
+            upscale             = opt.upscale,
-        callback            = callback,
+            out_direction       = opt.out_direction,
-        opt                 = opt,
+            callback            = callback,
            opt                 = opt,
        )
    except OSError:
        print(f'** {file_path}: file could not be read')
        return
    except (KeyError, AttributeError):
        print(f'** {file_path}: file has no metadata')
        return
    return opt.last_operation
 def prepare_image_metadata(
@ -521,8 +520,11 @@ def retrieve_dream_command(opt,file_path,completer):
        path = file_path
    try:
        cmd = dream_cmd_from_png(path)
-    except FileNotFoundError:
+    except OSError:
-        print(f'** {path}: file not found')
+        print(f'** {path}: file could not be read')
        return
    except (KeyError, AttributeError):
        print(f'** {path}: file has no metadata')
        return
    completer.set_line(cmd)