InvokeAI/ldm/dream/generator/img2img.py

'''
ldm.dream.generator.img2img defines Img2Img, which descends from ldm.dream.generator.base.Generator
'''

import torch
import numpy as  np
from ldm.dream.devices             import choose_autocast
from ldm.dream.generator.base      import Generator
from ldm.models.diffusion.ddim     import DDIMSampler

class Img2Img(Generator):
    """
    Generator that produces images from a prompt conditioning plus an initial image.

    The initial image is encoded into latent space by get_make_image(); that
    latent is cached on the instance so get_noise() can produce noise shaped
    like it.
    """

    def __init__(self, model, precision):
        super().__init__(model, precision)
        # Latent encoding of the init image: assigned in get_make_image(),
        # read by get_noise() and by the make_image closure.
        self.init_latent = None

    @torch.no_grad()
    def get_make_image(self, prompt, sampler, steps, cfg_scale, ddim_eta,
                       conditioning, init_image, strength, step_callback=None, **kwargs):
        """
        Build and return a make_image(x_T) function for the given settings.

        The returned function noises the cached init latent using x_T,
        denoises it with the sampler, and returns the result of
        self.sample_to_image().

        prompt        -- not used by this method
        sampler       -- sampler to use; anything that is not a DDIMSampler is
                         replaced by a fresh DDIMSampler (a notice is printed)
        steps         -- passed to the sampler schedule as ddim_num_steps
        cfg_scale     -- passed to decode as unconditional_guidance_scale
        ddim_eta      -- passed to the sampler schedule
        conditioning  -- two-item sequence unpacked as (uc, c)
        init_image    -- passed to model.encode_first_stage()
                         (presumably an image tensor on the model device -- TODO confirm)
        strength      -- encode/decode step count is int(strength * steps)
        step_callback -- passed to decode as img_callback
        kwargs        -- accepted and ignored
        """

        # Only the DDIM sampler is handled here; swap out anything else.
        if not isinstance(sampler, DDIMSampler):
            sampler_name = sampler.__class__.__name__
            print(f">> sampler '{sampler_name}' is not yet supported. Using DDIM sampler")
            sampler = DDIMSampler(self.model, device=self.model.device)

        sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False)

        # Encode the init image into latent space under the chosen autocast scope.
        autocast_scope = choose_autocast(self.precision)
        with autocast_scope(self.model.device.type):
            first_stage = self.model.encode_first_stage(init_image)
            self.init_latent = self.model.get_first_stage_encoding(first_stage)

        t_enc = int(strength * steps)
        uc, c = conditioning

        @torch.no_grad()
        def make_image(x_T):
            # Noise the init latent up to step t_enc, using x_T as the noise.
            # self.init_latent is looked up at call time, not captured here.
            start_latent = self.init_latent
            encode_step = torch.tensor([t_enc]).to(self.model.device)
            z_enc = sampler.stochastic_encode(start_latent, encode_step, noise=x_T)

            # Denoise back down from step t_enc with guidance.
            samples = sampler.decode(
                z_enc,
                c,
                t_enc,
                img_callback=step_callback,
                unconditional_guidance_scale=cfg_scale,
                unconditional_conditioning=uc,
            )
            return self.sample_to_image(samples)

        return make_image

    def get_noise(self, width, height):
        """
        Return random noise with the same shape as the cached init latent.

        width and height are not used; the shape comes from self.init_latent,
        so get_make_image() must have been called first.
        """
        target = self.model.device
        latent = self.init_latent
        assert latent is not None,'call to get_noise() when init_latent not set'
        if target.type != 'mps':
            return torch.randn_like(latent, device=target)
        # On MPS the noise is drawn on the CPU and then moved to the device
        # (presumably to work around MPS random-number limitations -- TODO confirm).
        return torch.randn_like(latent, device='cpu').to(target)
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`'''`
Add Embiggen automation to upscale-cut-img2img-stitch and achieve high res without extra VRAM (#437) * Add Embiggen automation * Make embiggen_tiles masking more intelligent and count from one (at least for the user), rewrite sections of Embiggen README, fix various typos throughout README * drop duplicate log message 2022-09-12 19:37:26 +00:00			`ldm.dream.generator.img2img descends from ldm.dream.generator`
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`'''`

			`import torch`
			`import numpy as np`
Replace --full_precision with --precision that works even if not specified Allowed values are 'auto', 'float32', 'autocast', 'float16'. If not specified or 'auto' a working precision is automatically selected based on the torch device. Context: #526 Deprecated --full_precision / -F Tested on both cuda and cpu by calling scripts/dream.py without arguments and checked the auto configuration worked. With --precision=auto/float32/autocast/float16 it performs as expected, either working or failing with a reasonable error. Also checked Img2Img. 2022-09-17 17:56:25 +00:00			`from ldm.dream.devices import choose_autocast`
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`from ldm.dream.generator.base import Generator`
			`from ldm.models.diffusion.ddim import DDIMSampler`

			`class Img2Img(Generator):`
Replace --full_precision with --precision that works even if not specified Allowed values are 'auto', 'float32', 'autocast', 'float16'. If not specified or 'auto' a working precision is automatically selected based on the torch device. Context: #526 Deprecated --full_precision / -F Tested on both cuda and cpu by calling scripts/dream.py without arguments and checked the auto configuration worked. With --precision=auto/float32/autocast/float16 it performs as expected, either working or failing with a reasonable error. Also checked Img2Img. 2022-09-17 17:56:25 +00:00			`def __init__(self, model, precision):`
			`super().__init__(model, precision)`
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`self.init_latent = None # by get_noise()`
Replace --full_precision with --precision that works even if not specified Allowed values are 'auto', 'float32', 'autocast', 'float16'. If not specified or 'auto' a working precision is automatically selected based on the torch device. Context: #526 Deprecated --full_precision / -F Tested on both cuda and cpu by calling scripts/dream.py without arguments and checked the auto configuration worked. With --precision=auto/float32/autocast/float16 it performs as expected, either working or failing with a reasonable error. Also checked Img2Img. 2022-09-17 17:56:25 +00:00
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`@torch.no_grad()`
			`def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,`
			`conditioning,init_image,strength,step_callback=None,**kwargs):`
			`"""`
			`Returns a function returning an image derived from the prompt and the initial image`
			`Return value depends on the seed at the time you call it.`
			`"""`

			`# PLMS sampler not supported yet, so ignore previous sampler`
			`if not isinstance(sampler,DDIMSampler):`
			`print(`
			`f">> sampler '{sampler.__class__.__name__}' is not yet supported. Using DDIM sampler"`
			`)`
			`sampler = DDIMSampler(self.model, device=self.model.device)`

			`sampler.make_schedule(`
			`ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False`
			`)`

Replace --full_precision with --precision that works even if not specified Allowed values are 'auto', 'float32', 'autocast', 'float16'. If not specified or 'auto' a working precision is automatically selected based on the torch device. Context: #526 Deprecated --full_precision / -F Tested on both cuda and cpu by calling scripts/dream.py without arguments and checked the auto configuration worked. With --precision=auto/float32/autocast/float16 it performs as expected, either working or failing with a reasonable error. Also checked Img2Img. 2022-09-17 17:56:25 +00:00			`scope = choose_autocast(self.precision)`
			`with scope(self.model.device.type):`
Refactoring simplet2i (#387) * start refactoring -not yet functional * first phase of refactor done - not sure weighted prompts working * Second phase of refactoring. Everything mostly working. * The refactoring has moved all the hard-core inference work into ldm.dream.generator., where there are submodules for txt2img and img2img. inpaint will go in there as well. Some additional refactoring will be done soon, but relatively minor work. * fix -save_orig flag to actually work * add @neonsecret attention.py memory optimization * remove unneeded imports * move token logging into conditioning.py * add placeholder version of inpaint; porting in progress * fix crash in img2img * inpainting working; not tested on variations * fix crashes in img2img * ported attention.py memory optimization #117 from basujindal branch * added @torch_no_grad() decorators to img2img, txt2img, inpaint closures * Final commit prior to PR against development * fixup crash when generating intermediate images in web UI * rename ldm.simplet2i to ldm.generate * add backward-compatibility simplet2i shell with deprecation warning * add back in mps exception, addresses @vargol comment in #354 * replaced Conditioning class with exported functions * fix wrong type of with_variations attribute during intialization * changed "image_iterator()" to "get_make_image()" * raise NotImplementedError for calling get_make_image() in parent class * Update ldm/generate.py better error message Co-authored-by: Kevin Gibbons <bakkot@gmail.com> * minor stylistic fixes and assertion checks from code review * moved get_noise() method into img2img class * break get_noise() into two methods, one for txt2img and the other for img2img * inpainting works on non-square images now * make get_noise() an abstract method in base class * much improved inpainting Co-authored-by: Kevin Gibbons <bakkot@gmail.com> 2022-09-06 00:40:10 +00:00			`self.init_latent = self.model.get_first_stage_encoding(`
			`self.model.encode_first_stage(init_image)`
			`) # move to latent space`

			`t_enc = int(strength * steps)`
			`uc, c = conditioning`

			`@torch.no_grad()`
			`def make_image(x_T):`
			`# encode (scaled latent)`
			`z_enc = sampler.stochastic_encode(`
			`self.init_latent,`
			`torch.tensor([t_enc]).to(self.model.device),`
			`noise=x_T`
			`)`
			`# decode it`
			`samples = sampler.decode(`
			`z_enc,`
			`c,`
			`t_enc,`
			`img_callback = step_callback,`
			`unconditional_guidance_scale=cfg_scale,`
			`unconditional_conditioning=uc,`
			`)`
			`return self.sample_to_image(samples)`

			`return make_image`

			`def get_noise(self,width,height):`
			`device = self.model.device`
			`init_latent = self.init_latent`
			`assert init_latent is not None,'call to get_noise() when init_latent not set'`
			`if device.type == 'mps':`
			`return torch.randn_like(init_latent, device='cpu').to(device)`
			`else:`
			`return torch.randn_like(init_latent, device=device)`