diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py
index d95ad78196..759ba2dba4 100644
--- a/ldm/invoke/generator/txt2img2img.py
+++ b/ldm/invoke/generator/txt2img2img.py
@@ -10,8 +10,6 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.invoke.generator.omnibus import Omnibus
 from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
 from PIL import Image
-from ldm.invoke.devices import choose_autocast
-from ldm.invoke.image_util import InitImageResizer
 
 class Txt2Img2Img(Generator):
     def __init__(self, model, precision):
@@ -46,13 +44,16 @@ class Txt2Img2Img(Generator):
             ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
         )
 
+        #x = self.get_noise(init_width, init_height)
+        x = x_T
+
         if self.free_gpu_mem and self.model.model.device != self.model.device:
             self.model.model.to(self.model.device)
 
         samples, _ = sampler.sample(
             batch_size = 1,
             S = steps,
-            x_T = x_T,
+            x_T = x,
             conditioning = c,
             shape = shape,
             verbose = False,
@@ -68,21 +69,11 @@ class Txt2Img2Img(Generator):
        )
 
         # resizing
-
-        image = self.sample_to_image(samples)
-        image = InitImageResizer(image).resize(width, height)
-
-        image = np.array(image).astype(np.float32) / 255.0
-        image = image[None].transpose(0, 3, 1, 2)
-        image = torch.from_numpy(image)
-        image = 2.0 * image - 1.0
-        image = image.to(self.model.device)
-
-        scope = choose_autocast(self.precision)
-        with scope(self.model.device.type):
-            samples = self.model.get_first_stage_encoding(
-                self.model.encode_first_stage(image)
-            ) # move back to latent space
+        samples = torch.nn.functional.interpolate(
+            samples,
+            size=(height // self.downsampling_factor, width // self.downsampling_factor),
+            mode="bilinear"
+        )
 
         t_enc = int(strength * steps)
         ddim_sampler = DDIMSampler(self.model, device=self.model.device)
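
For context (not part of the diff itself): the change replaces a decode/resize/re-encode round trip through the VAE with a single bilinear resize of the latent tensor. Below is a minimal, self-contained sketch of that technique; the tensor shapes, the `downsampling_factor` value, and the sizes used are illustrative assumptions mirroring the diff, not code from the PR.

```python
# Sketch only: resize latents directly with torch.nn.functional.interpolate,
# avoiding a VAE decode to pixel space followed by a re-encode.
import torch

downsampling_factor = 8           # assumed: SD latents are 1/8 of pixel resolution
height, width = 768, 768          # assumed target pixel size for the second pass

# placeholder latents at the initial (smaller) resolution, shape (B, C, H/8, W/8)
samples = torch.randn(1, 4, 512 // downsampling_factor, 512 // downsampling_factor)

# one bilinear resize in latent space -- no VAE round trip needed
resized = torch.nn.functional.interpolate(
    samples,
    size=(height // downsampling_factor, width // downsampling_factor),
    mode="bilinear",
)
print(resized.shape)  # torch.Size([1, 4, 96, 96])
```

The design trade-off: interpolating latents skips two first-stage passes (decode and encode), saving memory and time, at the cost of resampling in a space where pixel-space resizing filters do not apply directly.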