From 2db180d90979091b738e28516a263c43fa012a61 Mon Sep 17 00:00:00 2001 From: Jonathan <34005131+JPPhoto@users.noreply.github.com> Date: Wed, 8 Mar 2023 15:50:16 -0600 Subject: [PATCH] Make img2img strength 1 behave the same as txt2img (#2895) * Fix img2img and inpainting code so a strength of 1 behaves the same as txt2img. * Make generated images identical to their txt2img counterparts when strength is 1. --- invokeai/backend/generator/base.py | 5 ++--- invokeai/backend/generator/img2img.py | 9 +++++++- invokeai/backend/generator/inpaint.py | 2 ++ .../stable_diffusion/diffusers_pipeline.py | 22 ++++++++++++------- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/invokeai/backend/generator/base.py b/invokeai/backend/generator/base.py index a834e9dba3..881d3deaff 100644 --- a/invokeai/backend/generator/base.py +++ b/invokeai/backend/generator/base.py @@ -99,6 +99,7 @@ class Generator: h_symmetry_time_pct=h_symmetry_time_pct, v_symmetry_time_pct=v_symmetry_time_pct, attention_maps_callback=attention_maps_callback, + seed=seed, **kwargs, ) results = [] @@ -289,9 +290,7 @@ class Generator: if self.variation_amount > 0: random.seed() # reset RNG to an actually random state, so we can get a random seed for variations seed = random.randrange(0, np.iinfo(np.uint32).max) - return (seed, initial_noise) - else: - return (seed, None) + return (seed, initial_noise) # returns a tensor filled with random numbers from a normal distribution def get_noise(self, width, height): diff --git a/invokeai/backend/generator/img2img.py b/invokeai/backend/generator/img2img.py index f9692b9cc5..b7f90361a0 100644 --- a/invokeai/backend/generator/img2img.py +++ b/invokeai/backend/generator/img2img.py @@ -1,8 +1,10 @@ """ invokeai.backend.generator.img2img descends from .generator """ +from typing import Optional import torch +from accelerate.utils import set_seed from diffusers import logging from ..stable_diffusion import ( @@ -35,6 +37,7 @@ class Img2Img(Generator): h_symmetry_time_pct=None, v_symmetry_time_pct=None, attention_maps_callback=None, + seed=None, **kwargs, ): """ @@ -65,6 +68,7 @@ class Img2Img(Generator): # FIXME: use x_T for initial seeded noise # We're not at the moment because the pipeline automatically resizes init_image if # necessary, which the x_T input might not match. + # In the meantime, reset the seed prior to generating pipeline output so we at least get the same result. logging.set_verbosity_error() # quench safety check warnings pipeline_output = pipeline.img2img_from_embeddings( init_image, @@ -73,6 +77,7 @@ class Img2Img(Generator): conditioning_data, noise_func=self.get_noise_like, callback=step_callback, + seed=seed ) if ( pipeline_output.attention_map_saver is not None @@ -83,7 +88,9 @@ class Img2Img(Generator): return make_image - def get_noise_like(self, like: torch.Tensor): + def get_noise_like(self, like: torch.Tensor, seed: Optional[int]): + if seed is not None: + set_seed(seed) device = like.device if device.type == "mps": x = torch.randn_like(like, device="cpu").to(device) diff --git a/invokeai/backend/generator/inpaint.py b/invokeai/backend/generator/inpaint.py index f7f21b8906..fa7560d43e 100644 --- a/invokeai/backend/generator/inpaint.py +++ b/invokeai/backend/generator/inpaint.py @@ -223,6 +223,7 @@ class Inpaint(Img2Img): inpaint_height=None, inpaint_fill: tuple(int) = (0x7F, 0x7F, 0x7F, 0xFF), attention_maps_callback=None, + seed=None, **kwargs, ): """ @@ -319,6 +320,7 @@ class Inpaint(Img2Img): conditioning_data=conditioning_data, noise_func=self.get_noise_like, callback=step_callback, + seed=seed ) if ( diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index f1622f86be..6bd1fe339d 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -690,6 +690,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): callback: Callable[[PipelineIntermediateState], None] = None, run_id=None, noise_func=None, + seed=None, ) -> InvokeAIStableDiffusionPipelineOutput: if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB")) @@ -703,7 +704,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): device=self._model_group.device_for(self.unet), dtype=self.unet.dtype, ) - noise = noise_func(initial_latents) + noise = noise_func(initial_latents, seed) return self.img2img_from_latents_and_embeddings( initial_latents, @@ -731,9 +732,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): device=self._model_group.device_for(self.unet), ) result_latents, result_attention_maps = self.latents_from_embeddings( - initial_latents, - num_inference_steps, - conditioning_data, + latents=initial_latents if strength < 1.0 else torch.zeros_like( + initial_latents, device=initial_latents.device, dtype=initial_latents.dtype + ), + num_inference_steps=num_inference_steps, + conditioning_data=conditioning_data, timesteps=timesteps, noise=noise, run_id=run_id, @@ -779,6 +782,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): callback: Callable[[PipelineIntermediateState], None] = None, run_id=None, noise_func=None, + seed=None, ) -> InvokeAIStableDiffusionPipelineOutput: device = self._model_group.device_for(self.unet) latents_dtype = self.unet.dtype @@ -802,7 +806,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): init_image_latents = self.non_noised_latents_from_image( init_image, device=device, dtype=latents_dtype ) - noise = noise_func(init_image_latents) + noise = noise_func(init_image_latents, seed) if mask.dim() == 3: mask = mask.unsqueeze(0) @@ -831,9 +835,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): try: result_latents, result_attention_maps = self.latents_from_embeddings( - init_image_latents, - num_inference_steps, - conditioning_data, + latents=init_image_latents if strength < 1.0 else torch.zeros_like( + init_image_latents, device=init_image_latents.device, dtype=init_image_latents.dtype + ), + num_inference_steps=num_inference_steps, + conditioning_data=conditioning_data, noise=noise, timesteps=timesteps, additional_guidance=guidance,