From 59caff7ff09d1d78e8fe7786d1ca9f4f0c10ab71 Mon Sep 17 00:00:00 2001
From: Kevin Turner <83819+keturn@users.noreply.github.com>
Date: Sat, 5 Aug 2023 21:43:55 -0700
Subject: [PATCH] =?UTF-8?q?refactor(diffusers=5Fpipeline):=20remove=20unus?=
 =?UTF-8?q?ed=20img2img=20wrappers=20=F0=9F=9A=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

invokeai.app no longer needs this as a single method, as it builds on
latents2latents instead.
---
 invokeai/backend/generator/img2img.py      | 63 +---------------
 invokeai/backend/generator/inpaint.py      |  8 ++
 .../stable_diffusion/diffusers_pipeline.py | 73 ------------------
 3 files changed, 9 insertions(+), 135 deletions(-)

diff --git a/invokeai/backend/generator/img2img.py b/invokeai/backend/generator/img2img.py
index 5490b2325c..8aaaff5deb 100644
--- a/invokeai/backend/generator/img2img.py
+++ b/invokeai/backend/generator/img2img.py
@@ -1,25 +1,11 @@
 """
 invokeai.backend.generator.img2img descends from .generator
 """
-from typing import Optional
 
-import torch
-from accelerate.utils import set_seed
-from diffusers import logging
-
-from ..stable_diffusion import (
-    ConditioningData,
-    PostprocessingSettings,
-    StableDiffusionGeneratorPipeline,
-)
 from .base import Generator
 
 
 class Img2Img(Generator):
-    def __init__(self, model, precision):
-        super().__init__(model, precision)
-        self.init_latent = None  # by get_noise()
-
     def get_make_image(
         self,
         sampler,
@@ -42,51 +28,4 @@ class Img2Img(Generator):
         Returns a function returning an image derived from the prompt and the initial image
         Return value depends on the seed at the time you call it.
         """
-        self.perlin = perlin
-
-        # noinspection PyTypeChecker
-        pipeline: StableDiffusionGeneratorPipeline = self.model
-        pipeline.scheduler = sampler
-
-        uc, c, extra_conditioning_info = conditioning
-        conditioning_data = ConditioningData(
-            uc,
-            c,
-            cfg_scale,
-            extra_conditioning_info,
-            postprocessing_settings=PostprocessingSettings(
-                threshold=threshold,
-                warmup=warmup,
-                h_symmetry_time_pct=h_symmetry_time_pct,
-                v_symmetry_time_pct=v_symmetry_time_pct,
-            ),
-        ).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)
-
-        def make_image(x_T: torch.Tensor, seed: int):
-            # FIXME: use x_T for initial seeded noise
-            # We're not at the moment because the pipeline automatically resizes init_image if
-            # necessary, which the x_T input might not match.
-            # In the meantime, reset the seed prior to generating pipeline output so we at least get the same result.
-            logging.set_verbosity_error()  # quench safety check warnings
-            pipeline_output = pipeline.img2img_from_embeddings(
-                init_image,
-                strength,
-                steps,
-                conditioning_data,
-                noise_func=self.get_noise_like,
-                callback=step_callback,
-                seed=seed,
-            )
-            if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None:
-                attention_maps_callback(pipeline_output.attention_map_saver)
-            return pipeline.numpy_to_pil(pipeline_output.images)[0]
-
-        return make_image
-
-    def get_noise_like(self, like: torch.Tensor):
-        device = like.device
-        x = torch.randn_like(like, device=device)
-        if self.perlin > 0.0:
-            shape = like.shape
-            x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2])
-        return x
+        raise NotImplementedError("replaced by invokeai.app.invocations.latent.LatentsToLatentsInvocation")
diff --git a/invokeai/backend/generator/inpaint.py b/invokeai/backend/generator/inpaint.py
index 7aeb3d4809..494f213d11 100644
--- a/invokeai/backend/generator/inpaint.py
+++ b/invokeai/backend/generator/inpaint.py
@@ -377,3 +377,11 @@ class Inpaint(Img2Img):
         )
 
         return corrected_result
+
+    def get_noise_like(self, like: torch.Tensor):
+        device = like.device
+        x = torch.randn_like(like, device=device)
+        if self.perlin > 0.0:
+            shape = like.shape
+            x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2])
+        return x
diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
index 6891c726dc..c2c8165d02 100644
--- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py
+++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py
@@ -622,79 +622,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
             **kwargs,
         ).sample
 
-    def img2img_from_embeddings(
-        self,
-        init_image: Union[torch.FloatTensor, PIL.Image.Image],
-        strength: float,
-        num_inference_steps: int,
-        conditioning_data: ConditioningData,
-        *,
-        callback: Callable[[PipelineIntermediateState], None] = None,
-        run_id=None,
-        noise_func=None,
-        seed=None,
-    ) -> InvokeAIStableDiffusionPipelineOutput:
-        if isinstance(init_image, PIL.Image.Image):
-            init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB"))
-
-        if init_image.dim() == 3:
-            init_image = einops.rearrange(init_image, "c h w -> 1 c h w")
-
-        # 6. Prepare latent variables
-        initial_latents = self.non_noised_latents_from_image(
-            init_image,
-            device=self.unet.device,
-            dtype=self.unet.dtype,
-        )
-        if seed is not None:
-            set_seed(seed)
-        noise = noise_func(initial_latents)
-
-        return self.img2img_from_latents_and_embeddings(
-            initial_latents,
-            num_inference_steps,
-            conditioning_data,
-            strength,
-            noise,
-            run_id,
-            callback,
-        )
-
-    def img2img_from_latents_and_embeddings(
-        self,
-        initial_latents,
-        num_inference_steps,
-        conditioning_data: ConditioningData,
-        strength,
-        noise: torch.Tensor,
-        run_id=None,
-        callback=None,
-    ) -> InvokeAIStableDiffusionPipelineOutput:
-        timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
-        result_latents, result_attention_maps = self.latents_from_embeddings(
-            latents=initial_latents
-            if strength < 1.0
-            else torch.zeros_like(initial_latents, device=initial_latents.device, dtype=initial_latents.dtype),
-            num_inference_steps=num_inference_steps,
-            conditioning_data=conditioning_data,
-            timesteps=timesteps,
-            noise=noise,
-            run_id=run_id,
-            callback=callback,
-        )
-
-        # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
-        torch.cuda.empty_cache()
-
-        with torch.inference_mode():
-            image = self.decode_latents(result_latents)
-        output = InvokeAIStableDiffusionPipelineOutput(
-            images=image,
-            nsfw_content_detected=[],
-            attention_map_saver=result_attention_maps,
-        )
-        return output
-
     def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int):
         img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components)
         assert img2img_pipeline.scheduler is self.scheduler
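
Note for downstream callers porting off the deleted wrappers: the same
img2img flow can be composed from the methods this patch keeps, as the body
of the removed img2img_from_latents_and_embeddings() shows. A minimal sketch
only, assuming the signatures visible in the removed code above still hold;
`pipeline`, `init_image_tensor`, `conditioning_data`, `noise`, `strength`,
and `num_inference_steps` are hypothetical values the caller now prepares
itself (the deleted wrapper used to prepare them):

    import torch

    # Encode the (already resized, 1xCxHxW) init image to latents,
    # as the removed img2img_from_embeddings() did.
    initial_latents = pipeline.non_noised_latents_from_image(
        init_image_tensor,
        device=pipeline.unet.device,
        dtype=pipeline.unet.dtype,
    )
    # Truncate the schedule according to img2img strength.
    timesteps, _ = pipeline.get_img2img_timesteps(num_inference_steps, strength)
    # Run the denoising loop the wrapper used to delegate to;
    # it returns (latents, attention_map_saver).
    result_latents, _ = pipeline.latents_from_embeddings(
        latents=initial_latents,
        num_inference_steps=num_inference_steps,
        conditioning_data=conditioning_data,
        timesteps=timesteps,
        noise=noise,
    )
    with torch.inference_mode():
        image = pipeline.decode_latents(result_latents)

This is what invokeai.app.invocations.latent.LatentsToLatentsInvocation,
named in the NotImplementedError above, builds on instead of the one-shot
method.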