From a51e18ea982368fc135b62af0407e04e9f7b2b52 Mon Sep 17 00:00:00 2001
From: Lincoln Stein <lincoln.stein@gmail.com>
Date: Tue, 30 Aug 2022 15:26:02 -0400
Subject: [PATCH] resize initial image to match requested width and height,
 preserving aspect ratio. Closes #210. Closes #207 (#214)

---
 TODO.txt                | 35 --------------------------
 ldm/dream/image_util.py | 54 +++++++++++++++++++++++++++++++++++++++++
 ldm/dream/readline.py   |  2 +-
 ldm/simplet2i.py        | 28 ++++++++++++---------
 4 files changed, 71 insertions(+), 48 deletions(-)
 delete mode 100644 TODO.txt
 create mode 100644 ldm/dream/image_util.py

diff --git a/TODO.txt b/TODO.txt
deleted file mode 100644
index 40833deb64..0000000000
--- a/TODO.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Feature requests:
-
-
-1. "gobig" mode - split image into strips, scale up, add detail using  - DONE!
-   img2img and reassemble with feathering. Issue #66.
-   See https://github.com/jquesnelle/txt2imghd
-
-2. Port basujindal low VRAM optimizations. Issue #62
-
-3. Store images under folders named after the prompt. Issue #27.
-
-4. Some sort of automation for generating variations. Issues #32 and #47.
-
-5. Support for inpainting masks #68.
-
-6. Support for loading variations of the stable-diffusion
-   weights #49
-
-7. Support for klms and other non-ddim samplers in img2img() #36 - DONE!
-
-8. Pass a shell command to open up an image viewer on the last
-   batch of images generated #29.
-
-9. Change sampler and outdir after initialization #115
-
-Code Refactorization:
-
-1. Move the PNG file generation code out of simplet2i and into - DONE!
-   separate module. txt2img() and img2img() should return Image
-   objects, and parent code is responsible for filenaming logic.
-
-2. Refactor redundant code that is shared between txt2img() and - DONE!
-   img2img().
-
-3. Experiment with replacing CompViz code with HuggingFace. - NOT WORTH IT!
diff --git a/ldm/dream/image_util.py b/ldm/dream/image_util.py
new file mode 100644
index 0000000000..fa14ec897b
--- /dev/null
+++ b/ldm/dream/image_util.py
@@ -0,0 +1,54 @@
+from PIL import Image
+
+class InitImageResizer():
+    """Simple class to create resized, letterboxed copies of an Image while preserving the aspect ratio."""
+    def __init__(self,Image):
+        # NOTE: the parameter name shadows the PIL module inside __init__ only;
+        # it is left unchanged so that existing keyword callers keep working.
+        self.image = Image
+
+    def resize(self,width=None,height=None) -> Image.Image:
+        """
+        Return a copy of the image fitted inside a width x height canvas.
+        The aspect ratio is maintained, with any excess space
+        filled using black borders (i.e. letterboxed). If
+        neither width nor height are provided, then returns
+        a copy of the original image. If only one is provided,
+        the other is calculated from the aspect ratio.
+
+        All dimensions (canvas and scaled image) are floored to a
+        multiple of 64 so the result can be passed to img2img().
+        """
+        im    = self.image
+
+        if not(width or height):
+            return im.copy()
+
+        ar = im.width/im.height
+
+        # rw and rh are the size the image is scaled to. They keep the
+        # aspect ratio and never exceed the destination width x height.
+        if not height:          # height missing: derive it from the width
+            rw,rh  = width,int(width/ar)
+            height = rh
+        elif not width:         # width missing: derive it from the height
+            rw,rh  = int(height*ar),height
+            width  = rw
+        else:                   # both given: fit to width unless that is too tall
+            rw,rh  = width,int(width/ar)
+            if rh > height:     # too tall for the canvas, so fit to height instead
+                rw,rh = int(height*ar),height
+
+        # floor everything to multiples of 64
+        width,height,rw,rh = (x-x%64 for x in (width,height,rw,rh))
+
+        # resize the original image so that it fits inside the dest
+        resized_image = im.resize((rw,rh),resample=Image.Resampling.LANCZOS)
+
+        # create new destination image of specified dimensions
+        # and paste the resized image into it centered appropriately
+        new_image = Image.new('RGB',(width,height))
+        new_image.paste(resized_image,((width-rw)//2,(height-rh)//2))
+        return new_image
+
+            
diff --git a/ldm/dream/readline.py b/ldm/dream/readline.py
index 5cf99523fc..6c6a390c42 100644
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@@ -23,7 +23,7 @@ class Completer:
         buffer = readline.get_line_buffer()
 
         if text.startswith(('-I', '--init_img')):
-            return self._path_completions(text, state, ('.png'))
+            return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
 
         if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
             return self._path_completions(text, state, ())
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 710952d299..7e44246f6b 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
+from ldm.dream.image_util import InitImageResizer
 
 """Simplified text to image API for stable diffusion/latent diffusion
 
@@ -204,7 +205,6 @@ class T2I:
         skip_normalize=False,
         image_callback=None,
         step_callback=None,
-        # these are specific to txt2img
         width=None,
         height=None,
         # these are specific to img2img
@@ -270,14 +270,16 @@ class T2I:
         assert (
             0.0 <= strength <= 1.0
         ), 'can only work with strength in [0.0, 1.0]'
-        w = int(width / 64) * 64
-        h = int(height / 64) * 64
+        w, h = map(
+            lambda x: x - x % 64, (width, height)
+        )  # resize to integer multiple of 64
+
         if h != height or w != width:
             print(
                 f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
             )
             height = h
-            width = w
+            width  = w
 
         scope = autocast if self.precision == 'autocast' else nullcontext
 
@@ -301,6 +303,8 @@ class T2I:
                     ddim_eta=ddim_eta,
                     skip_normalize=skip_normalize,
                     init_img=init_img,
+                    width=width,
+                    height=height,
                     strength=strength,
                     callback=step_callback,
                 )
@@ -441,6 +445,8 @@ class T2I:
         ddim_eta,
         skip_normalize,
         init_img,
+        width,
+        height,
         strength,
         callback, # Currently not implemented for img2img
     ):
@@ -457,7 +463,7 @@ class T2I:
         else:
             sampler = self.sampler
 
-        init_image = self._load_img(init_img).to(self.device)
+        init_image = self._load_img(init_img,width,height).to(self.device)
         init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
         with precision_scope(self.device.type):
             init_latent = self.model.get_first_stage_encoding(
@@ -616,17 +622,15 @@ class T2I:
             model.half()
         return model
 
-    def _load_img(self, path):
+    def _load_img(self, path, width, height):
         print(f'image path = {path}, cwd = {os.getcwd()}')
         with Image.open(path) as img:
             image = img.convert('RGB')
+        print(f'loaded input image of size {image.width}x{image.height} from {path}')
+
+        image = InitImageResizer(image).resize(width,height)
+        print(f'resized input image to size {image.width}x{image.height}')
 
-        w, h = image.size
-        print(f'loaded input image of size ({w}, {h}) from {path}')
-        w, h = map(
-            lambda x: x - x % 32, (w, h)
-        )  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
         image = np.array(image).astype(np.float32) / 255.0
         image = image[None].transpose(0, 3, 1, 2)
         image = torch.from_numpy(image)