resize initial image to match requested width and height, preserving aspect ratio. Closes #210. Closes #207 (#214)

2024-08-30 20:32:17 +00:00 · 2022-08-30 15:26:02 -04:00 · 2022-08-30 15:26:02 -04:00 · a51e18ea98
commit a51e18ea98
parent 8bf321f6ae
4 changed files with 71 additions and 48 deletions
--- a/TODO.txt
+++ b/TODO.txt
@ -1,35 +0,0 @@
-Feature requests:
-
-
-1. "gobig" mode - split image into strips, scale up, add detail using  - DONE!
-   img2img and reassemble with feathering. Issue #66.
-   See https://github.com/jquesnelle/txt2imghd
-
-2. Port basujindal low VRAM optimizations. Issue #62
-
-3. Store images under folders named after the prompt. Issue #27.
-
-4. Some sort of automation for generating variations. Issues #32 and #47.
-
-5. Support for inpainting masks #68.
-
-6. Support for loading variations of the stable-diffusion
-   weights #49
-
-7. Support for klms and other non-ddim samplers in img2img() #36 - DONE!
-
-8. Pass a shell command to open up an image viewer on the last
-   batch of images generated #29.
-
-9. Change sampler and outdir after initialization #115
-
-Code Refactorization:
-
-1. Move the PNG file generation code out of simplet2i and into - DONE!
-   separate module. txt2img() and img2img() should return Image
-   objects, and parent code is responsible for filenaming logic.
-
-2. Refactor redundant code that is shared between txt2img() and - DONE!
-   img2img().
-
-3. Experiment with replacing CompViz code with HuggingFace. - NOT WORTH IT!
--- a/ldm/dream/image_util.py
+++ b/ldm/dream/image_util.py
@ -0,0 +1,54 @@
+from PIL import Image
+
+class InitImageResizer():
+    """Simple class to create resized copies of an Image while preserving the aspect ratio."""
+    def __init__(self,Image):
+        self.image = Image
+
+    def resize(self,width=None,height=None) -> Image:
+        """
+        Return a copy of the image resized to width x height.
+        The aspect ratio is maintained, with any excess space
+        filled using black borders (i.e. letterboxed). If
+        neither width nor height are provided, then returns
+        a copy of the original image. If one or the other is
+        provided, then the other will be calculated from the
+        aspect ratio.
+
+        Everything is rounded down to a multiple of 64 so
+        that it can be passed to img2img()
+        """
+        im    = self.image
+
+        if not(width or height):
+            return im.copy()
+
+        ar = im.width/im.height
+
+        # Infer missing values from aspect ratio
+        if not height:          # height missing
+            height = int(width/ar)
+        if not width:          # width missing
+            width  = int(height*ar)
+
+        # rw and rh are the resizing width and height for the image
+        # they maintain the aspect ratio, but may not completely fill up
+        # the requested destination size
+        (rw,rh) = (width,int(width/ar)) if im.width>=im.height else (int(height*ar),width)
+
+        # round everything down to multiples of 64
+        width,height,rw,rh = map(
+            lambda x: x-x%64, (width,height,rw,rh)
+            )
+
+        # resize the original image so that it fits inside the dest
+        resized_image = self.image.resize((rw,rh),resample=Image.Resampling.LANCZOS)
+
+        # create new destination image of specified dimensions
+        # and paste the resized image into it centered appropriately
+        new_image = Image.new('RGB',(width,height))
+        new_image.paste(resized_image,((width-rw)//2,(height-rh)//2))
+
+        return new_image
+
+            
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@ -23,7 +23,7 @@ class Completer:
        buffer = readline.get_line_buffer()

        if text.startswith(('-I', '--init_img')):
-            return self._path_completions(text, state, ('.png'))
+            return self._path_completions(text, state, ('.png','.jpg','.jpeg'))

        if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
            return self._path_completions(text, state, ())
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
+from ldm.dream.image_util import InitImageResizer

 """Simplified text to image API for stable diffusion/latent diffusion

@ -204,7 +205,6 @@ class T2I:
        skip_normalize=False,
        image_callback=None,
        step_callback=None,
-        # these are specific to txt2img
        width=None,
        height=None,
        # these are specific to img2img
@ -270,8 +270,10 @@ class T2I:
        assert (
            0.0 <= strength <= 1.0
        ), 'can only work with strength in [0.0, 1.0]'
-        w = int(width / 64) * 64
-        h = int(height / 64) * 64
+        w, h = map(
+            lambda x: x - x % 64, (width, height)
+        )  # resize to integer multiple of 64
+
        if h != height or w != width:
            print(
                f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
@ -301,6 +303,8 @@ class T2I:
                    ddim_eta=ddim_eta,
                    skip_normalize=skip_normalize,
                    init_img=init_img,
+                    width=width,
+                    height=height,
                    strength=strength,
                    callback=step_callback,
                )
@ -441,6 +445,8 @@ class T2I:
        ddim_eta,
        skip_normalize,
        init_img,
+        width,
+        height,
        strength,
        callback, # Currently not implemented for img2img
    ):
@ -457,7 +463,7 @@ class T2I:
        else:
            sampler = self.sampler

-        init_image = self._load_img(init_img).to(self.device)
+        init_image = self._load_img(init_img,width,height).to(self.device)
        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
        with precision_scope(self.device.type):
            init_latent = self.model.get_first_stage_encoding(
@ -616,17 +622,15 @@ class T2I:
            model.half()
        return model

-    def _load_img(self, path):
+    def _load_img(self, path, width, height):
        print(f'image path = {path}, cwd = {os.getcwd()}')
        with Image.open(path) as img:
            image = img.convert('RGB')
+        print(f'loaded input image of size {image.width}x{image.height} from {path}')
+
+        image = InitImageResizer(image).resize(width,height)
+        print(f'resized input image to size {image.width}x{image.height}')

-        w, h = image.size
-        print(f'loaded input image of size ({w}, {h}) from {path}')
-        w, h = map(
-            lambda x: x - x % 32, (w, h)
-        )  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
        image = np.array(image).astype(np.float32) / 255.0
        image = image[None].transpose(0, 3, 1, 2)
        image = torch.from_numpy(image)