resize initial image to match requested width and height, preserving aspect ratio. Closes #210. Closes #207 (#214)

This commit is contained in:
Lincoln Stein 2022-08-30 15:26:02 -04:00 committed by GitHub
parent 8bf321f6ae
commit a51e18ea98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 71 additions and 48 deletions

View File

@ -1,35 +0,0 @@
Feature requests:
1. "gobig" mode - split image into strips, scale up, add detail using - DONE!
img2img and reassemble with feathering. Issue #66.
See https://github.com/jquesnelle/txt2imghd
2. Port basujindal low VRAM optimizations. Issue #62
3. Store images under folders named after the prompt. Issue #27.
4. Some sort of automation for generating variations. Issues #32 and #47.
5. Support for inpainting masks #68.
6. Support for loading variations of the stable-diffusion
weights #49
7. Support for klms and other non-ddim samplers in img2img() #36 - DONE!
8. Pass a shell command to open up an image viewer on the last
batch of images generated #29.
9. Change sampler and outdir after initialization #115
Code Refactorization:
1. Move the PNG file generation code out of simplet2i and into - DONE!
separate module. txt2img() and img2img() should return Image
objects, and parent code is responsible for filenaming logic.
2. Refactor redundant code that is shared between txt2img() and - DONE!
img2img().
3. Experiment with replacing CompVis code with HuggingFace. - NOT WORTH IT!

54
ldm/dream/image_util.py Normal file
View File

@ -0,0 +1,54 @@
from PIL import Image
class InitImageResizer():
    """Create resized copies of an image while preserving its aspect ratio."""

    def __init__(self,Image):
        # NOTE: the parameter keeps its original name `Image` for backward
        # compatibility. It is the image *instance* to resize and only
        # shadows the PIL module inside this constructor.
        self.image = Image

    @staticmethod
    def _fit_dimensions(src_width, src_height, width=None, height=None, multiple=64):
        """
        Compute the destination size and the size of the fitted image.

        src_width and src_height are the dimensions of the original image.
        width and height are the requested destination dimensions; at least
        one of them must be provided, and a missing one is derived from the
        source aspect ratio.

        Returns a tuple (width, height, rw, rh) in which width x height is
        the destination canvas and rw x rh is the size the source image is
        scaled to so that it fits inside the canvas. Every value is floored
        to a multiple of `multiple`, but never below one `multiple`.
        """
        width = int(width) if width else 0
        height = int(height) if height else 0

        # Infer the missing dimension from the aspect ratio. Integer
        # arithmetic avoids float results such as 511.99999 that would
        # otherwise lose a whole step when floored.
        if not height:
            height = width * src_height // src_width
        if not width:
            width = height * src_width // src_height

        # Decide which side of the box limits the image by comparing the
        # aspect ratios of the box and of the image (cross-multiplied to
        # stay in integers), not by the orientation of the image alone.
        if width * src_height <= height * src_width:
            # box is relatively narrower than the image: width is the limit
            rw, rh = width, width * src_height // src_width
        else:
            # box is relatively wider than the image: height is the limit
            rw, rh = height * src_width // src_height, height

        def snap(value):
            # floor to a multiple, but keep at least one step so that no
            # dimension collapses to zero
            return max(multiple, value - value % multiple)

        return snap(width), snap(height), snap(rw), snap(rh)

    def resize(self,width=None,height=None) -> "Image.Image":
        """
        Return a copy of the image resized to width x height.
        The aspect ratio is maintained, with any excess space
        filled using black borders (i.e. letterboxed). If
        neither width nor height are provided, then returns
        a copy of the original image. If one or the other is
        provided, then the other will be calculated from the
        aspect ratio.

        Everything is floored to the nearest multiple of 64
        (with a minimum of 64) so that it can be passed to
        img2img().
        """
        im = self.image

        if not (width or height):
            return im.copy()

        # rw and rh are the resizing width and height for the image;
        # they maintain the aspect ratio, but may not completely fill up
        # the requested destination size
        width, height, rw, rh = self._fit_dimensions(
            im.width, im.height, width, height
        )

        # resize the original image so that it fits inside the dest
        resized_image = im.resize((rw, rh), resample=Image.Resampling.LANCZOS)

        # create new destination image of specified dimensions
        # and paste the resized image into it centered appropriately
        new_image = Image.new('RGB', (width, height))
        new_image.paste(resized_image, ((width - rw) // 2, (height - rh) // 2))
        return new_image

View File

@ -23,7 +23,7 @@ class Completer:
buffer = readline.get_line_buffer() buffer = readline.get_line_buffer()
if text.startswith(('-I', '--init_img')): if text.startswith(('-I', '--init_img')):
return self._path_completions(text, state, ('.png')) return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
if buffer.strip().endswith('cd') or text.startswith(('.', '/')): if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
return self._path_completions(text, state, ()) return self._path_completions(text, state, ())

View File

@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ksampler import KSampler from ldm.models.diffusion.ksampler import KSampler
from ldm.dream.pngwriter import PngWriter from ldm.dream.pngwriter import PngWriter
from ldm.dream.image_util import InitImageResizer
"""Simplified text to image API for stable diffusion/latent diffusion """Simplified text to image API for stable diffusion/latent diffusion
@ -204,7 +205,6 @@ class T2I:
skip_normalize=False, skip_normalize=False,
image_callback=None, image_callback=None,
step_callback=None, step_callback=None,
# these are specific to txt2img
width=None, width=None,
height=None, height=None,
# these are specific to img2img # these are specific to img2img
@ -270,14 +270,16 @@ class T2I:
assert ( assert (
0.0 <= strength <= 1.0 0.0 <= strength <= 1.0
), 'can only work with strength in [0.0, 1.0]' ), 'can only work with strength in [0.0, 1.0]'
w = int(width / 64) * 64 w, h = map(
h = int(height / 64) * 64 lambda x: x - x % 64, (width, height)
) # resize to integer multiple of 64
if h != height or w != width: if h != height or w != width:
print( print(
f'Height and width must be multiples of 64. Resizing to {h}x{w}.' f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
) )
height = h height = h
width = w width = w
scope = autocast if self.precision == 'autocast' else nullcontext scope = autocast if self.precision == 'autocast' else nullcontext
@ -301,6 +303,8 @@ class T2I:
ddim_eta=ddim_eta, ddim_eta=ddim_eta,
skip_normalize=skip_normalize, skip_normalize=skip_normalize,
init_img=init_img, init_img=init_img,
width=width,
height=height,
strength=strength, strength=strength,
callback=step_callback, callback=step_callback,
) )
@ -441,6 +445,8 @@ class T2I:
ddim_eta, ddim_eta,
skip_normalize, skip_normalize,
init_img, init_img,
width,
height,
strength, strength,
callback, # Currently not implemented for img2img callback, # Currently not implemented for img2img
): ):
@ -457,7 +463,7 @@ class T2I:
else: else:
sampler = self.sampler sampler = self.sampler
init_image = self._load_img(init_img).to(self.device) init_image = self._load_img(init_img,width,height).to(self.device)
init_image = repeat(init_image, '1 ... -> b ...', b=batch_size) init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
with precision_scope(self.device.type): with precision_scope(self.device.type):
init_latent = self.model.get_first_stage_encoding( init_latent = self.model.get_first_stage_encoding(
@ -616,17 +622,15 @@ class T2I:
model.half() model.half()
return model return model
def _load_img(self, path): def _load_img(self, path, width, height):
print(f'image path = {path}, cwd = {os.getcwd()}') print(f'image path = {path}, cwd = {os.getcwd()}')
with Image.open(path) as img: with Image.open(path) as img:
image = img.convert('RGB') image = img.convert('RGB')
print(f'loaded input image of size {image.width}x{image.height} from {path}')
image = InitImageResizer(image).resize(width,height)
print(f'resized input image to size {image.width}x{image.height}')
w, h = image.size
print(f'loaded input image of size ({w}, {h}) from {path}')
w, h = map(
lambda x: x - x % 32, (w, h)
) # resize to integer multiple of 32
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
image = np.array(image).astype(np.float32) / 255.0 image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2) image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image) image = torch.from_numpy(image)