Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
resize initial image to match requested width and height, preserving aspect ratio. Closes #210. Closes #207 (#214)
parent: 8bf321f6ae
commit: a51e18ea98
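In rough terms, the new behavior fits the init image inside the requested box instead of stretching it, filling any leftover space with black. A minimal sketch of the geometry, using made-up numbers rather than anything from the diff:

# Hypothetical example: fit a 640x480 init image into a requested 512x512 box,
# preserving the aspect ratio and flooring dimensions to multiples of 64.
src_w, src_h = 640, 480
req_w, req_h = 512, 512
ar = src_w / src_h                    # 4:3 aspect ratio
rw, rh = req_w, int(req_w / ar)       # 512 x 384 fits inside the box
rw, rh = rw - rw % 64, rh - rh % 64   # both are already multiples of 64 here
print(rw, rh)                         # 512 384 -- the remaining 512x128 becomes black bars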
TODO.txt (file deleted; 35 lines removed)
@@ -1,35 +0,0 @@

Feature requests:

1. "gobig" mode - split image into strips, scale up, add detail using - DONE!
   img2img and reassemble with feathering. Issue #66.
   See https://github.com/jquesnelle/txt2imghd

2. Port basujindal low VRAM optimizations. Issue #62

3. Store images under folders named after the prompt. Issue #27.

4. Some sort of automation for generating variations. Issues #32 and #47.

5. Support for inpainting masks #68.

6. Support for loading variations of the stable-diffusion
   weights #49

7. Support for klms and other non-ddim samplers in img2img() #36 - DONE!

8. Pass a shell command to open up an image viewer on the last
   batch of images generated #29.

9. Change sampler and outdir after initialization #115

Code Refactorization:

1. Move the PNG file generation code out of simplet2i and into - DONE!
   separate module. txt2img() and img2img() should return Image
   objects, and parent code is responsible for filenaming logic.

2. Refactor redundant code that is shared between txt2img() and - DONE!
   img2img().

3. Experiment with replacing CompViz code with HuggingFace. - NOT WORTH IT!
ldm/dream/image_util.py (new file, +54 lines)
@@ -0,0 +1,54 @@
from PIL import Image


class InitImageResizer():
    """Simple class to create resized copies of an Image while preserving the aspect ratio."""

    def __init__(self, image):
        self.image = image

    def resize(self, width=None, height=None) -> Image.Image:
        """
        Return a copy of the image resized to width x height.
        The aspect ratio is maintained, with any excess space
        filled using black borders (i.e. letterboxed). If
        neither width nor height is provided, a copy of the
        original image is returned. If only one of them is
        provided, the other is calculated from the aspect ratio.

        Everything is floored to the nearest multiple of 64 so
        that the result can be passed to img2img().
        """
        im = self.image

        if not (width or height):
            return im.copy()

        ar = im.width / im.height

        # Infer missing values from the aspect ratio
        if not height:  # height missing
            height = int(width / ar)
        if not width:   # width missing
            width = int(height * ar)

        # rw and rh are the resizing width and height for the image.
        # They maintain the aspect ratio, but may not completely fill
        # the requested destination size.
        (rw, rh) = (width, int(width / ar)) if im.width >= im.height else (int(height * ar), height)

        # round everything down to multiples of 64
        width, height, rw, rh = map(
            lambda x: x - x % 64, (width, height, rw, rh)
        )

        # resize the original image so that it fits inside the destination box
        resized_image = self.image.resize((rw, rh), resample=Image.Resampling.LANCZOS)

        # create a new destination image of the specified dimensions
        # and paste the resized image into it, centered
        new_image = Image.new('RGB', (width, height))
        new_image.paste(resized_image, ((width - rw) // 2, (height - rh) // 2))

        return new_image
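As a quick way to exercise the new helper by itself, a minimal sketch (not part of the commit; 'init.png' is a hypothetical local file):

from PIL import Image
from ldm.dream.image_util import InitImageResizer

img = Image.open('init.png')                        # e.g. a 640x480 source image
resized = InitImageResizer(img).resize(width=512)   # height is inferred from the aspect ratio
print(resized.size)                                 # e.g. (512, 384); both floored to multiples of 64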
@@ -23,7 +23,7 @@ class Completer:
        buffer = readline.get_line_buffer()

        if text.startswith(('-I', '--init_img')):
-            return self._path_completions(text, state, ('.png'))
+            return self._path_completions(text, state, ('.png','.jpg','.jpeg'))

        if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
            return self._path_completions(text, state, ())
@@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ksampler import KSampler
from ldm.dream.pngwriter import PngWriter
+from ldm.dream.image_util import InitImageResizer

"""Simplified text to image API for stable diffusion/latent diffusion
@@ -204,7 +205,6 @@ class T2I:
            skip_normalize=False,
            image_callback=None,
            step_callback=None,
            # these are specific to txt2img
            width=None,
            height=None,
            # these are specific to img2img
@@ -270,8 +270,10 @@ class T2I:
        assert (
            0.0 <= strength <= 1.0
        ), 'can only work with strength in [0.0, 1.0]'
-        w = int(width / 64) * 64
-        h = int(height / 64) * 64
+        w, h = map(
+            lambda x: x - x % 64, (width, height)
+        )  # resize to integer multiple of 64
+
        if h != height or w != width:
            print(
                f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
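The x - x % 64 expression simply floors a dimension to the nearest lower multiple of 64; for instance, with illustrative values only:

w, h = map(lambda x: x - x % 64, (513, 768))
print(w, h)   # 512 768 -- 513 is floored to 512, 768 is already a multiple of 64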
@@ -301,6 +303,8 @@ class T2I:
                ddim_eta=ddim_eta,
                skip_normalize=skip_normalize,
                init_img=init_img,
+                width=width,
+                height=height,
                strength=strength,
                callback=step_callback,
            )
@@ -441,6 +445,8 @@ class T2I:
        ddim_eta,
        skip_normalize,
        init_img,
+        width,
+        height,
        strength,
        callback,    # Currently not implemented for img2img
    ):
@@ -457,7 +463,7 @@ class T2I:
        else:
            sampler = self.sampler

-        init_image = self._load_img(init_img).to(self.device)
+        init_image = self._load_img(init_img,width,height).to(self.device)
        init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
        with precision_scope(self.device.type):
            init_latent = self.model.get_first_stage_encoding(
@@ -616,17 +622,15 @@ class T2I:
        model.half()
        return model

-    def _load_img(self, path):
+    def _load_img(self, path, width, height):
        print(f'image path = {path}, cwd = {os.getcwd()}')
        with Image.open(path) as img:
            image = img.convert('RGB')
+            print(f'loaded input image of size {image.width}x{image.height} from {path}')
+
+        image = InitImageResizer(image).resize(width,height)
+        print(f'resized input image to size {image.width}x{image.height}')

-        w, h = image.size
-        print(f'loaded input image of size ({w}, {h}) from {path}')
-        w, h = map(
-            lambda x: x - x % 32, (w, h)
-        )  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
        image = np.array(image).astype(np.float32) / 255.0
        image = image[None].transpose(0, 3, 1, 2)
        image = torch.from_numpy(image)
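For reference, the preprocessing steps visible in this hunk can be reproduced standalone. A rough sketch assuming a hypothetical local file 'init.png'; only the lines shown above are mirrored, since the rest of _load_img is cut off in this view:

import numpy as np
import torch
from PIL import Image
from ldm.dream.image_util import InitImageResizer

with Image.open('init.png') as img:
    image = img.convert('RGB')

image = InitImageResizer(image).resize(512, 512)     # letterbox to the requested size
image = np.array(image).astype(np.float32) / 255.0   # HWC uint8 -> float32 in [0, 1]
image = image[None].transpose(0, 3, 1, 2)            # add batch dim, HWC -> CHW
image = torch.from_numpy(image)                      # tensor of shape (1, 3, 512, 512)
print(image.shape)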