mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
resize initial image to match requested width and height, preserving aspect ratio. Closes #210. Closes #207 (#214)
This commit is contained in:
parent
8bf321f6ae
commit
a51e18ea98
35
TODO.txt
35
TODO.txt
@ -1,35 +0,0 @@
|
|||||||
Feature requests:
|
|
||||||
|
|
||||||
|
|
||||||
1. "gobig" mode - split image into strips, scale up, add detail using - DONE!
|
|
||||||
img2img and reassemble with feathering. Issue #66.
|
|
||||||
See https://github.com/jquesnelle/txt2imghd
|
|
||||||
|
|
||||||
2. Port basujindal low VRAM optimizations. Issue #62
|
|
||||||
|
|
||||||
3. Store images under folders named after the prompt. Issue #27.
|
|
||||||
|
|
||||||
4. Some sort of automation for generating variations. Issues #32 and #47.
|
|
||||||
|
|
||||||
5. Support for inpainting masks #68.
|
|
||||||
|
|
||||||
6. Support for loading variations of the stable-diffusion
|
|
||||||
weights #49
|
|
||||||
|
|
||||||
7. Support for klms and other non-ddim samplers in img2img() #36 - DONE!
|
|
||||||
|
|
||||||
8. Pass a shell command to open up an image viewer on the last
|
|
||||||
batch of images generated #29.
|
|
||||||
|
|
||||||
9. Change sampler and outdir after initialization #115
|
|
||||||
|
|
||||||
Code Refactorization:
|
|
||||||
|
|
||||||
1. Move the PNG file generation code out of simplet2i and into - DONE!
|
|
||||||
separate module. txt2img() and img2img() should return Image
|
|
||||||
objects, and parent code is responsible for filenaming logic.
|
|
||||||
|
|
||||||
2. Refactor redundant code that is shared between txt2img() and - DONE!
|
|
||||||
img2img().
|
|
||||||
|
|
||||||
3. Experiment with replacing CompViz code with HuggingFace. - NOT WORTH IT!
|
|
54
ldm/dream/image_util.py
Normal file
54
ldm/dream/image_util.py
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
class InitImageResizer():
    """Simple class to create resized copies of an Image while preserving the aspect ratio."""

    def __init__(self, image):
        # NOTE: parameter renamed from `Image` to `image` — the old name
        # shadowed the `PIL.Image` module import used inside resize().
        # The source image is kept as-is; resize() never mutates it.
        self.image = image

    def resize(self, width=None, height=None) -> Image:
        """
        Return a copy of the image resized to width x height.

        The aspect ratio is maintained, with any excess space
        filled using black borders (i.e. letterboxed). If
        neither width nor height are provided, then returns
        a copy of the original image. If one or the other is
        provided, then the other will be calculated from the
        aspect ratio.

        Everything is floored to the nearest multiple of 64 so
        that it can be passed to img2img().
        """
        im = self.image

        # Neither dimension requested: nothing to compute.
        if not (width or height):
            return im.copy()

        ar = im.width / im.height  # source aspect ratio (w / h)

        # Infer whichever target dimension is missing from the aspect ratio.
        if not height:   # height missing
            height = int(width / ar)
        if not width:    # width missing
            width = int(height * ar)

        # rw and rh are the resizing width and height for the image.
        # They maintain the aspect ratio, but may not completely fill up
        # the requested destination size.
        # BUG FIX: the portrait branch previously computed (rw, rh) as
        # (int(height*ar), width) — the resize height must be `height`,
        # not `width`, or portrait images are scaled to the wrong size
        # whenever width != height.
        (rw, rh) = (
            (width, int(width / ar))
            if im.width >= im.height
            else (int(height * ar), height)
        )

        # Floor everything to multiples of 64 so img2img() accepts it.
        width, height, rw, rh = map(
            lambda x: x - x % 64, (width, height, rw, rh)
        )

        # Resize the original image so that it fits inside the destination.
        resized_image = self.image.resize(
            (rw, rh), resample=Image.Resampling.LANCZOS
        )

        # Create a new destination image of the requested dimensions
        # (black background, i.e. the letterbox) and paste the resized
        # image into it, centered.
        new_image = Image.new('RGB', (width, height))
        new_image.paste(resized_image, ((width - rw) // 2, (height - rh) // 2))

        return new_image
|
||||||
|
|
||||||
|
|
@ -23,7 +23,7 @@ class Completer:
|
|||||||
buffer = readline.get_line_buffer()
|
buffer = readline.get_line_buffer()
|
||||||
|
|
||||||
if text.startswith(('-I', '--init_img')):
|
if text.startswith(('-I', '--init_img')):
|
||||||
return self._path_completions(text, state, ('.png'))
|
return self._path_completions(text, state, ('.png','.jpg','.jpeg'))
|
||||||
|
|
||||||
if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
|
if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
|
||||||
return self._path_completions(text, state, ())
|
return self._path_completions(text, state, ())
|
||||||
|
@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
|
|||||||
from ldm.models.diffusion.plms import PLMSSampler
|
from ldm.models.diffusion.plms import PLMSSampler
|
||||||
from ldm.models.diffusion.ksampler import KSampler
|
from ldm.models.diffusion.ksampler import KSampler
|
||||||
from ldm.dream.pngwriter import PngWriter
|
from ldm.dream.pngwriter import PngWriter
|
||||||
|
from ldm.dream.image_util import InitImageResizer
|
||||||
|
|
||||||
"""Simplified text to image API for stable diffusion/latent diffusion
|
"""Simplified text to image API for stable diffusion/latent diffusion
|
||||||
|
|
||||||
@ -204,7 +205,6 @@ class T2I:
|
|||||||
skip_normalize=False,
|
skip_normalize=False,
|
||||||
image_callback=None,
|
image_callback=None,
|
||||||
step_callback=None,
|
step_callback=None,
|
||||||
# these are specific to txt2img
|
|
||||||
width=None,
|
width=None,
|
||||||
height=None,
|
height=None,
|
||||||
# these are specific to img2img
|
# these are specific to img2img
|
||||||
@ -270,14 +270,16 @@ class T2I:
|
|||||||
assert (
|
assert (
|
||||||
0.0 <= strength <= 1.0
|
0.0 <= strength <= 1.0
|
||||||
), 'can only work with strength in [0.0, 1.0]'
|
), 'can only work with strength in [0.0, 1.0]'
|
||||||
w = int(width / 64) * 64
|
w, h = map(
|
||||||
h = int(height / 64) * 64
|
lambda x: x - x % 64, (width, height)
|
||||||
|
) # resize to integer multiple of 64
|
||||||
|
|
||||||
if h != height or w != width:
|
if h != height or w != width:
|
||||||
print(
|
print(
|
||||||
f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
|
f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
|
||||||
)
|
)
|
||||||
height = h
|
height = h
|
||||||
width = w
|
width = w
|
||||||
|
|
||||||
scope = autocast if self.precision == 'autocast' else nullcontext
|
scope = autocast if self.precision == 'autocast' else nullcontext
|
||||||
|
|
||||||
@ -301,6 +303,8 @@ class T2I:
|
|||||||
ddim_eta=ddim_eta,
|
ddim_eta=ddim_eta,
|
||||||
skip_normalize=skip_normalize,
|
skip_normalize=skip_normalize,
|
||||||
init_img=init_img,
|
init_img=init_img,
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
strength=strength,
|
strength=strength,
|
||||||
callback=step_callback,
|
callback=step_callback,
|
||||||
)
|
)
|
||||||
@ -441,6 +445,8 @@ class T2I:
|
|||||||
ddim_eta,
|
ddim_eta,
|
||||||
skip_normalize,
|
skip_normalize,
|
||||||
init_img,
|
init_img,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
strength,
|
strength,
|
||||||
callback, # Currently not implemented for img2img
|
callback, # Currently not implemented for img2img
|
||||||
):
|
):
|
||||||
@ -457,7 +463,7 @@ class T2I:
|
|||||||
else:
|
else:
|
||||||
sampler = self.sampler
|
sampler = self.sampler
|
||||||
|
|
||||||
init_image = self._load_img(init_img).to(self.device)
|
init_image = self._load_img(init_img,width,height).to(self.device)
|
||||||
init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
|
init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
|
||||||
with precision_scope(self.device.type):
|
with precision_scope(self.device.type):
|
||||||
init_latent = self.model.get_first_stage_encoding(
|
init_latent = self.model.get_first_stage_encoding(
|
||||||
@ -616,17 +622,15 @@ class T2I:
|
|||||||
model.half()
|
model.half()
|
||||||
return model
|
return model
|
||||||
|
|
||||||
def _load_img(self, path):
|
def _load_img(self, path, width, height):
|
||||||
print(f'image path = {path}, cwd = {os.getcwd()}')
|
print(f'image path = {path}, cwd = {os.getcwd()}')
|
||||||
with Image.open(path) as img:
|
with Image.open(path) as img:
|
||||||
image = img.convert('RGB')
|
image = img.convert('RGB')
|
||||||
|
print(f'loaded input image of size {image.width}x{image.height} from {path}')
|
||||||
|
|
||||||
|
image = InitImageResizer(image).resize(width,height)
|
||||||
|
print(f'resized input image to size {image.width}x{image.height}')
|
||||||
|
|
||||||
w, h = image.size
|
|
||||||
print(f'loaded input image of size ({w}, {h}) from {path}')
|
|
||||||
w, h = map(
|
|
||||||
lambda x: x - x % 32, (w, h)
|
|
||||||
) # resize to integer multiple of 32
|
|
||||||
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
|
|
||||||
image = np.array(image).astype(np.float32) / 255.0
|
image = np.array(image).astype(np.float32) / 255.0
|
||||||
image = image[None].transpose(0, 3, 1, 2)
|
image = image[None].transpose(0, 3, 1, 2)
|
||||||
image = torch.from_numpy(image)
|
image = torch.from_numpy(image)
|
||||||
|
Loading…
Reference in New Issue
Block a user