diff --git a/TODO.txt b/TODO.txt
deleted file mode 100644
index 40833deb64..0000000000
--- a/TODO.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Feature requests:
-
-
-1. "gobig" mode - split image into strips, scale up, add detail using   - DONE!
-   img2img and reassemble with feathering. Issue #66.
-   See https://github.com/jquesnelle/txt2imghd
-
-2. Port basujindal low VRAM optimizations. Issue #62
-
-3. Store images under folders named after the prompt. Issue #27.
-
-4. Some sort of automation for generating variations. Issues #32 and #47.
-
-5. Support for inpainting masks #68.
-
-6. Support for loading variations of the stable-diffusion
-   weights #49
-
-7. Support for klms and other non-ddim samplers in img2img() #36   - DONE!
-
-8. Pass a shell command to open up an image viewer on the last
-   batch of images generated #29.
-
-9. Change sampler and outdir after initialization #115
-
-Code Refactorization:
-
-1. Move the PNG file generation code out of simplet2i and into   - DONE!
-   separate module. txt2img() and img2img() should return Image
-   objects, and parent code is responsible for filenaming logic.
-
-2. Refactor redundant code that is shared between txt2img() and   - DONE!
-   img2img().
-
-3. Experiment with replacing CompViz code with HuggingFace.   - NOT WORTH IT!
diff --git a/ldm/dream/image_util.py b/ldm/dream/image_util.py
new file mode 100644
index 0000000000..fa14ec897b
--- /dev/null
+++ b/ldm/dream/image_util.py
@@ -0,0 +1,54 @@
+from PIL import Image
+
+class InitImageResizer:
+    """Simple class to create resized copies of an Image while preserving the aspect ratio."""
+    def __init__(self, image):
+        self.image = image
+
+    def resize(self, width=None, height=None) -> Image.Image:
+        """
+        Return a copy of the image resized to width x height.
+        The aspect ratio is maintained, with any excess space
+        filled using black borders (i.e. letterboxed). If
+        neither width nor height is provided, a copy of the
+        original image is returned. If only one of them is
+        provided, the other is calculated from the aspect
+        ratio.
+
+        Both dimensions are rounded down to the nearest
+        multiple of 64 so that the result can be passed to img2img().
+        """
+        im = self.image
+
+        if not (width or height):
+            return im.copy()
+
+        ar = im.width / im.height
+
+        # Infer the missing value from the aspect ratio
+        if not height:  # height missing
+            height = int(width / ar)
+        if not width:   # width missing
+            width = int(height * ar)
+
+        # rw and rh are the resizing width and height of the image;
+        # they maintain the aspect ratio, but may not completely fill
+        # the requested destination size
+        (rw, rh) = (width, int(width / ar)) if im.width >= im.height else (int(height * ar), height)
+
+        # round everything down to multiples of 64
+        width, height, rw, rh = map(
+            lambda x: x - x % 64, (width, height, rw, rh)
+        )
+
+        # resize the original image so that it fits inside the destination
+        resized_image = self.image.resize((rw, rh), resample=Image.Resampling.LANCZOS)
+
+        # create a new destination image of the specified dimensions
+        # and paste the resized image into it, centered
+        new_image = Image.new('RGB', (width, height))
+        new_image.paste(resized_image, ((width - rw) // 2, (height - rh) // 2))
+
+        return new_image
+
+
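Note (not part of the diff): a minimal usage sketch of the new InitImageResizer class, assuming a made-up 1024x768 input file named photo.jpg.

    from PIL import Image
    from ldm.dream.image_util import InitImageResizer

    im = Image.open('photo.jpg')                    # assume 1024x768, 4:3 landscape
    fitted = InitImageResizer(im).resize(512, 512)  # request a 512x512 canvas

    # The photo is scaled to 512x384 (aspect ratio preserved, both
    # multiples of 64) and pasted centered on a black 512x512 canvas,
    # leaving 64-pixel letterbox bars at the top and bottom.
    assert fitted.size == (512, 512)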
diff --git a/ldm/dream/readline.py b/ldm/dream/readline.py
index 5cf99523fc..6c6a390c42 100644
--- a/ldm/dream/readline.py
+++ b/ldm/dream/readline.py
@@ -23,7 +23,7 @@ class Completer:
         buffer = readline.get_line_buffer()
 
         if text.startswith(('-I', '--init_img')):
-            return self._path_completions(text, state, ('.png'))
+            return self._path_completions(text, state, ('.png', '.jpg', '.jpeg'))
 
         if buffer.strip().endswith('cd') or text.startswith(('.', '/')):
             return self._path_completions(text, state, ())
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 710952d299..7e44246f6b 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -27,6 +27,7 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
+from ldm.dream.image_util import InitImageResizer
 
 """Simplified text to image API for stable diffusion/latent diffusion
 
@@ -204,7 +205,6 @@ class T2I:
         skip_normalize=False,
         image_callback=None,
         step_callback=None,
-        # these are specific to txt2img
         width=None,
         height=None,
         # these are specific to img2img
@@ -270,14 +270,16 @@ class T2I:
         assert (
             0.0 <= strength <= 1.0
         ), 'can only work with strength in [0.0, 1.0]'
-        w = int(width / 64) * 64
-        h = int(height / 64) * 64
+        w, h = map(
+            lambda x: x - x % 64, (width, height)
+        )  # round down to an integer multiple of 64
+
         if h != height or w != width:
             print(
                 f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
             )
             height = h
-            width = w
+            width  = w
 
         scope = autocast if self.precision == 'autocast' else nullcontext
 
@@ -301,6 +303,8 @@ class T2I:
                     ddim_eta=ddim_eta,
                     skip_normalize=skip_normalize,
                     init_img=init_img,
+                    width=width,
+                    height=height,
                     strength=strength,
                     callback=step_callback,
                 )
@@ -441,6 +445,8 @@ class T2I:
         ddim_eta,
         skip_normalize,
         init_img,
+        width,
+        height,
         strength,
         callback,   # Currently not implemented for img2img
     ):
@@ -457,7 +463,7 @@ class T2I:
         else:
             sampler = self.sampler
 
-        init_image = self._load_img(init_img).to(self.device)
+        init_image = self._load_img(init_img, width, height).to(self.device)
         init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
         with precision_scope(self.device.type):
             init_latent = self.model.get_first_stage_encoding(
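Note (not part of the diff): the x - x % 64 idiom used above rounds a dimension down to the nearest multiple of 64; a quick check with made-up values:

    w, h = map(lambda x: x - x % 64, (1000, 600))
    # 1000 % 64 = 40 -> w = 960;  600 % 64 = 24 -> h = 576
    assert (w, h) == (960, 576)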
@@ -616,17 +622,15 @@ class T2I:
             model.half()
         return model
 
-    def _load_img(self, path):
+    def _load_img(self, path, width, height):
         print(f'image path = {path}, cwd = {os.getcwd()}')
         with Image.open(path) as img:
             image = img.convert('RGB')
+        print(f'loaded input image of size {image.width}x{image.height} from {path}')
+
+        image = InitImageResizer(image).resize(width, height)
+        print(f'resized input image to size {image.width}x{image.height}')
 
-        w, h = image.size
-        print(f'loaded input image of size ({w}, {h}) from {path}')
-        w, h = map(
-            lambda x: x - x % 32, (w, h)
-        )  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
         image = np.array(image).astype(np.float32) / 255.0
         image = image[None].transpose(0, 3, 1, 2)
         image = torch.from_numpy(image)
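Note (not part of the diff): the tail of _load_img() converts the PIL image into the batched NCHW float tensor the model consumes. A standalone sketch of those steps with a toy 64x64 image:

    import numpy as np
    import torch
    from PIL import Image

    image = Image.new('RGB', (64, 64))
    x = np.array(image).astype(np.float32) / 255.0  # HWC floats in [0, 1]
    x = x[None].transpose(0, 3, 1, 2)               # add batch dim, HWC -> CHW
    x = torch.from_numpy(x)
    assert x.shape == (1, 3, 64, 64)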