From 1480ef84dcafc7a2119879f1e2bef8d14fd810e8 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Wed, 31 Aug 2022 14:49:00 +1200
Subject: [PATCH 01/17] Add Resolution Checker

---
 ldm/dream/image_util.py |  2 +
 ldm/simplet2i.py        | 84 +++++++++++++++++++++++++++--------------
 2 files changed, 57 insertions(+), 29 deletions(-)

diff --git a/ldm/dream/image_util.py b/ldm/dream/image_util.py
index fa14ec897b..55610a9bab 100644
--- a/ldm/dream/image_util.py
+++ b/ldm/dream/image_util.py
@@ -49,6 +49,8 @@ class InitImageResizer():
         new_image = Image.new('RGB',(width,height))
         new_image.paste(resized_image,((width-rw)//2,(height-rh)//2))

+        print(f'>> Resized image size to {width}x{height}')
+
         return new_image

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 6901d45774..9ec10fe5a9 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -27,7 +27,6 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
-from ldm.dream.image_util import InitImageResizer

 """Simplified text to image API for stable diffusion/latent diffusion

@@ -261,16 +260,9 @@ class T2I:
         assert (
             0.0 <= strength <= 1.0
         ), 'can only work with strength in [0.0, 1.0]'
-        w, h = map(
-            lambda x: x - x % 64, (width, height)
-        )  # resize to integer multiple of 64
-        if h != height or w != width:
-            print(
-                f'Height and width must be multiples of 64. Resizing to {h}x{w}.'
-            )
-            height = h
-            width = w
+
+        if not(width == self.width and height == self.height):
+            width, height, _ = self._resolution_check(width, height, log=True)

         scope = autocast if self.precision == 'autocast' else nullcontext

@@ -352,7 +344,7 @@ class T2I:
                                 image_callback(image, seed)
                             else:
                                 image_callback(image, seed, upscaled=True)
-                    else: # no callback passed, so we simply replace old image with rescaled one
+                    else:  # no callback passed, so we simply replace old image with rescaled one
                         result[0] = image

         except KeyboardInterrupt:
@@ -434,7 +426,7 @@ class T2I:
         width,
         height,
         strength,
-        callback,    # Currently not implemented for img2img
+        callback,  # Currently not implemented for img2img
     ):
         """
         An infinite iterator of images from the prompt and the initial image
@@ -443,13 +435,13 @@ class T2I:
         # PLMS sampler not supported yet, so ignore previous sampler
         if self.sampler_name != 'ddim':
             print(
-                f"sampler '{self.sampler_name}' is not yet supported. Using DDM sampler"
+                f"sampler '{self.sampler_name}' is not yet supported. Using DDIM sampler"
             )
             sampler = DDIMSampler(self.model, device=self.device)
         else:
             sampler = self.sampler

-        init_image = self._load_img(init_img,width,height).to(self.device)
+        init_image = self._load_img(init_img, width, height).to(self.device)
         with precision_scope(self.device.type):
             init_latent = self.model.get_first_stage_encoding(
                 self.model.encode_first_stage(init_image)
             )

@@ -512,7 +504,8 @@ class T2I:
         x_samples = self.model.decode_first_stage(samples)
         x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
         if len(x_samples) != 1:
-            raise Exception(f'expected to get a single image, but got {len(x_samples)}')
+            raise Exception(
+                f'expected to get a single image, but got {len(x_samples)}')
         x_sample = 255.0 * rearrange(
             x_samples[0].cpu().numpy(), 'c h w -> h w c'
         )

@@ -547,8 +540,9 @@ class T2I:
             self.model.cond_stage_model.device = self.device
         except AttributeError:
             import traceback
-            print('Error loading model. Only the CUDA backend is supported',file=sys.stderr)
-            print(traceback.format_exc(),file=sys.stderr)
+            print(
+                'Error loading model. Only the CUDA backend is supported', file=sys.stderr)
+            print(traceback.format_exc(), file=sys.stderr)
             raise SystemExit
         self._set_sampler()

@@ -608,10 +602,26 @@ class T2I:
             print(f'image path = {path}, cwd = {os.getcwd()}')
         with Image.open(path) as img:
             image = img.convert('RGB')
-        print(f'loaded input image of size {image.width}x{image.height} from {path}')
+        print(
+            f'loaded input image of size {image.width}x{image.height} from {path}')

-        image = InitImageResizer(image).resize(width,height)
-        print(f'resized input image to size {image.width}x{image.height}')
+        from ldm.dream.image_util import InitImageResizer
+        if width == self.width and height == self.height:
+            new_image_width, new_image_height, resize_needed = self._resolution_check(
+                image.width, image.height)
+        else:
+            if height == self.height:
+                new_image_width, new_image_height, resize_needed = self._resolution_check(
+                    width, image.height)
+            if width == self.width:
+                new_image_width, new_image_height, resize_needed = self._resolution_check(
+                    image.width, height)
+            else:
+                image = InitImageResizer(image).resize(width, height)
+                resize_needed=False
+        if resize_needed:
+            image = InitImageResizer(image).resize(
+                new_image_width, new_image_height)

         image = np.array(image).astype(np.float32) / 255.0
         image = image[None].transpose(0, 3, 1, 2)

@@ -635,7 +645,7 @@ class T2I:
                 prompt = text[:idx]
                 remaining -= idx
                 # remove from main text
-                text = text[idx + 1 :]
+                text = text[idx + 1:]
                 # find value for weight
                 if ' ' in text:
                     idx = text.index(' ')   # first occurence
@@ -653,7 +663,7 @@ class T2I:
                     weight = 1.0
                 # remove from main text
                 remaining -= idx
-                text = text[idx + 1 :]
+                text = text[idx + 1:]
                 # append the sub-prompt and its weight
                 prompts.append(prompt)
                 weights.append(weight)
@@ -664,9 +674,9 @@ class T2I:
                 weights.append(1.0)
                 remaining = 0
         return prompts, weights
-
-    # shows how the prompt is tokenized
-    # usually tokens have '' to indicate end-of-word,
+
+    # shows how the prompt is tokenized
+    # usually tokens have '' to indicate end-of-word,
     # but for readability it has been replaced with ' '
     def _log_tokenization(self, text):
         if not self.log_tokenization:
             return
@@ -676,15 +686,31 @@ class T2I:
         discarded = ""
         usedTokens = 0
         totalTokens = len(tokens)
-        for i in range(0,totalTokens):
-            token = tokens[i].replace('',' ')
+        for i in range(0, totalTokens):
+            token = tokens[i].replace('', ' ')
             # alternate color
             s = (usedTokens % 6) + 1
             if i < self.model.cond_stage_model.max_length:
                 tokenized = tokenized + f"\x1b[0;3{s};40m{token}"
                 usedTokens += 1
-            else:  # over max token length
+            else:   # over max token length
                 discarded = discarded + f"\x1b[0;3{s};40m{token}"
         print(f"\nTokens ({usedTokens}):\n{tokenized}\x1b[0m")
         if discarded != "":
-            print(f"Tokens Discarded ({totalTokens-usedTokens}):\n{discarded}\x1b[0m")
+            print(
+                f"Tokens Discarded ({totalTokens-usedTokens}):\n{discarded}\x1b[0m")
+
+    def _resolution_check(self, width, height, log=False):
+        resize_needed = False
+        w, h = map(
+            lambda x: x - x % 64, (width, height)
+        )  # resize to integer multiple of 64
+        if h != height or w != width:
+            if log:
+                print(
+                    f'>> Provided width and height must be multiples of 64. Auto-resizing to {w}x{h}'
+                )
+            height = h
+            width = w
+            resize_needed = True
+        return width, height, resize_needed
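
A standalone illustration of the rounding rule this patch centralizes in `_resolution_check`: dimensions are floored to the nearest multiple of 64, as the model's downsampling stages require. This sketch is for reference only and is not part of the commit:

```
# Sketch of the multiple-of-64 flooring used by _resolution_check.
def floor_to_64(width, height):
    w, h = map(lambda x: x - x % 64, (width, height))
    return w, h, (w, h) != (width, height)

print(floor_to_64(512, 512))   # (512, 512, False) -- already valid
print(floor_to_64(513, 700))   # (512, 640, True)  -- triggers auto-resize
```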
From 153c93bdd43f96c05e79be74f95a9e755a45f4e1 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons
Date: Tue, 30 Aug 2022 21:21:04 -0700
Subject: [PATCH 02/17] refactor pngwriter

---
 ldm/dream/pngwriter.py | 66 ++++++++++++------------------------------
 ldm/dream/server.py    | 31 +++++++++++---------
 ldm/simplet2i.py       | 17 ++++++-----
 scripts/dream.py       | 44 +++++++++++++++++++---------
 4 files changed, 74 insertions(+), 84 deletions(-)

diff --git a/ldm/dream/pngwriter.py b/ldm/dream/pngwriter.py
index bf2488bcbd..3dec80e997 100644
--- a/ldm/dream/pngwriter.py
+++ b/ldm/dream/pngwriter.py
@@ -17,62 +17,32 @@ from PIL import Image, PngImagePlugin


 class PngWriter:
-    def __init__(self, outdir, prompt=None):
+    def __init__(self, outdir):
         self.outdir = outdir
-        self.prompt = prompt
-        self.filepath = None
-        self.files_written = []
         os.makedirs(outdir, exist_ok=True)

-    def write_image(self, image, seed, upscaled=False):
-        self.filepath = self.unique_filename(
-            seed, upscaled, self.filepath
-        )  # will increment name in some sensible way
-        try:
-            prompt = f'{self.prompt} -S{seed}'
-            self.save_image_and_prompt_to_png(image, prompt, self.filepath)
-        except IOError as e:
-            print(e)
-        if not upscaled:
-            self.files_written.append([self.filepath, seed])
+    # gives the next unique prefix in outdir
+    def unique_prefix(self):
+        # sort reverse alphabetically until we find max+1
+        dirlist = sorted(os.listdir(self.outdir), reverse=True)
+        # find the first filename that matches our pattern or return 000000.0.png
+        existing_name = next(
+            (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
+            '0000000.0.png',
+        )
+        basecount = int(existing_name.split('.', 1)[0]) + 1
+        return f'{basecount:06}'

-    def unique_filename(self, seed, upscaled=False, previouspath=None):
-        revision = 1
-
-        if previouspath is None:
-            # sort reverse alphabetically until we find max+1
-            dirlist = sorted(os.listdir(self.outdir), reverse=True)
-            # find the first filename that matches our pattern or return 000000.0.png
-            filename = next(
-                (f for f in dirlist if re.match('^(\d+)\..*\.png', f)),
-                '0000000.0.png',
-            )
-            basecount = int(filename.split('.', 1)[0])
-            basecount += 1
-            filename = f'{basecount:06}.{seed}.png'
-            return os.path.join(self.outdir, filename)
-
-        else:
-            basename = os.path.basename(previouspath)
-            x = re.match('^(\d+)\..*\.png', basename)
-            if not x:
-                return self.unique_filename(seed, upscaled, previouspath)
-
-            basecount = int(x.groups()[0])
-            series = 0
-            finished = False
-            while not finished:
-                series += 1
-                filename = f'{basecount:06}.{seed}.png'
-                path = os.path.join(self.outdir, filename)
-                finished = not os.path.exists(path)
-            return os.path.join(self.outdir, filename)
-
-    def save_image_and_prompt_to_png(self, image, prompt, path):
+    # saves image named _image_ to outdir/name, writing metadata from prompt
+    # returns full path of output
+    def save_image_and_prompt_to_png(self, image, prompt, name):
+        path = os.path.join(self.outdir, name)
         info = PngImagePlugin.PngInfo()
         info.add_text('Dream', prompt)
         image.save(path, 'PNG', pnginfo=info)
+        return path

+    # TODO move this to its own helper function; it's not really a method of pngwriter
     def make_grid(self, image_list, rows=None, cols=None):
         image_cnt = len(image_list)
         if None in (rows, cols):
diff --git a/ldm/dream/server.py b/ldm/dream/server.py
index 346e114a2b..7e84466f85 100644
--- a/ldm/dream/server.py
+++ b/ldm/dream/server.py
@@ -88,24 +88,25 @@ class DreamServer(BaseHTTPRequestHandler):
         images_generated = 0    # helps keep track of when upscaling is started
         images_upscaled = 0     # helps keep track of when upscaling is completed

-        pngwriter = PngWriter(
-            "./outputs/img-samples/", config['prompt'], 1
-        )
+        pngwriter = PngWriter("./outputs/img-samples/")
+        prefix = pngwriter.unique_prefix()

         # if upscaling is requested, then this will be called twice, once when
         # the images are first generated, and then again when after upscaling
         # is complete. The upscaling replaces the original file, so the second
         # entry should not be inserted into the image list.
         def image_done(image, seed, upscaled=False):
-            pngwriter.write_image(image, seed, upscaled)
+            name = f'{prefix}.{seed}.png'
+            path = pngwriter.save_image_and_prompt_to_png(image, f'{prompt} -S{seed}', name)

             # Append post_data to log, but only once!
             if not upscaled:
-                current_image = pngwriter.files_written[-1]
                 with open("./outputs/img-samples/dream_web_log.txt", "a") as log:
-                    log.write(f"{current_image[0]}: {json.dumps(config)}\n")
+                    log.write(f"{path}: {json.dumps(config)}\n")
+
+                # TODO fix format of this event
                 self.wfile.write(bytes(json.dumps(
-                    {'event':'result', 'files':current_image, 'config':config}
+                    {'event': 'result', 'files': [path, seed], 'config': config}
                 ) + '\n',"utf-8"))

         # control state of the "postprocessing..." message
@@ -129,22 +130,24 @@ class DreamServer(BaseHTTPRequestHandler):
                     {'event':action,'processed_file_cnt':f'{x}/{iterations}'}
                 ) + '\n',"utf-8"))

-        # TODO: refactor PngWriter:
-        # it doesn't need to know if batch_size > 1, just if this is _part of a batch_
-        step_writer = PngWriter('./outputs/intermediates/', prompt, 2)
+        step_writer = PngWriter('./outputs/intermediates/')
+        step_index = 1
         def image_progress(sample, step):
             if self.canceled.is_set():
                 self.wfile.write(bytes(json.dumps({'event':'canceled'}) + '\n', 'utf-8'))
                 raise CanceledException
-            url = None
+            path = None
             # since rendering images is moderately expensive, only render every 5th image
             # and don't bother with the last one, since it'll render anyway
+            nonlocal step_index
             if progress_images and step % 5 == 0 and step < steps - 1:
                 image = self.model._sample_to_image(sample)
-                step_writer.write_image(image, seed)   # TODO PngWriter to return path
-                url = step_writer.filepath
+                name = f'{prefix}.{seed}.{step_index}.png'
+                metadata = f'{prompt} -S{seed} [intermediate]'
+                path = step_writer.save_image_and_prompt_to_png(image, metadata, name)
+                step_index += 1
             self.wfile.write(bytes(json.dumps(
-                {'event':'step', 'step':step + 1, 'url': url}
+                {'event': 'step', 'step': step + 1, 'url': path}
             ) + '\n',"utf-8"))

         try:
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 645f84af53..cb6f6d82e0 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -171,10 +171,14 @@ class T2I:
         Optional named arguments are the same as those passed to T2I and prompt2image()
         """
         results = self.prompt2image(prompt, **kwargs)
-        pngwriter = PngWriter(outdir, prompt)
-        for r in results:
-            pngwriter.write_image(r[0], r[1])
-        return pngwriter.files_written
+        pngwriter = PngWriter(outdir)
+        prefix = pngwriter.unique_prefix()
+        outputs = []
+        for image, seed in results:
+            name = f'{prefix}.{seed}.png'
+            path = pngwriter.save_image_and_prompt_to_png(image, f'{prompt} -S{seed}', name)
+            outputs.append([path, seed])
+        return outputs

     def txt2img(self, prompt, **kwargs):
         outdir = kwargs.pop('outdir', 'outputs/img-samples')
@@ -349,10 +353,7 @@ class T2I:
                                 f'Error running RealESRGAN - Your image was not upscaled.\n{e}'
                             )
                         if image_callback is not None:
-                            if save_original:
-                                image_callback(image, seed)
-                            else:
-                                image_callback(image, seed, upscaled=True)
+                            image_callback(image, seed, upscaled=True)
                         else:  # no callback passed, so we simply replace old image with rescaled one
                             result[0] = image

diff --git a/scripts/dream.py b/scripts/dream.py
index 0014fb6d4d..963c3cd169 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -203,24 +203,40 @@ def main_loop(t2i, outdir, prompt_as_dir, parser, infile):

         # Here is where the images are actually generated!
         try:
-            file_writer = PngWriter(current_outdir, normalized_prompt)
-            callback = file_writer.write_image if individual_images else None
-            image_list = t2i.prompt2image(image_callback=callback, **vars(opt))
-            results = (
-                file_writer.files_written if individual_images else image_list
-            )
+            file_writer = PngWriter(current_outdir)
+            prefix = file_writer.unique_prefix()
+            seeds = set()
+            results = []
+            grid_images = dict()  # seed -> Image, only used if `do_grid`
+            def image_writer(image, seed, upscaled=False):
+                if do_grid:
+                    grid_images[seed] = image
+                else:
+                    if upscaled and opt.save_original:
+                        filename = f'{prefix}.{seed}.postprocessed.png'
+                    else:
+                        filename = f'{prefix}.{seed}.png'
+                    path = file_writer.save_image_and_prompt_to_png(image, f'{normalized_prompt} -S{seed}', filename)
+                    if (not upscaled) or opt.save_original:
+                        # only append to results if we didn't overwrite an earlier output
+                        results.append([path, seed])

-            if do_grid and len(results) > 0:
-                grid_img = file_writer.make_grid([r[0] for r in results])
-                filename = file_writer.unique_filename(results[0][1])
-                seeds = [a[1] for a in results]
-                results = [[filename, seeds]]
-                metadata_prompt = f'{normalized_prompt} -S{results[0][1]}'
-                file_writer.save_image_and_prompt_to_png(
+                seeds.add(seed)
+
+            t2i.prompt2image(image_callback=image_writer, **vars(opt))
+
+            if do_grid and len(grid_images) > 0:
+                grid_img = file_writer.make_grid(list(grid_images.values()))
+                first_seed = next(iter(seeds))
+                filename = f'{prefix}.{first_seed}.png'
+                # TODO better metadata for grid images
+                metadata_prompt = f'{normalized_prompt} -S{first_seed}'
+                path = file_writer.save_image_and_prompt_to_png(
                     grid_img, metadata_prompt, filename
                 )
+                results = [[path, seeds]]

-            last_seeds = [r[1] for r in results]
+            last_seeds = list(seeds)

         except AssertionError as e:
             print(e)
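
For reference, the naming scheme that replaces `unique_filename` can be exercised on its own. This is a hedged extraction of `unique_prefix` into a free function; the example directory contents are hypothetical:

```
import os
import re

def unique_prefix(outdir):
    # Sort reverse alphabetically so the highest-numbered file comes first,
    # then take max+1; falls back to 000001 for an empty directory.
    dirlist = sorted(os.listdir(outdir), reverse=True)
    existing_name = next(
        (f for f in dirlist if re.match(r'^(\d+)\..*\.png', f)),
        '0000000.0.png',
    )
    return f'{int(existing_name.split(".", 1)[0]) + 1:06}'

# With 000041.1234.png and 000042.99.png present, this returns '000043';
# every image in one batch then shares that prefix: 000043.<seed>.png.
```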
From b983d61e939d7e0f5082a0c1cd369b1488ea3d9d Mon Sep 17 00:00:00 2001
From: Kevin Gibbons
Date: Tue, 30 Aug 2022 21:33:42 -0700
Subject: [PATCH 03/17] tweak format of "result" event in web ui

---
 ldm/dream/server.py       | 3 +--
 static/dream_web/index.js | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/ldm/dream/server.py b/ldm/dream/server.py
index 7e84466f85..6a667f616b 100644
--- a/ldm/dream/server.py
+++ b/ldm/dream/server.py
@@ -104,9 +104,8 @@ class DreamServer(BaseHTTPRequestHandler):
                 with open("./outputs/img-samples/dream_web_log.txt", "a") as log:
                     log.write(f"{path}: {json.dumps(config)}\n")

-                # TODO fix format of this event
                 self.wfile.write(bytes(json.dumps(
-                    {'event': 'result', 'files': [path, seed], 'config': config}
+                    {'event': 'result', 'url': path, 'seed': seed, 'config': config}
                 ) + '\n',"utf-8"))

         # control state of the "postprocessing..." message
diff --git a/static/dream_web/index.js b/static/dream_web/index.js
index 5ef75a34a3..4b1c8ac2ec 100644
--- a/static/dream_web/index.js
+++ b/static/dream_web/index.js
@@ -95,7 +95,7 @@ async function generateSubmit(form) {
             if (data.event === 'result') {
                 noOutputs = false;
                 document.querySelector("#no-results-message")?.remove();
-                appendOutput(data.files[0],data.files[1],data.config);
+                appendOutput(data.url, data.seed, data.config);
                 progressEle.setAttribute('value', 0);
                 progressEle.setAttribute('max', totalSteps);
                 progressImageEle.src = BLANK_IMAGE_URL;

From d566ee092a4a545c5db0099081a6625fb1384163 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons
Date: Tue, 30 Aug 2022 21:36:38 -0700
Subject: [PATCH 04/17] move make_grid into image_utils

---
 ldm/dream/image_util.py | 21 ++++++++++++++++++++-
 ldm/dream/pngwriter.py  | 27 ++-------------------------
 scripts/dream.py        |  3 ++-
 3 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/ldm/dream/image_util.py b/ldm/dream/image_util.py
index fa14ec897b..e389fd50e3 100644
--- a/ldm/dream/image_util.py
+++ b/ldm/dream/image_util.py
@@ -1,3 +1,4 @@
+from math import sqrt, floor, ceil
 from PIL import Image

 class InitImageResizer():
@@ -51,4 +52,22 @@ class InitImageResizer():

         return new_image

-
+def make_grid(image_list, rows=None, cols=None):
+    image_cnt = len(image_list)
+    if None in (rows, cols):
+        rows = floor(sqrt(image_cnt))  # try to make it square
+        cols = ceil(image_cnt / rows)
+    width = image_list[0].width
+    height = image_list[0].height
+
+    grid_img = Image.new('RGB', (width * cols, height * rows))
+    i = 0
+    for r in range(0, rows):
+        for c in range(0, cols):
+            if i >= len(image_list):
+                break
+            grid_img.paste(image_list[i], (c * width, r * height))
+            i = i + 1
+
+    return grid_img
+
diff --git a/ldm/dream/pngwriter.py b/ldm/dream/pngwriter.py
index 3dec80e997..f6b1762883 100644
--- a/ldm/dream/pngwriter.py
+++ b/ldm/dream/pngwriter.py
@@ -2,16 +2,13 @@
 Two helper classes for dealing with PNG images and their path names.
 PngWriter -- Converts Images generated by T2I into PNGs, finds
              appropriate names for them, and writes prompt metadata
-             into the PNG. Intended to be subclassable in order to
-             create more complex naming schemes, including using the
-             prompt for file/directory names.
+             into the PNG.
 PromptFormatter -- Utility for converting a Namespace of prompt parameters
                    back into a formatted prompt string with command-line switches.
 """
 import os
 import re
-from math import sqrt, floor, ceil
-from PIL import Image, PngImagePlugin
+from PIL import PngImagePlugin

 # -------------------image generation utils-----

@@ -42,26 +39,6 @@ class PngWriter:
         image.save(path, 'PNG', pnginfo=info)
         return path

-    # TODO move this to its own helper function; it's not really a method of pngwriter
-    def make_grid(self, image_list, rows=None, cols=None):
-        image_cnt = len(image_list)
-        if None in (rows, cols):
-            rows = floor(sqrt(image_cnt))  # try to make it square
-            cols = ceil(image_cnt / rows)
-        width = image_list[0].width
-        height = image_list[0].height
-
-        grid_img = Image.new('RGB', (width * cols, height * rows))
-        i = 0
-        for r in range(0, rows):
-            for c in range(0, cols):
-                if i>=len(image_list):
-                    break
-                grid_img.paste(image_list[i], (c * width, r * height))
-                i = i + 1
-
-        return grid_img
-

 class PromptFormatter:
     def __init__(self, t2i, opt):
diff --git a/scripts/dream.py b/scripts/dream.py
index 963c3cd169..50be6dfa7c 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -12,6 +12,7 @@ import time
 import ldm.dream.readline
 from ldm.dream.pngwriter import PngWriter, PromptFormatter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
+from ldm.dream.image_util import make_grid

 def main():
     """Initialize command-line parsers and the diffusion model"""
@@ -226,7 +227,7 @@ def main_loop(t2i, outdir, prompt_as_dir, parser, infile):
             t2i.prompt2image(image_callback=image_writer, **vars(opt))

             if do_grid and len(grid_images) > 0:
-                grid_img = file_writer.make_grid(list(grid_images.values()))
+                grid_img = make_grid(list(grid_images.values()))
                 first_seed = next(iter(seeds))
                 filename = f'{prefix}.{first_seed}.png'
                 # TODO better metadata for grid images
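
Now that `make_grid` is a free function, it can be used without a `PngWriter`. A small usage sketch, not part of the commit (placeholder tiles, sizes illustrative):

```
from PIL import Image
from ldm.dream.image_util import make_grid

# Nine 64x64 tiles; with rows/cols unset, make_grid chooses a square-ish
# layout: rows = floor(sqrt(9)) = 3, cols = ceil(9 / 3) = 3.
tiles = [Image.new('RGB', (64, 64), (i * 25, 0, 0)) for i in range(9)]
grid = make_grid(tiles)
assert grid.size == (192, 192)
```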
""" import os import re -from math import sqrt, floor, ceil -from PIL import Image, PngImagePlugin +from PIL import PngImagePlugin # -------------------image generation utils----- @@ -42,26 +39,6 @@ class PngWriter: image.save(path, 'PNG', pnginfo=info) return path - # TODO move this to its own helper function; it's not really a method of pngwriter - def make_grid(self, image_list, rows=None, cols=None): - image_cnt = len(image_list) - if None in (rows, cols): - rows = floor(sqrt(image_cnt)) # try to make it square - cols = ceil(image_cnt / rows) - width = image_list[0].width - height = image_list[0].height - - grid_img = Image.new('RGB', (width * cols, height * rows)) - i = 0 - for r in range(0, rows): - for c in range(0, cols): - if i>=len(image_list): - break - grid_img.paste(image_list[i], (c * width, r * height)) - i = i + 1 - - return grid_img - class PromptFormatter: def __init__(self, t2i, opt): diff --git a/scripts/dream.py b/scripts/dream.py index 963c3cd169..50be6dfa7c 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -12,6 +12,7 @@ import time import ldm.dream.readline from ldm.dream.pngwriter import PngWriter, PromptFormatter from ldm.dream.server import DreamServer, ThreadingDreamServer +from ldm.dream.image_util import make_grid def main(): """Initialize command-line parsers and the diffusion model""" @@ -226,7 +227,7 @@ def main_loop(t2i, outdir, prompt_as_dir, parser, infile): t2i.prompt2image(image_callback=image_writer, **vars(opt)) if do_grid and len(grid_images) > 0: - grid_img = file_writer.make_grid(list(grid_images.values())) + grid_img = make_grid(list(grid_images.values())) first_seed = next(iter(seeds)) filename = f'{prefix}.{first_seed}.png' # TODO better metadata for grid images From c83d01b3691942cd33be6bd2cb5bff7aa3600252 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 31 Aug 2022 01:41:15 -0400 Subject: [PATCH 05/17] fix hang during GFPGAN processing due to bug introduced by recent removal of batch_size arg from pngwriter --- ldm/dream/pngwriter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldm/dream/pngwriter.py b/ldm/dream/pngwriter.py index bf2488bcbd..f7838a58bf 100644 --- a/ldm/dream/pngwriter.py +++ b/ldm/dream/pngwriter.py @@ -65,6 +65,8 @@ class PngWriter: series += 1 filename = f'{basecount:06}.{seed}.png' path = os.path.join(self.outdir, filename) + if os.path.exists(path) and upscaled: + break finished = not os.path.exists(path) return os.path.join(self.outdir, filename) From 4280788c1845504fbc36d06ebfb29a5f3833a0da Mon Sep 17 00:00:00 2001 From: Tom Elovi Spruce Date: Wed, 31 Aug 2022 07:51:25 -0700 Subject: [PATCH 06/17] Fix link to Mac instructions in README (#235) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b881ad1ef3..b1c4898092 100644 --- a/README.md +++ b/README.md @@ -605,7 +605,7 @@ This will bring your local copy into sync with the remote one. ## Macintosh -See (README-Mac-MPS)[README-Mac-MPS.md] for instructions. +See [README-Mac-MPS](README-Mac-MPS.md) for instructions. 
# Simplified API for text to image generation From a547c333272a2b469658fbbf19f7f9d61dfd854d Mon Sep 17 00:00:00 2001 From: James Reynolds Date: Wed, 31 Aug 2022 08:56:38 -0600 Subject: [PATCH 08/17] check if torch.backends has mps before calling it (#245) Co-authored-by: James Reynolds --- ldm/dream/devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/dream/devices.py b/ldm/dream/devices.py index 9008f7871c..240754dd36 100644 --- a/ldm/dream/devices.py +++ b/ldm/dream/devices.py @@ -4,7 +4,7 @@ def choose_torch_device() -> str: '''Convenience routine for guessing which GPU device to run model on''' if torch.cuda.is_available(): return 'cuda' - if torch.backends.mps.is_available(): + if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): return 'mps' return 'cpu' From b622819051509174538b7c5608cd74ad707daa05 Mon Sep 17 00:00:00 2001 From: Mikhail Tishin Date: Wed, 31 Aug 2022 18:18:32 +0300 Subject: [PATCH 09/17] Expose img2img strength parameter in Web UI (#239) * Expose img2img strength parameter in Web UI * Fix strength label id Co-authored-by: Mikhail Tishin Co-authored-by: Kevin Gibbons https://github.com/bakkot --- ldm/dream/server.py | 2 ++ static/dream_web/index.html | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ldm/dream/server.py b/ldm/dream/server.py index 6a667f616b..47ca48fc27 100644 --- a/ldm/dream/server.py +++ b/ldm/dream/server.py @@ -65,6 +65,7 @@ class DreamServer(BaseHTTPRequestHandler): post_data = json.loads(self.rfile.read(content_length)) prompt = post_data['prompt'] initimg = post_data['initimg'] + strength = float(post_data['strength']) iterations = int(post_data['iterations']) steps = int(post_data['steps']) width = int(post_data['width']) @@ -174,6 +175,7 @@ class DreamServer(BaseHTTPRequestHandler): # Run img2img self.model.prompt2image(prompt, init_img = "./img2img-tmp.png", + strength = strength, iterations = iterations, cfg_scale = cfgscale, seed = seed, diff --git a/static/dream_web/index.html b/static/dream_web/index.html index fa233f07d9..77c728963e 100644 --- a/static/dream_web/index.html +++ b/static/dream_web/index.html @@ -59,13 +59,14 @@ -
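
The `hasattr` guard matters because `torch.backends.mps` only exists in PyTorch builds recent enough to ship the MPS backend; on older versions the attribute access itself raises `AttributeError`. The guarded probe, shown in isolation:

```
import torch

def choose_torch_device() -> str:
    '''Convenience routine for guessing which GPU device to run model on'''
    if torch.cuda.is_available():
        return 'cuda'
    # Probe for the attribute before calling is_available(), so older
    # torch builds without an mps backend fall through to CPU.
    if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        return 'mps'
    return 'cpu'
```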
From b622819051509174538b7c5608cd74ad707daa05 Mon Sep 17 00:00:00 2001
From: Mikhail Tishin
Date: Wed, 31 Aug 2022 18:18:32 +0300
Subject: [PATCH 09/17] Expose img2img strength parameter in Web UI (#239)

* Expose img2img strength parameter in Web UI

* Fix strength label id

Co-authored-by: Mikhail Tishin
Co-authored-by: Kevin Gibbons https://github.com/bakkot
---
 ldm/dream/server.py         | 2 ++
 static/dream_web/index.html | 9 +++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/ldm/dream/server.py b/ldm/dream/server.py
index 6a667f616b..47ca48fc27 100644
--- a/ldm/dream/server.py
+++ b/ldm/dream/server.py
@@ -65,6 +65,7 @@ class DreamServer(BaseHTTPRequestHandler):
         post_data = json.loads(self.rfile.read(content_length))
         prompt = post_data['prompt']
         initimg = post_data['initimg']
+        strength = float(post_data['strength'])
         iterations = int(post_data['iterations'])
         steps = int(post_data['steps'])
         width = int(post_data['width'])
@@ -174,6 +175,7 @@ class DreamServer(BaseHTTPRequestHandler):
                 # Run img2img
                 self.model.prompt2image(prompt,
                                         init_img = "./img2img-tmp.png",
+                                        strength = strength,
                                         iterations = iterations,
                                         cfg_scale = cfgscale,
                                         seed = seed,
diff --git a/static/dream_web/index.html b/static/dream_web/index.html
index fa233f07d9..77c728963e 100644
--- a/static/dream_web/index.html
+++ b/static/dream_web/index.html
@@ -59,13 +59,14 @@
-
-
-
-
+
+
+
+
+
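
Since `strength` now arrives from the browser as form text, a defensive parse on the server side would keep malformed input from crashing generation. This is a sketch of one option, not part of the patch; the 0.75 default mirrors the value previously hard-coded in index.js, and the [0.0, 1.0] range matches the assertion in `prompt2image`:

```
def parse_strength(raw, default=0.75):
    # Clamp img2img strength into [0.0, 1.0] rather than crash on
    # out-of-range or non-numeric form input.
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return default
    return min(1.0, max(0.0, value))

assert parse_strength('0.6') == 0.6
assert parse_strength('2') == 1.0      # clamped
assert parse_strength(None) == 0.75    # fallback
```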
From 58e35626521d1e23d16a16c4184943c0f5a3c61d Mon Sep 17 00:00:00 2001
From: David Ford <2772469+david-ford@users.noreply.github.com>
Date: Wed, 31 Aug 2022 10:24:11 -0500
Subject: [PATCH 10/17] Fix merging embeddings (#226)

Fixed merging embeddings based on the changes made in textual inversion.
Tested and working. Inverted their logic to prioritize Stable Diffusion
implementation over alternatives, but left the option for alternatives to
still be used.
---
 configs/stable-diffusion/v1-finetune.yaml |  7 +--
 scripts/merge_embeddings.py               | 56 ++++++++++++++++++-----
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/configs/stable-diffusion/v1-finetune.yaml b/configs/stable-diffusion/v1-finetune.yaml
index bf303cbdae..5d608811de 100644
--- a/configs/stable-diffusion/v1-finetune.yaml
+++ b/configs/stable-diffusion/v1-finetune.yaml
@@ -52,7 +52,7 @@ model:
       ddconfig:
         double_z: true
         z_channels: 4
-        resolution: 512
+        resolution: 256
         in_channels: 3
         out_ch: 3
         ch: 128
@@ -74,7 +74,7 @@ data:
   target: main.DataModuleFromConfig
   params:
     batch_size: 1
-    num_workers: 16
+    num_workers: 2
     wrap: false
     train:
       target: ldm.data.personalized.PersonalizedBase
@@ -105,4 +105,5 @@ lightning:

   trainer:
     benchmark: True
-    max_steps: 6100
\ No newline at end of file
+    max_steps: 4000
+    
\ No newline at end of file
diff --git a/scripts/merge_embeddings.py b/scripts/merge_embeddings.py
index 0367d50dcf..452b27faf4 100644
--- a/scripts/merge_embeddings.py
+++ b/scripts/merge_embeddings.py
@@ -1,4 +1,4 @@
-from ldm.modules.encoders.modules import BERTTokenizer
+from ldm.modules.encoders.modules import FrozenCLIPEmbedder, BERTEmbedder
 from ldm.modules.embedding_manager import EmbeddingManager

 import argparse, os
@@ -6,7 +6,7 @@ from functools import partial

 import torch

-def get_placeholder_loop(placeholder_string, tokenizer):
+def get_placeholder_loop(placeholder_string, embedder, use_bert):

     new_placeholder = None

         else:
             new_placeholder = input(f"Placeholder string '{new_placeholder}' maps to more than a single token. Please enter another string: ")

-        token = tokenizer(new_placeholder)
+        token = get_bert_token_for_string(embedder.tknz_fn, new_placeholder) if use_bert else get_clip_token_for_string(embedder.tokenizer, new_placeholder)
+
+        if token is not None:
+            return new_placeholder, token
+
+def get_clip_token_for_string(tokenizer, string):
+    batch_encoding = tokenizer(
+        string,
+        truncation=True,
+        max_length=77,
+        return_length=True,
+        return_overflowing_tokens=False,
+        padding="max_length",
+        return_tensors="pt"
+    )
+
+    tokens = batch_encoding["input_ids"]
+
+    if torch.count_nonzero(tokens - 49407) == 2:
+        return tokens[0, 1]
+
+    return None
+
+def get_bert_token_for_string(tokenizer, string):
+    token = tokenizer(string)
+    if torch.count_nonzero(token) == 3:
+        return token[0, 1]
+
+    return None

-        if torch.count_nonzero(token) == 3:
-            return new_placeholder, token[0, 1]


 if __name__ == "__main__":

     parser.add_argument(
         "-o", "--output_path",
         help="Output path for the merged manager",
     )

+    parser.add_argument(
+        "-sd", "--use_bert",
+        action="store_true",
+        help="Flag to denote that we are not merging stable diffusion embeddings"
+    )
+
     args = parser.parse_args()

-    tokenizer = BERTTokenizer(vq_interface=False, max_length=77)
-    EmbeddingManager = partial(EmbeddingManager, tokenizer, ["*"])
+    if args.use_bert:
+        embedder = BERTEmbedder(n_embed=1280, n_layer=32).cuda()
+    else:
+        embedder = FrozenCLIPEmbedder().cuda()
+
+    EmbeddingManager = partial(EmbeddingManager, embedder, ["*"])

     string_to_token_dict = {}
     string_to_param_dict = torch.nn.ParameterDict()

                 placeholder_to_src[placeholder_string] = manager_ckpt
             else:
-                new_placeholder, new_token = get_placeholder_loop(placeholder_string, tokenizer)
+                new_placeholder, new_token = get_placeholder_loop(placeholder_string, embedder, use_bert=args.use_bert)
                 string_to_token_dict[new_placeholder] = new_token
                 string_to_param_dict[new_placeholder] = manager.string_to_param_dict[placeholder_string]

     print("Managers merged. Final list of placeholders: ")
     print(placeholder_to_src)
-
-
-
-
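
A worked note on the sentinel arithmetic in `get_clip_token_for_string`: the CLIP tokenizer pads its 77-slot output with the end-of-text id 49407, so after subtracting that id, only the start-of-text marker and the real tokens remain non-zero — exactly two non-zero entries means the string mapped to a single token. A self-contained illustration with hand-built tensors (the ids 32123 and 4567 are made up):

```
import torch

SOT, EOT = 49406, 49407  # CLIP start/end-of-text ids; EOT doubles as padding

def is_single_token(ids):
    # [SOT, tok, EOT, EOT, ...] -> exactly two entries differ from EOT
    return torch.count_nonzero(ids - EOT) == 2

assert is_single_token(torch.tensor([SOT, 32123, EOT, EOT, EOT]))
assert not is_single_token(torch.tensor([SOT, 32123, 4567, EOT, EOT]))
```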
From 0d2e6f90c8b08620cdfa1cfb552038e9355f5071 Mon Sep 17 00:00:00 2001
From: James Reynolds
Date: Wed, 31 Aug 2022 09:27:13 -0600
Subject: [PATCH 11/17] Readme update (#253)

* check if torch.backends has mps before calling it

* Updated Mac Readme with latest debugging info

Co-authored-by: James Reynolds
---
 README-Mac-MPS.md | 121 +++++++++++++++++++++++-----------------------
 1 file changed, 61 insertions(+), 60 deletions(-)

diff --git a/README-Mac-MPS.md b/README-Mac-MPS.md
index 775b741cb0..70149871ce 100644
--- a/README-Mac-MPS.md
+++ b/README-Mac-MPS.md
@@ -1,20 +1,19 @@
 # Apple Silicon Mac Users

 Several people have gotten Stable Diffusion to work on Apple Silicon
-Macs using Anaconda. I've gathered up most of their instructions and
-put them in this fork (and readme). I haven't tested anything besides
-Anaconda, and I've read about issues with things like miniforge, so if
-you have an issue that isn't dealt with in this fork then head on over
-to the [Apple
-Silicon](https://github.com/CompVis/stable-diffusion/issues/25) issue
-on GitHub (that page is so long that GitHub hides most of it by
-default, so you need to find the hidden part and expand it to view the
-whole thing). This fork would not have been possible without the work
-done by the people on that issue.
+Macs using Anaconda, miniforge, etc. I've gathered up most of their instructions and
+put them in this fork (and readme). Things have moved really fast and so these
+instructions change often. Hopefully things will settle down a little.
+
+There's several places where people are discussing Apple
+MPS functionality: [the original CompVis
+issue](https://github.com/CompVis/stable-diffusion/issues/25), and generally on
+[lstein's fork](https://github.com/lstein/stable-diffusion/).

 You have to have macOS 12.3 Monterey or later. Anything earlier than that
 won't work.

-BTW, I haven't tested any of this on Intel Macs.
+BTW, I haven't tested any of this on Intel Macs but I have read that one person
+got it to work.

 How to:

 ```
 ln -s /path/to/ckpt/sd-v1-1.ckpt models/ldm/stable-diffusion-v1/model.ckpt

 conda env create -f environment-mac.yaml
 conda activate ldm
+
+python scripts/preload_models.py
+python scripts/orig_scripts/txt2img.py --prompt "a photograph of an astronaut riding a horse" --plms
 ```

-These instructions are identical to the main repo except I added
-environment-mac.yaml because Mac doesn't have cudatoolkit.
+We have not gotten lstein's dream.py to work yet.

 After you follow all the instructions and run txt2img.py you might get
 several errors. Here's the errors I've seen and found solutions for.

+### Is it slow?
+
+Be sure to specify 1 sample and 1 iteration.
+
+	python ./scripts/txt2img.py --prompt "ocean" --ddim_steps 5 --n_samples 1 --n_iter 1
+
 ### Doesn't work anymore?

-We are using PyTorch nightly, which includes support for MPS. I don't
-know exactly how Anaconda does updates, but I woke up one morning and
-Stable Diffusion crashed and I couldn't think of anything I did that
-would've changed anything the night before, when it worked. A day and
-a half later I finally got it working again. I don't know what changed
-overnight. PyTorch-nightly changes overnight but I'm pretty sure I
-didn't manually update it. Either way, things are probably going to be
-bumpy on Apple Silicon until PyTorch releases a firm version that we
-can lock to.
+PyTorch nightly includes support for MPS. Because of this, this setup is
+inherently unstable. One morning I woke up and it no longer worked no matter
+what I did until I switched to miniforge. However, I have another Mac that works
+just fine with Anaconda. If you can't get it to work, please search a little
+first because many of the errors will get posted and solved. If you can't find
+a solution please [create an issue](https://github.com/lstein/stable-diffusion/issues).

-To manually update to the latest version of PyTorch nightly (which could fix issues), run this command.
+One debugging step is to update to the latest version of PyTorch nightly.

     conda install pytorch torchvision torchaudio -c pytorch-nightly

-## Debugging?
+Or you can clean everything up.

-Tired of waiting for your renders to finish before you can see if it
-works? Reduce the steps! The picture wont look like anything but if it
-finishes, hey, it works! This could also help you figure out if you've
-got a memory problem, because I'm betting 1 step doesn't use much
-memory.
+    conda clean --yes --all

-    python ./scripts/txt2img.py --prompt "ocean" --ddim_steps 1
+Or you can reset Anaconda.
+
+    conda update --force-reinstall -y -n base -c defaults conda

 ### "No module named cv2" (or some other module)

 Did you remember to `conda activate ldm`? If your terminal prompt
 does not include "(ldm)" then you are not in the virtual environment.

 globally.

 You might also need to install Rust (I mention this again below).

+
+### Debugging?
+
+Tired of waiting for your renders to finish before you can see if it
+works? Reduce the steps! The image quality will be horrible but at least you'll
+get quick feedback.
+
+	python ./scripts/txt2img.py --prompt "ocean" --ddim_steps 5 --n_samples 1 --n_iter 1
+
+### MAC: torch._C' has no attribute '_cuda_resetPeakMemoryStats' #234
+
+We haven't gotten dream.py to work on Mac yet.
+
+### OSError: Can't load tokenizer for 'openai/clip-vit-large-patch14'...
+
+	python scripts/preload_models.py
+
 ### "The operator [name] is not current implemented for the MPS device." (sic)

 Example error.

 ```
 ...
 NotImplementedError: The operator 'aten::index.Tensor' is not current implemented for the MPS device. If you want this op to be added in priority during the prototype phase of this feature, please comment on [https://github.com/pytorch/pytorch/issues/77764](https://github.com/pytorch/pytorch/issues/77764). As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.
 ```

-Just do what it says:
-
-    export PYTORCH_ENABLE_MPS_FALLBACK=1
+The lstein branch includes this fix in [environment-mac.yaml](https://github.com/lstein/stable-diffusion/blob/main/environment-mac.yaml).

 ### "Could not build wheels for tokenizers"

 I have not seen this error because I had Rust installed on my computer before I started playing with Stable Diffusion.

 ### How come `--seed` doesn't work?

+First this:
+
 > Completely reproducible results are not guaranteed across PyTorch
 releases, individual commits, or different platforms. Furthermore,
 results may not be reproducible between CPU and GPU executions, even
 when using identical seeds.

 [PyTorch docs](https://pytorch.org/docs/stable/notes/randomness.html)

-There is an [open issue](https://github.com/pytorch/pytorch/issues/78035) (as of August 2022) in pytorch regarding gradient inconsistency. I am guessing that's what is causing this.
+Second, we might have a fix that at least gets a consistent seed sort of. We're
+still working on it.

 ### libiomp5.dylib error?

 sort). [There's more suggestions](https://stackoverflow.com/questions/53014306/error-15-initializing-libiomp5-dylib-but-found-libiomp5-dylib-already-initial), like uninstalling tensorflow and reinstalling. I haven't tried them.

+Since I switched to miniforge I haven't seen the error.
+
 ### Not enough memory.

 This seems to be a common problem and is probably the underlying
 problem.

 Actually, this could be happening because there's not enough RAM. You could try

 ### My images come out black

-I haven't solved this issue. I just throw away my black
-images. There's a [similar
-issue](https://github.com/CompVis/stable-diffusion/issues/69) on CUDA
-GPU's where the images come out green. Maybe it's the same issue?
+We might have this fixed, we are still testing.
+
+There's a [similar issue](https://github.com/CompVis/stable-diffusion/issues/69)
+on CUDA GPU's where the images come out green. Maybe it's the same issue?

 Someone in that issue says to use "--precision full", but this fork
 actually disables that flag. I don't know why, someone else provided
 that code and I don't know what it does. Maybe the `model.half()`

 What? Intel? On an Apple Silicon?

     The processor must support the Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2) instructions.
     The processor must support the Intel(R) Advanced Vector Extensions (Intel(R) AVX) instructions.

-This fixed it for me:
-
-    conda clean --yes --all
-
-### Still slow?
-
-I changed the defaults of n_samples and n_iter to 1 so that it uses
-less RAM and makes less images so it will be faster the first time you
-use it. I don't actually know what n_samples does internally, but I
-know it consumes a lot more RAM. The n_iter flag just loops around the
-image creation code, so it shouldn't consume more RAM (it should be
-faster if you're going to do multiple images because the libraries and
-model will already be loaded--use a prompt file to get this speed
-boost).
-
-These flags are the default sample and iter settings in this fork/branch:
-
-~~~~
-python scripts/txt2img.py --prompt "ocean" --n_samples=1 --n_iter=1
-~~~
-
-
+This was actually the issue that I couldn't solve until I switched to miniforge.
\ No newline at end of file

From a14fd69a5a2ee610d903e4ddd534f63ac01545d9 Mon Sep 17 00:00:00 2001
From: Kevin Gibbons
Date: Wed, 31 Aug 2022 08:28:11 -0700
Subject: [PATCH 12/17] fix progress bar in webui when using strength parameter
 (#254)

---
 static/dream_web/index.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/static/dream_web/index.js b/static/dream_web/index.js
index 4b1c8ac2ec..76a76a53a3 100644
--- a/static/dream_web/index.js
+++ b/static/dream_web/index.js
@@ -61,8 +61,8 @@ async function generateSubmit(form) {
     let formData = Object.fromEntries(new FormData(form));
     formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null;

-    let strength = 0.75; // TODO let this be specified in the UI
-    let totalSteps = formData.initimg ? Math.floor(.75 * formData.steps) : formData.steps;
+    let strength = formData.strength;
+    let totalSteps = formData.initimg ? Math.floor(strength * formData.steps) : formData.steps;

     let progressSectionEle = document.querySelector('#progress-section');
     progressSectionEle.style.display = 'initial';

From d022d0dd1162a159be8384fcf7458dfcf0f86245 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Wed, 31 Aug 2022 12:32:56 -0400
Subject: [PATCH 13/17] continue to display in-progress image until the
 post-processing is done, for better esthetics (#255)

---
 static/dream_web/index.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/static/dream_web/index.js b/static/dream_web/index.js
index 76a76a53a3..cbd66366f4 100644
--- a/static/dream_web/index.js
+++ b/static/dream_web/index.js
@@ -98,7 +98,6 @@ async function generateSubmit(form) {
                 appendOutput(data.url, data.seed, data.config);
                 progressEle.setAttribute('value', 0);
                 progressEle.setAttribute('max', totalSteps);
-                progressImageEle.src = BLANK_IMAGE_URL;
             } else if (data.event === 'upscaling-started') {
                 document.getElementById("processing_cnt").textContent=data.processed_file_cnt;
                 document.getElementById("scaling-inprocess-message").style.display = "block";

From c52ba1b022ef83ddbae5cde228e08bccd1087c03 Mon Sep 17 00:00:00 2001
From: _nderscore <_@nderscore.com>
Date: Wed, 31 Aug 2022 18:00:10 +0000
Subject: [PATCH 14/17] feat: simplify and enhance prompt weight splitting
 (#258)

* feat: simplify and enhance prompt weight splitting

* fix: don't shadow the prompt variable

* feat: enable backslash-escaped colons in prompts
---
 ldm/simplet2i.py | 83 +++++++++++++++++++-----------------------------
 1 file changed, 32 insertions(+), 51 deletions(-)

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index d969ac5e23..82839db875 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -487,22 +487,19 @@ class T2I:

         uc = self.model.get_learned_conditioning([''])

-        # weighted sub-prompts
-        subprompts, weights = T2I._split_weighted_subprompts(prompt)
-        if len(subprompts) > 1:
+        # get weighted sub-prompts
+        weighted_subprompts = T2I._split_weighted_subprompts(prompt, skip_normalize)
+
+        if len(weighted_subprompts) > 1:
             # i dont know if this is correct.. but it works
             c = torch.zeros_like(uc)
-            # get total weight for normalizing
-            totalWeight = sum(weights)
             # normalize each "sub prompt" and add it
-            for i in range(0, len(subprompts)):
-                weight = weights[i]
-                if not skip_normalize:
-                    weight = weight / totalWeight
-                self._log_tokenization(subprompts[i])
+            for i in range(0, len(weighted_subprompts)):
+                subprompt, weight = weighted_subprompts[i]
+                self._log_tokenization(subprompt)
                 c = torch.add(
                     c,
-                    self.model.get_learned_conditioning([subprompts[i]]),
+                    self.model.get_learned_conditioning([subprompt]),
                     alpha=weight,
                 )
         else:   # just standard 1 prompt
@@ -616,52 +613,36 @@ class T2I:
         image = torch.from_numpy(image)
         return 2.0 * image - 1.0

-    def _split_weighted_subprompts(text):
+    def _split_weighted_subprompts(text, skip_normalize=False):
         """
         grabs all text up to the first occurrence of ':'
         uses the grabbed text as a sub-prompt, and takes the value following ':' as weight
         if ':' has no value defined, defaults to 1.0
         repeats until no text remaining
         """
-        remaining = len(text)
-        prompts = []
-        weights = []
-        while remaining > 0:
-            if ':' in text:
-                idx = text.index(':')   # first occurrence from start
-                # grab up to index as sub-prompt
-                prompt = text[:idx]
-                remaining -= idx
-                # remove from main text
-                text = text[idx + 1 :]
-                # find value for weight
-                if ' ' in text:
-                    idx = text.index(' ')   # first occurence
-                else:   # no space, read to end
-                    idx = len(text)
-                if idx != 0:
-                    try:
-                        weight = float(text[:idx])
-                    except:   # couldn't treat as float
-                        print(
-                            f"Warning: '{text[:idx]}' is not a value, are you missing a space?"
-                        )
-                        weight = 1.0
-                else:   # no value found
-                    weight = 1.0
-                # remove from main text
-                remaining -= idx
-                text = text[idx + 1 :]
-                # append the sub-prompt and its weight
-                prompts.append(prompt)
-                weights.append(weight)
-            else:   # no : found
-                if len(text) > 0:   # there is still text though
-                    # take remainder as weight 1
-                    prompts.append(text)
-                    weights.append(1.0)
-                remaining = 0
-        return prompts, weights
+        prompt_parser = re.compile("""
+            (?P<prompt>     # capture group for 'prompt'
+            (?:\\\:|[^:])+  # match one or more non ':' characters or escaped colons '\:'
+            )               # end 'prompt'
+            (?:             # non-capture group
+            :+              # match one or more ':' characters
+            (?P<weight>     # capture group for 'weight'
+            -?\d+(?:\.\d+)? # match positive or negative integer or decimal number
+            )?              # end weight capture group, make optional
+            \s*             # strip spaces after weight
+            |               # OR
+            $               # else, if no ':' then match end of line
+            )               # end non-capture group
+        """, re.VERBOSE)
+        parsed_prompts = [(match.group("prompt").replace("\\:", ":"), float(match.group("weight") or 1)) for match in re.finditer(prompt_parser, text)]
+        if skip_normalize:
+            return parsed_prompts
+        weight_sum = sum(map(lambda x: x[1], parsed_prompts))
+        if weight_sum == 0:
+            print("Warning: Subprompt weights add up to zero. Discarding and using even weights instead.")
+            equal_weight = 1 / len(parsed_prompts)
+            return [(x[0], equal_weight) for x in parsed_prompts]
+        return [(x[0], x[1] / weight_sum) for x in parsed_prompts]

     # shows how the prompt is tokenized
     # usually tokens have '' to indicate end-of-word,
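
A few representative inputs for the new parser, assuming the repository root is on `sys.path`; the prompts are examples only, and the weights shown are the normalized values:

```
from ldm.simplet2i import T2I

for p in ('a cat:2 a dog', 'a cat:2 a dog:-1', r'blue\:green sky'):
    print(T2I._split_weighted_subprompts(p))
# [('a cat', 0.666...), ('a dog', 0.333...)]   weights normalized to sum to 1
# [('a cat', 2.0), ('a dog', -1.0)]            already sums to 1, kept as-is
# [('blue:green sky', 1.0)]                    escaped colon survives splitting
```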
From ed513397b255868a9c0afe6dd7e580005b5d32bb Mon Sep 17 00:00:00 2001
From: David Wager
Date: Wed, 31 Aug 2022 19:20:28 +0100
Subject: [PATCH 15/17] Allow configuration of which SD model to use (#263)

* Allow configuration of which SD model to use

Closes https://github.com/lstein/stable-diffusion/issues/49

The syntax isn't quite the same (opting for --weights over --model),
although --weights is more in-line with the existing naming convention.

This method also locks us into models in the
models/ldm/stable-diffusion-v1/ directory. Personally, I'm not averse to
this, although a secondary solution may be necessary if we wish to supply
weights from an external directory.

* Fix typo

* Allow either filename OR filepath input for arg

This approach allows both --weights SD13 and
--weights C:/StableDiffusion/models/ldm/stable-diffusion-v1/SD13.ckpt
---
 scripts/dream.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/scripts/dream.py b/scripts/dream.py
index 50be6dfa7c..2911e8847a 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -29,7 +29,10 @@ def main():
     width = 512
     height = 512
     config = 'configs/stable-diffusion/v1-inference.yaml'
-    weights = 'models/ldm/stable-diffusion-v1/model.ckpt'
+    if '.ckpt' in opt.weights:
+        weights = opt.weights
+    else:
+        weights = f'models/ldm/stable-diffusion-v1/{opt.weights}.ckpt'

     print('* Initializing, be patient...\n')
     sys.path.append('.')
@@ -418,6 +421,11 @@ def create_argv_parser():
         action='store_true',
         help='Start in web server mode.',
     )
+    parser.add_argument(
+        '--weights',
+        default='model',
+        help='Indicates the Stable Diffusion model to use.',
+    )
     return parser
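
The filename-or-filepath convention this patch adds, isolated for clarity (the paths are examples):

```
def resolve_weights(weights_arg):
    # '--weights SD13' resolves inside the stock model directory;
    # anything containing '.ckpt' is treated as an explicit path.
    if '.ckpt' in weights_arg:
        return weights_arg
    return f'models/ldm/stable-diffusion-v1/{weights_arg}.ckpt'

assert resolve_weights('SD13') == 'models/ldm/stable-diffusion-v1/SD13.ckpt'
assert resolve_weights('C:/sd/SD13.ckpt') == 'C:/sd/SD13.ckpt'
```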
From 4b560b50c21dcf67872753d11410671f5b48531a Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Wed, 31 Aug 2022 16:59:27 -0400
Subject: [PATCH 16/17] fix AttributeError crash when running on non-CUDA
 systems (#256)

* fix AttributeError crash when running on non-CUDA systems; closes issue
#234 and issue #250

* although this prevents dream.py script from crashing immediately on MPS
systems, MPS support still very much a work in progress.
---
 ldm/simplet2i.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index a28670fc05..88cbb6ff78 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -157,7 +157,9 @@ class T2I:
         self.latent_diffusion_weights = latent_diffusion_weights
         self.device = device

-        self.session_peakmem = torch.cuda.max_memory_allocated()
+        # for VRAM usage statistics
+        self.session_peakmem = torch.cuda.max_memory_allocated() if self.device == 'cuda' else None
+
         if seed is None:
             self.seed = self._new_seed()
         else:
@@ -363,9 +365,6 @@ class T2I:
             print('Are you sure your system has an adequate NVIDIA GPU?')

         toc = time.time()
-        self.session_peakmem = max(
-            self.session_peakmem, torch.cuda.max_memory_allocated()
-        )
         print('Usage stats:')
         print(
             f'   {len(results)} image(s) generated in', '%4.2fs' % (toc - tic)
         )
         print(
             f'   Max VRAM used for this generation:',
             '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
         )
-        print(
-            f'   Max VRAM used since script start: ',
-            '%4.2fG' % (self.session_peakmem / 1e9),
-        )
+
+        if self.session_peakmem:
+            self.session_peakmem = max(
+                self.session_peakmem, torch.cuda.max_memory_allocated()
+            )
+            print(
+                f'   Max VRAM used since script start: ',
+                '%4.2fG' % (self.session_peakmem / 1e9),
+            )
         return results

     @torch.no_grad()

From 0433b3d625cd81383360691e6b23ee94991cd0e4 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Thu, 1 Sep 2022 15:13:21 +1200
Subject: [PATCH 17/17] Add Warning When Image Is Too Large (#271)

* Add Warning When Image Is Too Large

* fix incomprehensible formatting introduced by "blue"

Co-authored-by: Lincoln Stein
---
 ldm/simplet2i.py | 29 ++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 88cbb6ff78..230aa74c28 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -27,7 +27,6 @@ from ldm.models.diffusion.ddim import DDIMSampler
 from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
-from ldm.dream.image_util import InitImageResizer
 from ldm.dream.devices import choose_torch_device

 """Simplified text to image API for stable diffusion/latent diffusion

@@ -159,7 +158,7 @@ class T2I:
         # for VRAM usage statistics
         self.session_peakmem = torch.cuda.max_memory_allocated() if self.device == 'cuda' else None
-
+
         if seed is None:
             self.seed = self._new_seed()
         else:
@@ -178,7 +177,8 @@ class T2I:
         outputs = []
         for image, seed in results:
             name = f'{prefix}.{seed}.png'
-            path = pngwriter.save_image_and_prompt_to_png(image, f'{prompt} -S{seed}', name)
+            path = pngwriter.save_image_and_prompt_to_png(
+                image, f'{prompt} -S{seed}', name)
             outputs.append([path, seed])
         return outputs

@@ -488,7 +488,8 @@ class T2I:
         uc = self.model.get_learned_conditioning([''])

         # get weighted sub-prompts
-        weighted_subprompts = T2I._split_weighted_subprompts(prompt, skip_normalize)
+        weighted_subprompts = T2I._split_weighted_subprompts(
+            prompt, skip_normalize)

         if len(weighted_subprompts) > 1:
             # i dont know if this is correct.. but it works
@@ -531,7 +532,7 @@ class T2I:
         if self.model is None:
             seed_everything(self.seed)
             try:
-                config = OmegaConf.load(self.config)
+                config = OmegaConf.load(self.config)
                 self.device = self._get_device()
                 model = self._load_model_from_config(config, self.weights)
                 if self.embedding_path is not None:

                     image.width, height)
             else:
                 image = InitImageResizer(image).resize(width, height)
-                resize_needed=False
+                resize_needed = False
         if resize_needed:
             image = InitImageResizer(image).resize(
                 new_image_width, new_image_height)

             $               # else, if no ':' then match end of line
             )               # end non-capture group
         """, re.VERBOSE)
-        parsed_prompts = [(match.group("prompt").replace("\\:", ":"), float(match.group("weight") or 1)) for match in re.finditer(prompt_parser, text)]
+        parsed_prompts = [(match.group("prompt").replace("\\:", ":"), float(
+            match.group("weight") or 1)) for match in re.finditer(prompt_parser, text)]
         if skip_normalize:
             return parsed_prompts
         weight_sum = sum(map(lambda x: x[1], parsed_prompts))
         if weight_sum == 0:
-            print("Warning: Subprompt weights add up to zero. Discarding and using even weights instead.")
+            print(
+                "Warning: Subprompt weights add up to zero. Discarding and using even weights instead.")
             equal_weight = 1 / len(parsed_prompts)
             return [(x[0], equal_weight) for x in parsed_prompts]
         return [(x[0], x[1] / weight_sum) for x in parsed_prompts]
-
-    # shows how the prompt is tokenized
-    # usually tokens have '' to indicate end-of-word,
+
+    # shows how the prompt is tokenized
+    # usually tokens have '' to indicate end-of-word,
     # but for readability it has been replaced with ' '
     def _log_tokenization(self, text):
         if not self.log_tokenization:
@@ -700,4 +703,8 @@ class T2I:
             height = h
             width = w
             resize_needed = True
+
+        if (width * height) > (self.width * self.height):
+            print(">> This input is larger than your defaults. If you run out of memory, please use a smaller image.")
+
         return width, height, resize_needed
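
Taken together, the final `_resolution_check` both floors dimensions to multiples of 64 and warns when the requested canvas exceeds the configured default area. A standalone sketch of the combined behavior, assuming 512x512 defaults:

```
def resolution_check(width, height, default_w=512, default_h=512):
    # Floor both dimensions to multiples of 64, as the model requires.
    w, h = map(lambda x: x - x % 64, (width, height))
    resize_needed = (w, h) != (width, height)
    if resize_needed:
        print(f'>> Provided width and height must be multiples of 64. '
              f'Auto-resizing to {w}x{h}')
    if w * h > default_w * default_h:
        print('>> This input is larger than your defaults. If you run out '
              'of memory, please use a smaller image.')
    return w, h, resize_needed

print(resolution_check(1000, 600))  # (960, 576, True), prints both warnings
```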