diff --git a/README.md b/README.md index f0ddb978ff..3beaaa2e71 100644 --- a/README.md +++ b/README.md @@ -127,11 +127,15 @@ samples, samples scaled for a sample of the prompt and one with the init word pr On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec. -Note: According to the associated paper, the optimal number of images is 3-5 any more images than that and your model might not converge. +Note: According to the associated paper, the optimal number of images +is 3-5; any more images than that and your model might not converge. -Training will run indefinately, but you may wish to stop it before the heat death of the universe, when you fine a low loss epoch or around ~5000 iterations. +Training will run indefinitely, but you may wish to stop it before the +heat death of the universe, when you find a low loss epoch or around +~5000 iterations. -Once the model is trained, specify the trained .pt file when starting dream using +Once the model is trained, specify the trained .pt file when starting +dream using ~~~~ (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision @@ -156,13 +160,17 @@ It's also possible to train multiple tokens (modify the placeholder string in co --output_path /path/to/output/embedding.pt ~~~~ -Credit goes to @rinongal and the repository located at https://github.com/rinongal/textual_inversion Please see the repository and associated paper for details and limitations. +Credit goes to @rinongal and the repository located at +https://github.com/rinongal/textual_inversion Please see the +repository and associated paper for details and limitations. ## Changes * v1.08 (24 August 2022) * Escape single quotes on the dream> command before trying to parse. This avoids parse errors. + * A new -v option allows you to generate multiple variants of an initial image + in img2img mode. (kudos to Oceanswave) * Removed instruction to get Python3.8 as first step in Windows install. 
Anaconda3 does it for you. * Added bounds checks for numeric arguments that could cause crashes. @@ -439,7 +447,11 @@ to send me an email if you use and like the script. *Original Author:* Lincoln D. Stein -*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais) +*Contributions by:* +[Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), +[xraxra](https://github.com/xraxra), [bmaltais](https://github.com/bmaltais), [Sean McLellan](https://github.com/Oceanswave), +[nicolai256](https://github.com/nicolai256), [Benjamin Warner](https://github.com/warner-benjamin), +and [tildebyte](https://github.com/tildebyte) Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein) diff --git a/TODO.txt b/TODO.txt index 420951552f..df9aea75ba 100644 --- a/TODO.txt +++ b/TODO.txt @@ -11,6 +11,14 @@ Feature requests: 5. Support for inpainting masks #68. +6. Support for loading variations of the stable-diffusion + weights #49 + +7. Support for klms and other non-ddim samplers in img2img() #36 + +8. Pass a shell command to open up an image viewer on the last + batch of images generated #29. + Code Refactorization: 1. 
Move the PNG file generation code out of simplet2i and into diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py index 065b32986a..ddf786b5a8 100644 --- a/ldm/models/diffusion/ddim.py +++ b/ldm/models/diffusion/ddim.py @@ -10,16 +10,17 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak class DDIMSampler(object): - def __init__(self, model, schedule="linear", **kwargs): + def __init__(self, model, schedule="linear", device="cuda", **kwargs): super().__init__() self.model = model self.ddpm_num_timesteps = model.num_timesteps self.schedule = schedule + self.device = device def register_buffer(self, name, attr): if type(attr) == torch.Tensor: - if attr.device != torch.device("cuda"): - attr = attr.to(torch.device("cuda")) + if attr.device != torch.device(self.device): + attr = attr.to(torch.device(self.device)) setattr(self, name, attr) def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py index 5d09f023f3..5eafe1d7ce 100644 --- a/ldm/models/diffusion/plms.py +++ b/ldm/models/diffusion/plms.py @@ -9,13 +9,18 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak class PLMSSampler(object): - def __init__(self, model, schedule="linear", **kwargs): + def __init__(self, model, schedule="linear", device="cuda", **kwargs): super().__init__() self.model = model self.ddpm_num_timesteps = model.num_timesteps self.schedule = schedule + self.device = device def register_buffer(self, name, attr): + if type(attr) == torch.Tensor: + if attr.device != torch.device(self.device): + attr = attr.to(torch.device(self.device)) + setattr(self, name, attr) def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 0691cccb7b..4737d90ba7 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -58,7 +58,6 @@ 
import sys import os from omegaconf import OmegaConf from PIL import Image -import PIL from tqdm import tqdm, trange from itertools import islice from einops import rearrange, repeat @@ -158,7 +157,8 @@ The vast majority of these arguments default to reasonable values. @torch.no_grad() def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None, - cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False): + cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None, + skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed """ Generate an image from the prompt, writing iteration images into the outdir The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] @@ -286,7 +286,8 @@ The vast majority of these arguments default to reasonable values. @torch.no_grad() def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None, - cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False): + cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None, + skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed """ Generate an image from the prompt and the initial image, writing iteration images into the outdir The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] @@ -324,7 +325,7 @@ The vast majority of these arguments default to reasonable values. # PLMS sampler not supported yet, so ignore previous sampler if self.sampler_name!='ddim': print(f"sampler '{self.sampler_name}' is not yet supported. 
Using DDM sampler") - sampler = DDIMSampler(model) + sampler = DDIMSampler(model, device=self.device) else: sampler = self.sampler @@ -462,9 +463,9 @@ The vast majority of these arguments default to reasonable values. msg = f'setting sampler to {self.sampler_name}' if self.sampler_name=='plms': - self.sampler = PLMSSampler(self.model) + self.sampler = PLMSSampler(self.model, device=self.device) elif self.sampler_name == 'ddim': - self.sampler = DDIMSampler(self.model) + self.sampler = DDIMSampler(self.model, device=self.device) elif self.sampler_name == 'k_dpm_2_a': self.sampler = KSampler(self.model,'dpm_2_ancestral') elif self.sampler_name == 'k_dpm_2': @@ -479,7 +480,7 @@ The vast majority of these arguments default to reasonable values. self.sampler = KSampler(self.model,'lms') else: msg = f'unsupported sampler {self.sampler_name}, defaulting to plms' - self.sampler = PLMSSampler(self.model) + self.sampler = PLMSSampler(self.model, device=self.device) print(msg) @@ -506,7 +507,7 @@ The vast majority of these arguments default to reasonable values. 
w, h = image.size print(f"loaded input image of size ({w}, {h}) from {path}") w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 - image = image.resize((w, h), resample=PIL.Image.LANCZOS) + image = image.resize((w, h), resample=Image.Resampling.LANCZOS) image = np.array(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) diff --git a/scripts/dream.py b/scripts/dream.py index 09881b9eb6..dc5fad5bac 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -6,6 +6,7 @@ import shlex import atexit import os import sys +import copy from PIL import Image,PngImagePlugin # readline unavailable on windows systems @@ -177,9 +178,32 @@ def main_loop(t2i,parser,log,infile): print(e) continue + + allVariantResults = [] + if opt.variants is not None: + print(f"Generating {opt.variants} variant(s)...") + newopt = copy.deepcopy(opt) + newopt.variants = None + for r in results: + newopt.init_img = r[0] + print(f"\t generating variant for {newopt.init_img}") + for j in range(0, opt.variants): + try: + variantResults = t2i.img2img(**vars(newopt)) + allVariantResults.append([newopt,variantResults]) + except AssertionError as e: + print(e) + continue + print(f"{opt.variants} Variants generated!") + print("Outputs:") write_log_message(t2i,opt,results,log) + if allVariantResults: + print("Variant outputs:") + for vr in allVariantResults: + write_log_message(t2i,vr[0],vr[1],log) + print("goodbye!") @@ -236,6 +260,8 @@ def _reconstruct_switches(t2i,opt): switches.append(f'-H{opt.height or t2i.height}') switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}') switches.append(f'-m{t2i.sampler_name}') + if opt.variants: + switches.append(f'-v{opt.variants}') if opt.init_img: switches.append(f'-I{opt.init_img}') if opt.strength and opt.init_img is not None: @@ -307,8 +333,9 @@ def create_cmd_parser(): parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale") 
parser.add_argument('-g','--grid',action='store_true',help="generate a grid") parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)") - parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)") + parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)") parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely") + parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants") parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization") return parser @@ -317,7 +344,7 @@ if readline_available: readline.set_completer(Completer(['cd','pwd', '--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b', '--width','-W','--height','-H','--cfg_scale','-C','--grid','-g', - '--individual','-i','--init_img','-I','--strength','-f']).complete) + '--individual','-i','--init_img','-I','--strength','-f','-v','--variants']).complete) readline.set_completer_delims(" ") readline.parse_and_bind('tab: complete') load_history()