tweaked documentation and comments slightly

This commit is contained in:
Lincoln Stein 2022-08-24 15:25:52 -04:00
commit 1eec6b776b
6 changed files with 73 additions and 19 deletions

View File

@ -127,11 +127,15 @@ samples, samples scaled for a sample of the prompt and one with the init word pr
On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec. On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec.
Note: According to the associated paper, the optimal number of images is 3-5; any more images than that and your model might not converge. Note: According to the associated paper, the optimal number of images
is 3-5; any more images than that and your model might not converge.
Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low loss epoch or around ~5000 iterations. Training will run indefinitely, but you may wish to stop it before the
heat death of the universe, when you find a low loss epoch or around
~5000 iterations.
Once the model is trained, specify the trained .pt file when starting dream using Once the model is trained, specify the trained .pt file when starting
dream using
~~~~ ~~~~
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
@ -156,13 +160,17 @@ It's also possible to train multiple tokens (modify the placeholder string in co
--output_path /path/to/output/embedding.pt --output_path /path/to/output/embedding.pt
~~~~ ~~~~
Credit goes to @rinongal and the repository located at https://github.com/rinongal/textual_inversion. Please see the repository and associated paper for details and limitations. Credit goes to @rinongal and the repository located at
https://github.com/rinongal/textual_inversion. Please see the
repository and associated paper for details and limitations.
## Changes ## Changes
* v1.08 (24 August 2022) * v1.08 (24 August 2022)
* Escape single quotes on the dream> command before trying to parse. This avoids * Escape single quotes on the dream> command before trying to parse. This avoids
parse errors. parse errors.
* A new -v option allows you to generate multiple variants of an initial image
in img2img mode. (kudos to Oceanswave)
* Removed instruction to get Python3.8 as first step in Windows install. * Removed instruction to get Python3.8 as first step in Windows install.
Anaconda3 does it for you. Anaconda3 does it for you.
* Added bounds checks for numeric arguments that could cause crashes. * Added bounds checks for numeric arguments that could cause crashes.
@ -439,7 +447,11 @@ to send me an email if you use and like the script.
*Original Author:* Lincoln D. Stein <lincoln.stein@gmail.com> *Original Author:* Lincoln D. Stein <lincoln.stein@gmail.com>
*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais) *Contributions by:*
[Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison),
[xraxra](https://github.com/xraxra), [bmaltais](https://github.com/bmaltais), [Sean McLellan](https://github.com/Oceanswave),
[nicolai256](https://github.com/nicolai256), [Benjamin Warner](https://github.com/warner-benjamin),
and [tildebyte](https://github.com/tildebyte)
Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein) Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein)

View File

@ -11,6 +11,14 @@ Feature requests:
5. Support for inpainting masks #68. 5. Support for inpainting masks #68.
6. Support for loading variations of the stable-diffusion
weights #49
7. Support for klms and other non-ddim samplers in img2img() #36
8. Pass a shell command to open up an image viewer on the last
batch of images generated #29.
Code Refactorization: Code Refactorization:
1. Move the PNG file generation code out of simplet2i and into 1. Move the PNG file generation code out of simplet2i and into

View File

@ -10,16 +10,17 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
class DDIMSampler(object): class DDIMSampler(object):
def __init__(self, model, schedule="linear", **kwargs): def __init__(self, model, schedule="linear", device="cuda", **kwargs):
super().__init__() super().__init__()
self.model = model self.model = model
self.ddpm_num_timesteps = model.num_timesteps self.ddpm_num_timesteps = model.num_timesteps
self.schedule = schedule self.schedule = schedule
self.device = device
def register_buffer(self, name, attr): def register_buffer(self, name, attr):
if type(attr) == torch.Tensor: if type(attr) == torch.Tensor:
if attr.device != torch.device("cuda"): if attr.device != torch.device(self.device):
attr = attr.to(torch.device("cuda")) attr = attr.to(torch.device(self.device))
setattr(self, name, attr) setattr(self, name, attr)
def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):

View File

@ -9,13 +9,18 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
class PLMSSampler(object): class PLMSSampler(object):
def __init__(self, model, schedule="linear", **kwargs): def __init__(self, model, schedule="linear", device="cuda", **kwargs):
super().__init__() super().__init__()
self.model = model self.model = model
self.ddpm_num_timesteps = model.num_timesteps self.ddpm_num_timesteps = model.num_timesteps
self.schedule = schedule self.schedule = schedule
self.device = device
def register_buffer(self, name, attr): def register_buffer(self, name, attr):
if type(attr) == torch.Tensor:
if attr.device != torch.device(self.device):
attr = attr.to(torch.device(self.device))
setattr(self, name, attr) setattr(self, name, attr)
def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True): def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):

View File

@ -58,7 +58,6 @@ import sys
import os import os
from omegaconf import OmegaConf from omegaconf import OmegaConf
from PIL import Image from PIL import Image
import PIL
from tqdm import tqdm, trange from tqdm import tqdm, trange
from itertools import islice from itertools import islice
from einops import rearrange, repeat from einops import rearrange, repeat
@ -158,7 +157,8 @@ The vast majority of these arguments default to reasonable values.
@torch.no_grad() @torch.no_grad()
def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None, def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
steps=None,seed=None,grid=None,individual=None,width=None,height=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None,
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False): cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,
skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed
""" """
Generate an image from the prompt, writing iteration images into the outdir Generate an image from the prompt, writing iteration images into the outdir
The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@ -286,7 +286,8 @@ The vast majority of these arguments default to reasonable values.
@torch.no_grad() @torch.no_grad()
def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None, def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
steps=None,seed=None,grid=None,individual=None,width=None,height=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None,
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False): cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,
skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed
""" """
Generate an image from the prompt and the initial image, writing iteration images into the outdir Generate an image from the prompt and the initial image, writing iteration images into the outdir
The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...] The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@ -324,7 +325,7 @@ The vast majority of these arguments default to reasonable values.
# PLMS sampler not supported yet, so ignore previous sampler # PLMS sampler not supported yet, so ignore previous sampler
if self.sampler_name!='ddim': if self.sampler_name!='ddim':
print(f"sampler '{self.sampler_name}' is not yet supported. Using DDM sampler") print(f"sampler '{self.sampler_name}' is not yet supported. Using DDM sampler")
sampler = DDIMSampler(model) sampler = DDIMSampler(model, device=self.device)
else: else:
sampler = self.sampler sampler = self.sampler
@ -462,9 +463,9 @@ The vast majority of these arguments default to reasonable values.
msg = f'setting sampler to {self.sampler_name}' msg = f'setting sampler to {self.sampler_name}'
if self.sampler_name=='plms': if self.sampler_name=='plms':
self.sampler = PLMSSampler(self.model) self.sampler = PLMSSampler(self.model, device=self.device)
elif self.sampler_name == 'ddim': elif self.sampler_name == 'ddim':
self.sampler = DDIMSampler(self.model) self.sampler = DDIMSampler(self.model, device=self.device)
elif self.sampler_name == 'k_dpm_2_a': elif self.sampler_name == 'k_dpm_2_a':
self.sampler = KSampler(self.model,'dpm_2_ancestral') self.sampler = KSampler(self.model,'dpm_2_ancestral')
elif self.sampler_name == 'k_dpm_2': elif self.sampler_name == 'k_dpm_2':
@ -479,7 +480,7 @@ The vast majority of these arguments default to reasonable values.
self.sampler = KSampler(self.model,'lms') self.sampler = KSampler(self.model,'lms')
else: else:
msg = f'unsupported sampler {self.sampler_name}, defaulting to plms' msg = f'unsupported sampler {self.sampler_name}, defaulting to plms'
self.sampler = PLMSSampler(self.model) self.sampler = PLMSSampler(self.model, device=self.device)
print(msg) print(msg)
@ -506,7 +507,7 @@ The vast majority of these arguments default to reasonable values.
w, h = image.size w, h = image.size
print(f"loaded input image of size ({w}, {h}) from {path}") print(f"loaded input image of size ({w}, {h}) from {path}")
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32 w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
image = image.resize((w, h), resample=PIL.Image.LANCZOS) image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
image = np.array(image).astype(np.float32) / 255.0 image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2) image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image) image = torch.from_numpy(image)

View File

@ -6,6 +6,7 @@ import shlex
import atexit import atexit
import os import os
import sys import sys
import copy
from PIL import Image,PngImagePlugin from PIL import Image,PngImagePlugin
# readline unavailable on windows systems # readline unavailable on windows systems
@ -177,9 +178,32 @@ def main_loop(t2i,parser,log,infile):
print(e) print(e)
continue continue
allVariantResults = []
if opt.variants is not None:
print(f"Generating {opt.variants} variant(s)...")
newopt = copy.deepcopy(opt)
newopt.variants = None
for r in results:
newopt.init_img = r[0]
print(f"\t generating variant for {newopt.init_img}")
for j in range(0, opt.variants):
try:
variantResults = t2i.img2img(**vars(newopt))
allVariantResults.append([newopt,variantResults])
except AssertionError as e:
print(e)
continue
print(f"{opt.variants} Variants generated!")
print("Outputs:") print("Outputs:")
write_log_message(t2i,opt,results,log) write_log_message(t2i,opt,results,log)
if allVariantResults:
print("Variant outputs:")
for vr in allVariantResults:
write_log_message(t2i,vr[0],vr[1],log)
print("goodbye!") print("goodbye!")
@ -236,6 +260,8 @@ def _reconstruct_switches(t2i,opt):
switches.append(f'-H{opt.height or t2i.height}') switches.append(f'-H{opt.height or t2i.height}')
switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}') switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
switches.append(f'-m{t2i.sampler_name}') switches.append(f'-m{t2i.sampler_name}')
if opt.variants:
switches.append(f'-v{opt.variants}')
if opt.init_img: if opt.init_img:
switches.append(f'-I{opt.init_img}') switches.append(f'-I{opt.init_img}')
if opt.strength and opt.init_img is not None: if opt.strength and opt.init_img is not None:
@ -307,8 +333,9 @@ def create_cmd_parser():
parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale") parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale")
parser.add_argument('-g','--grid',action='store_true',help="generate a grid") parser.add_argument('-g','--grid',action='store_true',help="generate a grid")
parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)") parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)") parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely") parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization") parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")
return parser return parser
@ -317,7 +344,7 @@ if readline_available:
readline.set_completer(Completer(['cd','pwd', readline.set_completer(Completer(['cd','pwd',
'--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b', '--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b',
'--width','-W','--height','-H','--cfg_scale','-C','--grid','-g', '--width','-W','--height','-H','--cfg_scale','-C','--grid','-g',
'--individual','-i','--init_img','-I','--strength','-f']).complete) '--individual','-i','--init_img','-I','--strength','-f','-v','--variants']).complete)
readline.set_completer_delims(" ") readline.set_completer_delims(" ")
readline.parse_and_bind('tab: complete') readline.parse_and_bind('tab: complete')
load_history() load_history()