tweaked documentation and comments slightly

This commit is contained in:
Lincoln Stein 2022-08-24 15:25:52 -04:00
commit 1eec6b776b
6 changed files with 73 additions and 19 deletions

View File

@ -127,11 +127,15 @@ samples, samples scaled for a sample of the prompt and one with the init word pr
On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec.
Note: According to the associated paper, the optimal number of images is 3-5; any more images than that and your model might not converge.
Note: According to the associated paper, the optimal number of images
is 3-5; any more images than that and your model might not converge.
Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low loss epoch or around ~5000 iterations.
Training will run indefinitely, but you may wish to stop it before the
heat death of the universe, when you find a low loss epoch or around
~5000 iterations.
Once the model is trained, specify the trained .pt file when starting dream using
Once the model is trained, specify the trained .pt file when starting
dream using
~~~~
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
@ -156,13 +160,17 @@ It's also possible to train multiple tokens (modify the placeholder string in co
--output_path /path/to/output/embedding.pt
~~~~
Credit goes to @rinongal and the repository located at https://github.com/rinongal/textual_inversion. Please see the repository and associated paper for details and limitations.
Credit goes to @rinongal and the repository located at
https://github.com/rinongal/textual_inversion. Please see the
repository and associated paper for details and limitations.
## Changes
* v1.08 (24 August 2022)
* Escape single quotes on the dream> command before trying to parse. This avoids
parse errors.
* A new -v option allows you to generate multiple variants of an initial image
in img2img mode. (kudos to Oceanswave)
* Removed instruction to get Python3.8 as first step in Windows install.
Anaconda3 does it for you.
* Added bounds checks for numeric arguments that could cause crashes.
@ -439,7 +447,11 @@ to send me an email if you use and like the script.
*Original Author:* Lincoln D. Stein <lincoln.stein@gmail.com>
*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais)
*Contributions by:*
[Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison),
[xraxra](https://github.com/xraxra), [bmaltais](https://github.com/bmaltais), [Sean McLellan](https://github.com/Oceanswave),
[nicolai256](https://github.com/nicolai256), [Benjamin Warner](https://github.com/warner-benjamin),
and [tildebyte](https://github.com/tildebyte)
Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein)

View File

@ -11,6 +11,14 @@ Feature requests:
5. Support for inpainting masks #68.
6. Support for loading variations of the stable-diffusion
weights #49
7. Support for klms and other non-ddim samplers in img2img() #36
8. Pass a shell command to open up an image viewer on the last
batch of images generated #29.
Code Refactorization:
1. Move the PNG file generation code out of simplet2i and into

View File

@ -10,16 +10,17 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
class DDIMSampler(object):
def __init__(self, model, schedule="linear", **kwargs):
def __init__(self, model, schedule="linear", device="cuda", **kwargs):
super().__init__()
self.model = model
self.ddpm_num_timesteps = model.num_timesteps
self.schedule = schedule
self.device = device
def register_buffer(self, name, attr):
if type(attr) == torch.Tensor:
if attr.device != torch.device("cuda"):
attr = attr.to(torch.device("cuda"))
if attr.device != torch.device(self.device):
attr = attr.to(torch.device(self.device))
setattr(self, name, attr)
def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):

View File

@ -9,13 +9,18 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
class PLMSSampler(object):
def __init__(self, model, schedule="linear", **kwargs):
def __init__(self, model, schedule="linear", device="cuda", **kwargs):
super().__init__()
self.model = model
self.ddpm_num_timesteps = model.num_timesteps
self.schedule = schedule
self.device = device
def register_buffer(self, name, attr):
if type(attr) == torch.Tensor:
if attr.device != torch.device(self.device):
attr = attr.to(torch.device(self.device))
setattr(self, name, attr)
def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):

View File

@ -58,7 +58,6 @@ import sys
import os
from omegaconf import OmegaConf
from PIL import Image
import PIL
from tqdm import tqdm, trange
from itertools import islice
from einops import rearrange, repeat
@ -158,7 +157,8 @@ The vast majority of these arguments default to reasonable values.
@torch.no_grad()
def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
steps=None,seed=None,grid=None,individual=None,width=None,height=None,
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False):
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,
skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed
"""
Generate an image from the prompt, writing iteration images into the outdir
The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@ -286,7 +286,8 @@ The vast majority of these arguments default to reasonable values.
@torch.no_grad()
def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
steps=None,seed=None,grid=None,individual=None,width=None,height=None,
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False):
cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,
skip_normalize=False,variants=None): # note the "variants" option is an unused hack caused by how options are passed
"""
Generate an image from the prompt and the initial image, writing iteration images into the outdir
The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@ -324,7 +325,7 @@ The vast majority of these arguments default to reasonable values.
# PLMS sampler not supported yet, so ignore previous sampler
if self.sampler_name!='ddim':
print(f"sampler '{self.sampler_name}' is not yet supported. Using DDM sampler")
sampler = DDIMSampler(model)
sampler = DDIMSampler(model, device=self.device)
else:
sampler = self.sampler
@ -462,9 +463,9 @@ The vast majority of these arguments default to reasonable values.
msg = f'setting sampler to {self.sampler_name}'
if self.sampler_name=='plms':
self.sampler = PLMSSampler(self.model)
self.sampler = PLMSSampler(self.model, device=self.device)
elif self.sampler_name == 'ddim':
self.sampler = DDIMSampler(self.model)
self.sampler = DDIMSampler(self.model, device=self.device)
elif self.sampler_name == 'k_dpm_2_a':
self.sampler = KSampler(self.model,'dpm_2_ancestral')
elif self.sampler_name == 'k_dpm_2':
@ -479,7 +480,7 @@ The vast majority of these arguments default to reasonable values.
self.sampler = KSampler(self.model,'lms')
else:
msg = f'unsupported sampler {self.sampler_name}, defaulting to plms'
self.sampler = PLMSSampler(self.model)
self.sampler = PLMSSampler(self.model, device=self.device)
print(msg)
@ -506,7 +507,7 @@ The vast majority of these arguments default to reasonable values.
w, h = image.size
print(f"loaded input image of size ({w}, {h}) from {path}")
w, h = map(lambda x: x - x % 32, (w, h)) # resize to integer multiple of 32
image = image.resize((w, h), resample=PIL.Image.LANCZOS)
image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
image = np.array(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)

View File

@ -6,6 +6,7 @@ import shlex
import atexit
import os
import sys
import copy
from PIL import Image,PngImagePlugin
# readline unavailable on windows systems
@ -177,9 +178,32 @@ def main_loop(t2i,parser,log,infile):
print(e)
continue
allVariantResults = []
if opt.variants is not None:
print(f"Generating {opt.variants} variant(s)...")
newopt = copy.deepcopy(opt)
newopt.variants = None
for r in results:
newopt.init_img = r[0]
print(f"\t generating variant for {newopt.init_img}")
for j in range(0, opt.variants):
try:
variantResults = t2i.img2img(**vars(newopt))
allVariantResults.append([newopt,variantResults])
except AssertionError as e:
print(e)
continue
print(f"{opt.variants} Variants generated!")
print("Outputs:")
write_log_message(t2i,opt,results,log)
if allVariantResults:
print("Variant outputs:")
for vr in allVariantResults:
write_log_message(t2i,vr[0],vr[1],log)
print("goodbye!")
@ -236,6 +260,8 @@ def _reconstruct_switches(t2i,opt):
switches.append(f'-H{opt.height or t2i.height}')
switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
switches.append(f'-m{t2i.sampler_name}')
if opt.variants:
switches.append(f'-v{opt.variants}')
if opt.init_img:
switches.append(f'-I{opt.init_img}')
if opt.strength and opt.init_img is not None:
@ -307,8 +333,9 @@ def create_cmd_parser():
parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale")
parser.add_argument('-g','--grid',action='store_true',help="generate a grid")
parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)")
parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")
return parser
@ -317,7 +344,7 @@ if readline_available:
readline.set_completer(Completer(['cd','pwd',
'--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b',
'--width','-W','--height','-H','--cfg_scale','-C','--grid','-g',
'--individual','-i','--init_img','-I','--strength','-f']).complete)
'--individual','-i','--init_img','-I','--strength','-f','-v','--variants']).complete)
readline.set_completer_delims(" ")
readline.parse_and_bind('tab: complete')
load_history()