mirror of https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00

commit 1eec6b776b: tweaked documentation and comments slightly

README.md (22 changed lines)

@@ -127,11 +127,15 @@ samples, samples scaled for a sample of the prompt and one with the init word pr
 
 On an RTX3090, the process for SD will take ~1h @1.6 iterations/sec.
 
-Note: According to the associated paper, the optimal number of images is 3-5; any more images than that and your model might not converge.
+Note: According to the associated paper, the optimal number of images
+is 3-5; any more images than that and your model might not converge.
 
-Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low-loss epoch or around ~5000 iterations.
+Training will run indefinitely, but you may wish to stop it before the
+heat death of the universe, when you find a low-loss epoch or around
+~5000 iterations.
 
-Once the model is trained, specify the trained .pt file when starting dream using
+Once the model is trained, specify the trained .pt file when starting
+dream using
 
 ~~~~
 (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
@@ -156,13 +160,17 @@ It's also possible to train multiple tokens (modify the placeholder string in co
 --output_path /path/to/output/embedding.pt
 ~~~~
 
-Credit goes to @rinongal and the repository located at https://github.com/rinongal/textual_inversion. Please see the repository and associated paper for details and limitations.
+Credit goes to @rinongal and the repository located at
+https://github.com/rinongal/textual_inversion. Please see the
+repository and associated paper for details and limitations.
 
 ## Changes
 
 * v1.08 (24 August 2022)
 * Escape single quotes on the dream> command before trying to parse. This avoids
   parse errors.
+* A new -v option allows you to generate multiple variants of an initial image
+  in img2img mode. (kudos to Oceanswave)
 * Removed instruction to get Python3.8 as first step in Windows install.
   Anaconda3 does it for you.
 * Added bounds checks for numeric arguments that could cause crashes.
@@ -439,7 +447,11 @@ to send me an email if you use and like the script.
 
 *Original Author:* Lincoln D. Stein <lincoln.stein@gmail.com>
 
-*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais)
+*Contributions by:*
+[Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison),
+[xraxra](https://github.com/xraxra), [bmaltais](https://github.com/bmaltais), [Sean McLellan](https://github.com/Oceanswave),
+[nicolai256](https://github.com/nicolai256), [Benjamin Warner](https://github.com/warner-benjamin),
+and [tildebyte](https://github.com/tildebyte)
 
 Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein)
TODO.txt (8 changed lines)

@@ -11,6 +11,14 @@ Feature requests:
 
 5. Support for inpainting masks #68.
 
+6. Support for loading variations of the stable-diffusion
+   weights #49
+
+7. Support for klms and other non-ddim samplers in img2img() #36
+
+8. Pass a shell command to open up an image viewer on the last
+   batch of images generated #29.
+
 Code Refactorization:
 
 1. Move the PNG file generation code out of simplet2i and into
@@ -10,16 +10,17 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
 
 
 class DDIMSampler(object):
-    def __init__(self, model, schedule="linear", **kwargs):
+    def __init__(self, model, schedule="linear", device="cuda", **kwargs):
         super().__init__()
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        self.device = device
 
     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device(self.device):
+                attr = attr.to(torch.device(self.device))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
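This hunk (and the PLMS hunk that follows) applies the same pattern: the target device is taken from the constructor instead of being hard-coded to "cuda", so registered buffers can live on the CPU or another backend. A minimal standalone sketch of that pattern, using a made-up `BufferHolder` class rather than the repository's samplers:

~~~~
import torch

# Minimal sketch of the device-parameterised register_buffer pattern.
# BufferHolder is a stand-in class for illustration, not the repo's sampler.
class BufferHolder:
    def __init__(self, device="cuda"):
        self.device = device

    def register_buffer(self, name, attr):
        # Only tensors are moved; other attributes are stored unchanged.
        if isinstance(attr, torch.Tensor):
            if attr.device != torch.device(self.device):
                attr = attr.to(torch.device(self.device))
        setattr(self, name, attr)

holder = BufferHolder(device="cpu")              # hypothetical usage
holder.register_buffer("alphas", torch.ones(10))
print(holder.alphas.device)                      # -> cpu
~~~~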
@@ -9,13 +9,18 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak
 
 
 class PLMSSampler(object):
-    def __init__(self, model, schedule="linear", **kwargs):
+    def __init__(self, model, schedule="linear", device="cuda", **kwargs):
         super().__init__()
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        self.device = device
 
     def register_buffer(self, name, attr):
+        if type(attr) == torch.Tensor:
+            if attr.device != torch.device(self.device):
+                attr = attr.to(torch.device(self.device))
+
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
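Both samplers keep `"cuda"` as the default, so existing call sites behave as before; callers that want another backend pass `device=` explicitly, as the sampler-construction hunks further down do with `device=self.device`. A hypothetical helper for choosing that string, not part of this diff, might look like:

~~~~
import torch

# Hypothetical helper (not in the diff) for picking the device string that
# would be passed as DDIMSampler(model, device=...) / PLMSSampler(model, device=...).
def choose_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    mps = getattr(torch.backends, "mps", None)   # guard for older torch builds
    if mps is not None and mps.is_available():
        return "mps"
    return "cpu"

print(choose_device())
~~~~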
@@ -58,7 +58,6 @@ import sys
 import os
 from omegaconf import OmegaConf
 from PIL import Image
-import PIL
 from tqdm import tqdm, trange
 from itertools import islice
 from einops import rearrange, repeat
@@ -158,7 +157,8 @@ The vast majority of these arguments default to reasonable values.
     @torch.no_grad()
     def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False):
+                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,
+                skip_normalize=False,variants=None):    # note the "variants" option is an unused hack caused by how options are passed
         """
         Generate an image from the prompt, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@@ -286,7 +286,8 @@ The vast majority of these arguments default to reasonable values.
     @torch.no_grad()
     def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False):
+                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,
+                skip_normalize=False,variants=None):    # note the "variants" option is an unused hack caused by how options are passed
         """
         Generate an image from the prompt and the initial image, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
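The trailing comment about `variants` being "an unused hack" refers to how the interactive script forwards options: the entire parsed namespace is splatted into the call (see `t2i.img2img(**vars(newopt))` in the main_loop hunk below), so every command-line switch arrives as a keyword argument whether the method uses it or not. A small self-contained illustration, with a stand-in parser and `img2img` stub rather than the repository's code:

~~~~
import argparse

# Stand-in illustration: **vars(opt) forwards every parsed option as a
# keyword argument, so the receiving signature must accept `variants`
# even though it never uses it.
parser = argparse.ArgumentParser()
parser.add_argument('-v', '--variants', type=int)
parser.add_argument('-I', '--init_img', type=str)
opt = parser.parse_args(['-v', '2', '-I', 'outputs/000001.png'])

def img2img(init_img=None, variants=None):       # hypothetical stub
    # `variants` is accepted but ignored here, mirroring the "unused hack"
    print(f"init_img={init_img}, variants={variants} (ignored)")

img2img(**vars(opt))   # without `variants` in the signature this call raises TypeError
~~~~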
@@ -324,7 +325,7 @@ The vast majority of these arguments default to reasonable values.
         # PLMS sampler not supported yet, so ignore previous sampler
         if self.sampler_name!='ddim':
             print(f"sampler '{self.sampler_name}' is not yet supported. Using DDIM sampler")
-            sampler = DDIMSampler(model)
+            sampler = DDIMSampler(model, device=self.device)
         else:
             sampler = self.sampler
 
@@ -462,9 +463,9 @@ The vast majority of these arguments default to reasonable values.
 
         msg = f'setting sampler to {self.sampler_name}'
         if self.sampler_name=='plms':
-            self.sampler = PLMSSampler(self.model)
+            self.sampler = PLMSSampler(self.model, device=self.device)
         elif self.sampler_name == 'ddim':
-            self.sampler = DDIMSampler(self.model)
+            self.sampler = DDIMSampler(self.model, device=self.device)
         elif self.sampler_name == 'k_dpm_2_a':
             self.sampler = KSampler(self.model,'dpm_2_ancestral')
         elif self.sampler_name == 'k_dpm_2':
@@ -479,7 +480,7 @@ The vast majority of these arguments default to reasonable values.
             self.sampler = KSampler(self.model,'lms')
         else:
             msg = f'unsupported sampler {self.sampler_name}, defaulting to plms'
-            self.sampler = PLMSSampler(self.model)
+            self.sampler = PLMSSampler(self.model, device=self.device)
 
         print(msg)
 
@@ -506,7 +507,7 @@ The vast majority of these arguments default to reasonable values.
         w, h = image.size
         print(f"loaded input image of size ({w}, {h}) from {path}")
         w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=PIL.Image.LANCZOS)
+        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
         image = np.array(image).astype(np.float32) / 255.0
         image = image[None].transpose(0, 3, 1, 2)
         image = torch.from_numpy(image)
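This hunk swaps the module-level `PIL.Image.LANCZOS` constant for `Image.Resampling.LANCZOS`, the location newer Pillow releases expose the resampling filters under, which pairs with the removal of the bare `import PIL` in the earlier import hunk. If older Pillow versions still had to be supported, a fallback along these lines could be used (a sketch, not part of the diff):

~~~~
from PIL import Image

# Hypothetical compatibility shim: Pillow >= 9.1 exposes filters on
# Image.Resampling; older releases only have the module-level constant.
try:
    LANCZOS = Image.Resampling.LANCZOS
except AttributeError:
    LANCZOS = Image.LANCZOS

img = Image.new("RGB", (515, 517))
w, h = map(lambda x: x - x % 32, img.size)   # snap to a multiple of 32, as the loader does
print(img.resize((w, h), resample=LANCZOS).size)   # -> (512, 512)
~~~~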
@@ -6,6 +6,7 @@ import shlex
 import atexit
 import os
 import sys
+import copy
 from PIL import Image,PngImagePlugin
 
 # readline unavailable on windows systems
@@ -177,9 +178,32 @@ def main_loop(t2i,parser,log,infile):
             print(e)
             continue
 
 
+
+        allVariantResults = []
+        if opt.variants is not None:
+            print(f"Generating {opt.variants} variant(s)...")
+            newopt = copy.deepcopy(opt)
+            newopt.variants = None
+            for r in results:
+                newopt.init_img = r[0]
+                print(f"\t generating variant for {newopt.init_img}")
+                for j in range(0, opt.variants):
+                    try:
+                        variantResults = t2i.img2img(**vars(newopt))
+                        allVariantResults.append([newopt,variantResults])
+                    except AssertionError as e:
+                        print(e)
+                        continue
+            print(f"{opt.variants} Variants generated!")
+
         print("Outputs:")
         write_log_message(t2i,opt,results,log)
 
+        if allVariantResults:
+            print("Variant outputs:")
+            for vr in allVariantResults:
+                write_log_message(t2i,vr[0],vr[1],log)
+
 
     print("goodbye!")
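Taken together with the `-v/--variants` switch added to the parser below, a session might look like this (the prompt is hypothetical):

~~~~
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py
dream> "a surrealist painting of a lighthouse" -n2 -v3
~~~~

Each image from the initial run is passed back through img2img three times, and the variant results are listed under "Variant outputs:" after the regular outputs.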
@@ -236,6 +260,8 @@ def _reconstruct_switches(t2i,opt):
     switches.append(f'-H{opt.height or t2i.height}')
     switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
     switches.append(f'-m{t2i.sampler_name}')
+    if opt.variants:
+        switches.append(f'-v{opt.variants}')
     if opt.init_img:
         switches.append(f'-I{opt.init_img}')
     if opt.strength and opt.init_img is not None:
@@ -307,8 +333,9 @@ def create_cmd_parser():
     parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale")
     parser.add_argument('-g','--grid',action='store_true',help="generate a grid")
     parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
-    parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)")
+    parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
     parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
+    parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
     parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")
     return parser
 
@@ -317,7 +344,7 @@ if readline_available:
     readline.set_completer(Completer(['cd','pwd',
                    '--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b',
                    '--width','-W','--height','-H','--cfg_scale','-C','--grid','-g',
-                   '--individual','-i','--init_img','-I','--strength','-f']).complete)
+                   '--individual','-i','--init_img','-I','--strength','-f','-v','--variants']).complete)
     readline.set_completer_delims(" ")
     readline.parse_and_bind('tab: complete')
     load_history()