tweaked documentation and comments slightly
Commit 1eec6b776b

README.md (22 lines changed)
@@ -127,11 +127,15 @@ samples, samples scaled for a sample of the prompt and one with the init word pr

 On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec.

-Note: According to the associated paper, the optimal number of images is 3-5; any more images than that and your model might not converge.
+Note: According to the associated paper, the optimal number of images
+is 3-5; any more images than that and your model might not converge.

-Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low-loss epoch or around ~5000 iterations.
+Training will run indefinitely, but you may wish to stop it before the
+heat death of the universe, when you find a low-loss epoch or around
+~5000 iterations.

-Once the model is trained, specify the trained .pt file when starting dream using
+Once the model is trained, specify the trained .pt file when starting
+dream using

 ~~~~
 (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
@@ -156,13 +160,17 @@ It's also possible to train multiple tokens (modify the placeholder string in co
 --output_path /path/to/output/embedding.pt
 ~~~~

-Credit goes to @rinongal and the repository located at https://github.com/rinongal/textual_inversion. Please see the repository and associated paper for details and limitations.
+Credit goes to @rinongal and the repository located at
+https://github.com/rinongal/textual_inversion. Please see the
+repository and associated paper for details and limitations.

 ## Changes

 * v1.08 (24 August 2022)
 * Escape single quotes on the dream> command before trying to parse. This avoids
 parse errors.
+* A new -v option allows you to generate multiple variants of an initial image
+in img2img mode. (kudos to Oceanswave)
 * Removed instruction to get Python3.8 as first step in Windows install.
 Anaconda3 does it for you.
 * Added bounds checks for numeric arguments that could cause crashes.
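For context on the v1.08 entry about escaping single quotes: dream.py tokenizes the dream> line with shlex before argument parsing (shlex appears in the dream.py import hunk further down). The snippet below is only an illustrative sketch, not the repository's code; the sample prompt and the exact escaping strategy are assumptions.

~~~~
import shlex

command = "a portrait of the cat's whiskers -n2"   # hypothetical dream> input

# An unescaped apostrophe reads as an unterminated quote and breaks parsing:
try:
    shlex.split(command)
except ValueError as err:
    print(err)                                     # "No closing quotation"

# Escaping single quotes first lets the line tokenize cleanly:
print(shlex.split(command.replace("'", "\\'")))
~~~~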
@@ -439,7 +447,11 @@ to send me an email if you use and like the script.

 *Original Author:* Lincoln D. Stein <lincoln.stein@gmail.com>

-*Contributions by:* [Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison), [xraxra](https://github.com/xraxra), and [bmaltais](https://github.com/bmaltais)
+*Contributions by:*
+[Peter Kowalczyk](https://github.com/slix), [Henry Harrison](https://github.com/hwharrison),
+[xraxra](https://github.com/xraxra), [bmaltais](https://github.com/bmaltais), [Sean McLellan](https://github.com/Oceanswave),
+[nicolai256](https://github.com/nicolai256), [Benjamin Warner](https://github.com/warner-benjamin),
+and [tildebyte](https://github.com/tildebyte)

 Original portions of the software are Copyright (c) 2020 Lincoln D. Stein (https://github.com/lstein)
TODO.txt (8 lines changed)

@@ -11,6 +11,14 @@ Feature requests:

 5. Support for inpainting masks #68.

+6. Support for loading variations of the stable-diffusion
+weights #49
+
+7. Support for klms and other non-ddim samplers in img2img() #36
+
+8. Pass a shell command to open up an image viewer on the last
+batch of images generated #29.

 Code Refactorization:

 1. Move the PNG file generation code out of simplet2i and into
ldm/models/diffusion/ddim.py

@@ -10,16 +10,17 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak


 class DDIMSampler(object):
-    def __init__(self, model, schedule="linear", **kwargs):
+    def __init__(self, model, schedule="linear", device="cuda", **kwargs):
         super().__init__()
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        self.device = device

     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device(self.device):
+                attr = attr.to(torch.device(self.device))
         setattr(self, name, attr)

     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/models/diffusion/plms.py

@@ -9,13 +9,18 @@ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, mak


 class PLMSSampler(object):
-    def __init__(self, model, schedule="linear", **kwargs):
+    def __init__(self, model, schedule="linear", device="cuda", **kwargs):
         super().__init__()
         self.model = model
         self.ddpm_num_timesteps = model.num_timesteps
         self.schedule = schedule
+        self.device = device

     def register_buffer(self, name, attr):
         if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+            if attr.device != torch.device(self.device):
+                attr = attr.to(torch.device(self.device))
+
         setattr(self, name, attr)

     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
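The two hunks above make the sampler device configurable instead of hard-coding CUDA, so registered buffers follow whatever device the caller selects. A minimal sketch of the pattern (the TinySampler class below is illustrative only; just the constructor signature and the register_buffer logic mirror the diff):

~~~~
import torch

class TinySampler:
    """Illustrative stand-in for the device handling added to DDIMSampler/PLMSSampler."""
    def __init__(self, device="cuda"):
        self.device = device

    def register_buffer(self, name, attr):
        # Same idea as the diffed register_buffer: tensors are moved to the
        # caller-chosen device rather than unconditionally to CUDA.
        if isinstance(attr, torch.Tensor) and attr.device != torch.device(self.device):
            attr = attr.to(torch.device(self.device))
        setattr(self, name, attr)

sampler = TinySampler(device="cpu")     # previously this was effectively CUDA-only
sampler.register_buffer("betas", torch.linspace(1e-4, 2e-2, 10))
print(sampler.betas.device)             # -> cpu
~~~~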
ldm/simplet2i.py

@@ -58,7 +58,6 @@ import sys
 import os
 from omegaconf import OmegaConf
 from PIL import Image
-import PIL
 from tqdm import tqdm, trange
 from itertools import islice
 from einops import rearrange, repeat

@@ -158,7 +157,8 @@ The vast majority of these arguments default to reasonable values.
     @torch.no_grad()
     def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False):
+                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,
+                skip_normalize=False,variants=None):    # note the "variants" option is an unused hack caused by how options are passed
         """
         Generate an image from the prompt, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]

@@ -286,7 +286,8 @@ The vast majority of these arguments default to reasonable values.
     @torch.no_grad()
     def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
-                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False):
+                cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,
+                skip_normalize=False,variants=None):    # note the "variants" option is an unused hack caused by how options are passed
         """
         Generate an image from the prompt and the initial image, writing iteration images into the outdir
         The output is a list of lists in the format: [[filename1,seed1], [filename2,seed2],...]
@@ -324,7 +325,7 @@ The vast majority of these arguments default to reasonable values.
         # PLMS sampler not supported yet, so ignore previous sampler
         if self.sampler_name!='ddim':
             print(f"sampler '{self.sampler_name}' is not yet supported. Using DDM sampler")
-            sampler = DDIMSampler(model)
+            sampler = DDIMSampler(model, device=self.device)
         else:
             sampler = self.sampler

@@ -462,9 +463,9 @@ The vast majority of these arguments default to reasonable values.

         msg = f'setting sampler to {self.sampler_name}'
         if self.sampler_name=='plms':
-            self.sampler = PLMSSampler(self.model)
+            self.sampler = PLMSSampler(self.model, device=self.device)
         elif self.sampler_name == 'ddim':
-            self.sampler = DDIMSampler(self.model)
+            self.sampler = DDIMSampler(self.model, device=self.device)
         elif self.sampler_name == 'k_dpm_2_a':
             self.sampler = KSampler(self.model,'dpm_2_ancestral')
         elif self.sampler_name == 'k_dpm_2':

@@ -479,7 +480,7 @@ The vast majority of these arguments default to reasonable values.
             self.sampler = KSampler(self.model,'lms')
         else:
             msg = f'unsupported sampler {self.sampler_name}, defaulting to plms'
-            self.sampler = PLMSSampler(self.model)
+            self.sampler = PLMSSampler(self.model, device=self.device)

         print(msg)
@@ -506,7 +507,7 @@ The vast majority of these arguments default to reasonable values.
         w, h = image.size
         print(f"loaded input image of size ({w}, {h}) from {path}")
         w, h = map(lambda x: x - x % 32, (w, h))  # resize to integer multiple of 32
-        image = image.resize((w, h), resample=PIL.Image.LANCZOS)
+        image = image.resize((w, h), resample=Image.Resampling.LANCZOS)
         image = np.array(image).astype(np.float32) / 255.0
         image = image[None].transpose(0, 3, 1, 2)
         image = torch.from_numpy(image)
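The last hunk above switches the resize filter from PIL.Image.LANCZOS to Image.Resampling.LANCZOS, the enum introduced in Pillow 9.1 (which is also why the bare `import PIL` could be dropped in the first hunk). A hedged compatibility sketch, not part of the commit, for environments that might still carry an older Pillow:

~~~~
from PIL import Image

# Pillow >= 9.1 exposes resampling filters under Image.Resampling; older
# releases only provide the module-level constants. Use whichever exists.
try:
    LANCZOS = Image.Resampling.LANCZOS
except AttributeError:      # Pillow < 9.1
    LANCZOS = Image.LANCZOS

# usage mirroring the diffed call:
# image = image.resize((w, h), resample=LANCZOS)
~~~~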
scripts/dream.py

@@ -6,6 +6,7 @@ import shlex
 import atexit
 import os
 import sys
+import copy
 from PIL import Image,PngImagePlugin

 # readline unavailable on windows systems
@@ -177,9 +178,32 @@ def main_loop(t2i,parser,log,infile):
             print(e)
             continue

+        allVariantResults = []
+        if opt.variants is not None:
+            print(f"Generating {opt.variants} variant(s)...")
+            newopt = copy.deepcopy(opt)
+            newopt.variants = None
+            for r in results:
+                newopt.init_img = r[0]
+                print(f"\t generating variant for {newopt.init_img}")
+                for j in range(0, opt.variants):
+                    try:
+                        variantResults = t2i.img2img(**vars(newopt))
+                        allVariantResults.append([newopt,variantResults])
+                    except AssertionError as e:
+                        print(e)
+                        continue
+            print(f"{opt.variants} Variants generated!")

        print("Outputs:")
        write_log_message(t2i,opt,results,log)

+        if allVariantResults:
+            print("Variant outputs:")
+            for vr in allVariantResults:
+                write_log_message(t2i,vr[0],vr[1],log)

    print("goodbye!")
@@ -236,6 +260,8 @@ def _reconstruct_switches(t2i,opt):
     switches.append(f'-H{opt.height or t2i.height}')
     switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
     switches.append(f'-m{t2i.sampler_name}')
+    if opt.variants:
+        switches.append(f'-v{opt.variants}')
     if opt.init_img:
         switches.append(f'-I{opt.init_img}')
     if opt.strength and opt.init_img is not None:
@@ -307,8 +333,9 @@ def create_cmd_parser():
     parser.add_argument('-C','--cfg_scale',default=7.5,type=float,help="prompt configuration scale")
     parser.add_argument('-g','--grid',action='store_true',help="generate a grid")
     parser.add_argument('-i','--individual',action='store_true',help="generate individual files (default)")
-    parser.add_argument('-I','--init_img',type=str,help="path to input image (supersedes width and height)")
+    parser.add_argument('-I','--init_img',type=str,help="path to input image for img2img mode (supersedes width and height)")
     parser.add_argument('-f','--strength',default=0.75,type=float,help="strength for noising/unnoising. 0.0 preserves image exactly, 1.0 replaces it completely")
+    parser.add_argument('-v','--variants',type=int,help="in img2img mode, the first generated image will get passed back to img2img to generate the requested number of variants")
     parser.add_argument('-x','--skip_normalize',action='store_true',help="skip subprompt weight normalization")
     return parser
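To make the interaction of the changed -I help text and the new -v switch concrete, here is a standalone argparse sketch; it is not the repository's create_cmd_parser(), and the image path is hypothetical:

~~~~
import argparse

# Minimal stand-in mirroring only the two switches touched above.
p = argparse.ArgumentParser()
p.add_argument('-I', '--init_img', type=str,
               help="path to input image for img2img mode (supersedes width and height)")
p.add_argument('-v', '--variants', type=int,
               help="in img2img mode, the first generated image is passed back "
                    "to img2img to generate the requested number of variants")

opt = p.parse_args(['-I', 'outputs/img-samples/000001.png', '-v', '3'])
print(opt.init_img, opt.variants)   # -> outputs/img-samples/000001.png 3
~~~~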
@@ -317,7 +344,7 @@ if readline_available:
     readline.set_completer(Completer(['cd','pwd',
                 '--steps','-s','--seed','-S','--iterations','-n','--batch_size','-b',
                 '--width','-W','--height','-H','--cfg_scale','-C','--grid','-g',
-                '--individual','-i','--init_img','-I','--strength','-f']).complete)
+                '--individual','-i','--init_img','-I','--strength','-f','-v','--variants']).complete)
     readline.set_completer_delims(" ")
     readline.parse_and_bind('tab: complete')
     load_history()