From 41f0afbcb67a1f56bf0d612c647b4a72e533130b Mon Sep 17 00:00:00 2001 From: henry Date: Sat, 20 Aug 2022 22:28:29 -0500 Subject: [PATCH 1/5] add klms sampling --- environment.yaml | 2 ++ scripts/dream.py | 2 +- scripts/txt2img.py | 63 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/environment.yaml b/environment.yaml index 7f25da800a..2ac2596575 100644 --- a/environment.yaml +++ b/environment.yaml @@ -24,6 +24,8 @@ dependencies: - transformers==4.19.2 - torchmetrics==0.6.0 - kornia==0.6 + - accelerate==0.12.0 + - git+https://github.com/crowsonkb/k-diffusion.git@master - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - -e git+https://github.com/openai/CLIP.git@main#egg=clip - -e . diff --git a/scripts/dream.py b/scripts/dream.py index cc7614980f..322c4e3a82 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -169,7 +169,7 @@ def create_argv_parser(): default=1, help="number of images to produce per iteration (currently not working properly - producing too many images)") parser.add_argument('--sampler', - choices=['plms','ddim'], + choices=['plms','ddim', 'klms'], default='plms', help="which sampler to use") parser.add_argument('-o', diff --git a/scripts/txt2img.py b/scripts/txt2img.py index da77e1a03e..42d5e83496 100644 --- a/scripts/txt2img.py +++ b/scripts/txt2img.py @@ -12,6 +12,10 @@ from pytorch_lightning import seed_everything from torch import autocast from contextlib import contextmanager, nullcontext +import accelerate +import k_diffusion as K +import torch.nn as nn + from ldm.util import instantiate_from_config from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler @@ -80,6 +84,11 @@ def main(): action='store_true', help="use plms sampling", ) + parser.add_argument( + "--klms", + action='store_true', + help="use klms sampling", + ) parser.add_argument( "--laion400m", action='store_true', @@ -190,6 
+199,22 @@ def main(): device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") model = model.to(device) + #for klms + model_wrap = K.external.CompVisDenoiser(model) + accelerator = accelerate.Accelerator() + device = accelerator.device + class CFGDenoiser(nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + + def forward(self, x, sigma, uncond, cond, cond_scale): + x_in = torch.cat([x] * 2) + sigma_in = torch.cat([sigma] * 2) + cond_in = torch.cat([uncond, cond]) + uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) + return uncond + (cond - uncond) * cond_scale + if opt.plms: sampler = PLMSSampler(model) else: @@ -226,8 +251,8 @@ def main(): with model.ema_scope(): tic = time.time() all_samples = list() - for n in trange(opt.n_iter, desc="Sampling"): - for prompts in tqdm(data, desc="data"): + for n in trange(opt.n_iter, desc="Sampling", disable =not accelerator.is_main_process): + for prompts in tqdm(data, desc="data", disable =not accelerator.is_main_process): uc = None if opt.scale != 1.0: uc = model.get_learned_conditioning(batch_size * [""]) @@ -235,18 +260,32 @@ def main(): prompts = list(prompts) c = model.get_learned_conditioning(prompts) shape = [opt.C, opt.H // opt.f, opt.W // opt.f] - samples_ddim, _ = sampler.sample(S=opt.ddim_steps, - conditioning=c, - batch_size=opt.n_samples, - shape=shape, - verbose=False, - unconditional_guidance_scale=opt.scale, - unconditional_conditioning=uc, - eta=opt.ddim_eta, - x_T=start_code) - + + if not opt.klms: + samples_ddim, _ = sampler.sample(S=opt.ddim_steps, + conditioning=c, + batch_size=opt.n_samples, + shape=shape, + verbose=False, + unconditional_guidance_scale=opt.scale, + unconditional_conditioning=uc, + eta=opt.ddim_eta, + x_T=start_code) + else: + sigmas = model_wrap.get_sigmas(opt.ddim_steps) + if start_code: + x = start_code + else: + x = torch.randn([opt.n_samples, *shape], device=device) * sigmas[0] # for GPU draw + 
model_wrap_cfg = CFGDenoiser(model_wrap) + extra_args = {'cond': c, 'uncond': uc, 'cond_scale': opt.scale} + samples_ddim = K.sampling.sample_lms(model_wrap_cfg, x, sigmas, extra_args=extra_args, disable=not accelerator.is_main_process) + x_samples_ddim = model.decode_first_stage(samples_ddim) x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) + + if opt.klms: + x_sample = accelerator.gather(x_samples_ddim) if not opt.skip_save: for x_sample in x_samples_ddim: From bb91ca0462f12ebd0b9669be0d45e26a21ac5ecb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 21 Aug 2022 17:09:00 -0400 Subject: [PATCH 2/5] first attempt to fold k_lms changes proposed by hwharrison and bmaltais --- ldm/models/diffusion/ksampler.py | 64 ++++++++++++++++++++++++++++++++ ldm/simplet2i.py | 12 ++++-- 2 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 ldm/models/diffusion/ksampler.py diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py new file mode 100644 index 0000000000..39c0fdf542 --- /dev/null +++ b/ldm/models/diffusion/ksampler.py @@ -0,0 +1,64 @@ +'''wrapper around part of Karen Crownson's k-duffsion library, making it call compatible with other Samplers''' +import k_diffusion as K +import torch.nn as nn + +class CFGDenoiser(nn.Module): + def __init__(self, model): + super().__init__() + self.inner_model = model + + def forward(self, x, sigma, uncond, cond, cond_scale): + x_in = torch.cat([x] * 2) + sigma_in = torch.cat([sigma] * 2) + cond_in = torch.cat([uncond, cond]) + uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) + return uncond + (cond - uncond) * cond_scale + +class KSampler(object): + def __init__(self,model,schedule="lms", **kwargs): + super().__init__() + self.model = K.external.CompVisDenoiser(model) + self.accelerator = accelerate.Accelerator() + self.device = accelerator.device + self.schedule = schedule + + # most of these arguments are ignored and are only present for 
compatibility with + # other samples + @torch.no_grad() + def sample(self, + S, + batch_size, + shape, + conditioning=None, + callback=None, + normals_sequence=None, + img_callback=None, + quantize_x0=False, + eta=0., + mask=None, + x0=None, + temperature=1., + noise_dropout=0., + score_corrector=None, + corrector_kwargs=None, + verbose=True, + x_T=None, + log_every_t=100, + unconditional_guidance_scale=1., + unconditional_conditioning=None, + # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ... + **kwargs + ): + + sigmas = self.model.get_sigmas(S) + if x_T: + x = x_T + else: + x = torch.randn([batch_size, *shape], device=device) * sigmas[0] # for GPU draw + model_wrap_cfg = CFGDenoiser(self.model) + extra_args = {'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': unconditional_guidance_scale} + return (K.sampling.sample_lms(model_wrap_cfg, x, sigmas, extra_args=extra_args, disable=not accelerator.is_main_process), + None) + + def gather(samples_ddim): + return self.accelerator.gather(samples_ddim) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 796a99396b..6f740d1f83 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -11,7 +11,7 @@ t2i = T2I(outdir = // outputs/txt2img-samples batch_size = // how many images to generate per sampling (1) steps = // 50 seed = // current system time - sampler = ['ddim','plms'] // ddim + sampler = ['ddim','plms','klms'] // klms grid = // false width = // image width, multiple of 64 (512) height = // image height, multiple of 64 (512) @@ -62,8 +62,9 @@ import time import math from ldm.util import instantiate_from_config -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.plms import PLMSSampler +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.plms import PLMSSampler +from ldm.models.diffusion.ksampler import KSampler class T2I: """T2I class @@ -101,7 +102,7 @@ class T2I: cfg_scale=7.5, 
weights="models/ldm/stable-diffusion-v1/model.ckpt", config = "configs/latent-diffusion/txt2img-1p4B-eval.yaml", - sampler="plms", + sampler="klms", latent_channels=4, downsampling_factor=8, ddim_eta=0.0, # deterministic @@ -387,6 +388,9 @@ class T2I: elif self.sampler_name == 'ddim': print("setting sampler to ddim") self.sampler = DDIMSampler(self.model) + elif self.sampler_name == 'klms': + print("setting sampler to klms") + self.sampler = KSampler(self.model,'lms') else: print(f"unsupported sampler {self.sampler_name}, defaulting to plms") self.sampler = PLMSSampler(self.model) From 78aba5b770d6e85e44c730da9735118d10912475 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 21 Aug 2022 19:57:48 -0400 Subject: [PATCH 3/5] preparing for merge into main --- README.md | 8 ++++++++ environment.yaml | 4 ++-- ldm/models/diffusion/ksampler.py | 28 +++++++++++++++++++--------- ldm/simplet2i.py | 9 ++++++++- scripts/dream.py | 12 +++++++++--- scripts/preload_models.py | 1 + 6 files changed, 47 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 5215530a9e..75fae7ca2f 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,14 @@ The --init_img (-I) option gives the path to the seed picture. --strength (-f) c the original will be modified, ranging from 0.0 (keep the original intact), to 1.0 (ignore the original completely). The default is 0.75, and ranges from 0.25-0.75 give interesting results. 
+## Changes + +*v1.01 (21 August 2022) +-added k_lms sampling **Please run "conda update -f environment.yaml" to load the k_lms dependencies** +-*use half precision arithmetic by default, resulting in faster execution and lower memory requirements +Pass argument --full_precision to dream.py to get slower but more accurate image generation + + ## Installation ### Linux/Mac diff --git a/environment.yaml b/environment.yaml index 2ac2596575..0de05e815a 100644 --- a/environment.yaml +++ b/environment.yaml @@ -25,7 +25,7 @@ dependencies: - torchmetrics==0.6.0 - kornia==0.6 - accelerate==0.12.0 - - git+https://github.com/crowsonkb/k-diffusion.git@master - - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers - -e git+https://github.com/openai/CLIP.git@main#egg=clip + - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers + - -e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion - -e . diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index 39c0fdf542..cc4677f47e 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -1,12 +1,29 @@ '''wrapper around part of Karen Crownson's k-duffsion library, making it call compatible with other Samplers''' import k_diffusion as K +import torch import torch.nn as nn +import accelerate class CFGDenoiser(nn.Module): def __init__(self, model): super().__init__() self.inner_model = model + def forward(self, x, sigma, uncond, cond, cond_scale): + x_in = torch.cat([x] * 2) + sigma_in = torch.cat([sigma] * 2) + cond_in = torch.cat([uncond, cond]) + uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) + return uncond + (cond - uncond) * cond_scale + +class KSampler(object): + def __init__(self,model,schedule="lms", **kwargs): + super().__init__() + self.model = K.external.CompVisDenoiser(model) + self.accelerator = accelerate.Accelerator() + self.device = 
self.accelerator.device + self.schedule = schedule + def forward(self, x, sigma, uncond, cond, cond_scale): x_in = torch.cat([x] * 2) sigma_in = torch.cat([sigma] * 2) @@ -14,13 +31,6 @@ class CFGDenoiser(nn.Module): uncond, cond = self.inner_model(x_in, sigma_in, cond=cond_in).chunk(2) return uncond + (cond - uncond) * cond_scale -class KSampler(object): - def __init__(self,model,schedule="lms", **kwargs): - super().__init__() - self.model = K.external.CompVisDenoiser(model) - self.accelerator = accelerate.Accelerator() - self.device = accelerator.device - self.schedule = schedule # most of these arguments are ignored and are only present for compatibility with # other samples @@ -54,10 +64,10 @@ class KSampler(object): if x_T: x = x_T else: - x = torch.randn([batch_size, *shape], device=device) * sigmas[0] # for GPU draw + x = torch.randn([batch_size, *shape], device=self.device) * sigmas[0] # for GPU draw model_wrap_cfg = CFGDenoiser(self.model) extra_args = {'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': unconditional_guidance_scale} - return (K.sampling.sample_lms(model_wrap_cfg, x, sigmas, extra_args=extra_args, disable=not accelerator.is_main_process), + return (K.sampling.sample_lms(model_wrap_cfg, x, sigmas, extra_args=extra_args, disable=not self.accelerator.is_main_process), None) def gather(samples_ddim): diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index 6f740d1f83..bcfa928537 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -108,6 +108,7 @@ class T2I: ddim_eta=0.0, # deterministic fixed_code=False, precision='autocast', + full_precision=False, strength=0.75 # default in scripts/img2img.py ): self.outdir = outdir @@ -126,6 +127,7 @@ class T2I: self.downsampling_factor = downsampling_factor self.ddim_eta = ddim_eta self.precision = precision + self.full_precision = full_precision self.strength = strength self.model = None # empty for now self.sampler = None @@ -407,7 +409,12 @@ class T2I: m, u = 
model.load_state_dict(sd, strict=False) model.cuda() model.eval() - model.half() + if self.full_precision: + print('Using slower but more accurate full precision math') + model.full() + else: + print('Using half precision math. Call with --full_precision to use full precision') + model.half() return model def _load_img(self,path): diff --git a/scripts/dream.py b/scripts/dream.py index cbee183430..b8abb780fd 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import shlex import atexit @@ -11,7 +12,7 @@ try: except: readline_available = False -debugging = True +debugging = False def main(): ''' Initialize command-line parsers and the diffusion model ''' @@ -49,6 +50,7 @@ def main(): outdir=opt.outdir, sampler=opt.sampler, weights=weights, + full_precision=opt.full_precision, config=config) # make sure the output directory exists @@ -165,14 +167,18 @@ def create_argv_parser(): type=int, default=1, help="number of images to generate") + parser.add_argument('-F','--full_precision', + dest='full_precision', + action='store_true', + help="use slower full precision math for calculations") parser.add_argument('-b','--batch_size', type=int, default=1, help="number of images to produce per iteration (currently not working properly - producing too many images)") parser.add_argument('--sampler', choices=['plms','ddim', 'klms'], - default='plms', - help="which sampler to use") + default='klms', + help="which sampler to use (klms)") parser.add_argument('-o', '--outdir', type=str, diff --git a/scripts/preload_models.py b/scripts/preload_models.py index 7db461bec2..ad1a1eecc5 100644 --- a/scripts/preload_models.py +++ b/scripts/preload_models.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 # Before running stable-diffusion on an internet-isolated machine, # run this script from one with internet connectivity. The # two machines must share a common .cache directory. 
From 2ace56313cbe273b6fef25ad60cd2b724b29fc95 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 21 Aug 2022 19:59:36 -0400 Subject: [PATCH 4/5] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 75fae7ca2f..131289bab1 100644 --- a/README.md +++ b/README.md @@ -75,9 +75,9 @@ completely). The default is 0.75, and ranges from 0.25-0.75 give interesting res ## Changes -*v1.01 (21 August 2022) --added k_lms sampling **Please run "conda update -f environment.yaml" to load the k_lms dependencies** --*use half precision arithmetic by default, resulting in faster execution and lower memory requirements +- v1.01 (21 August 2022) +* added k_lms sampling **Please run "conda env update -f environment.yaml" to load the k_lms dependencies** +* use half precision arithmetic by default, resulting in faster execution and lower memory requirements Pass argument --full_precision to dream.py to get slower but more accurate image generation From f5450bad61b6d754530a64c6a55f981bc7be4d93 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 21 Aug 2022 20:16:31 -0400 Subject: [PATCH 5/5] k_lms sampling working; half precision working, can override with --full_precision --- ldm/simplet2i.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index bcfa928537..e99660a8ab 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -410,8 +410,7 @@ class T2I: model.cuda() model.eval() if self.full_precision: - print('Using slower but more accurate full precision math') - model.full() + print('Using slower but more accurate full-precision math (--full_precision)') else: print('Using half precision math. Call with --full_precision to use full precision') model.half()