diff --git a/.gitignore b/.gitignore
index e05fa41e06..d3f70b165c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
+# ignore default image save location and model symbolic link
+outputs/
+models/ldm/stable-diffusion-v1/model.ckpt
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -6,6 +10,10 @@ __pycache__/
 # C extensions
 *.so
 
+# emacs autosave files
+*~
+\#*#
+
 # Distribution / packaging
 .Python
 build/
@@ -20,6 +28,7 @@ parts/
 sdist/
 var/
 wheels/
+pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
@@ -86,6 +95,7 @@ ipython_config.py
 # For a library or package, you might want to ignore these files since the code is
 # intended to run in multiple environments; otherwise, check them in:
 # .python-version
+.python-version
 
 # pipenv
 # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
@@ -109,7 +119,7 @@ ipython_config.py
 # https://pdm.fming.dev/#use-with-ide
 .pdm.toml
 
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 
 # Celery stuff
@@ -159,8 +169,7 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-**/*.ckpt
 src/
 logs/
 **/__pycache__/
-outputs
\ No newline at end of file
+outputs
diff --git a/ldm/models/diffusion/ddim.py b/ldm/models/diffusion/ddim.py
index 065b32986a..2ebaeabd22 100644
--- a/ldm/models/diffusion/ddim.py
+++ b/ldm/models/diffusion/ddim.py
@@ -17,9 +17,6 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py
index 71f24850ad..5d09f023f3 100644
--- a/ldm/models/diffusion/plms.py
+++ b/ldm/models/diffusion/plms.py
@@ -16,9 +16,6 @@ class PLMSSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
    def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index 8782d621d3..231ea887c0 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -123,7 +123,8 @@ The vast majority of these arguments default to reasonable values.
                  full_precision=False,
                  strength=0.75, # default in scripts/img2img.py
                  embedding_path=None,
-                 latent_diffusion_weights=False # just to keep track of this parameter when regenerating prompt
+                 latent_diffusion_weights=False, # just to keep track of this parameter when regenerating prompt
+                 device='cuda'
                  ):
         self.outdir = outdir
         self.batch_size = batch_size
@@ -147,11 +148,13 @@ The vast majority of these arguments default to reasonable values.
         self.model = None # empty for now
         self.sampler = None
         self.latent_diffusion_weights=latent_diffusion_weights
+        self.device = device
         if seed is None:
             self.seed = self._new_seed()
         else:
             self.seed = seed
 
+    @torch.no_grad()
     def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
                 cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,init_img=None,skip_normalize=False):
@@ -206,69 +209,67 @@ The vast majority of these arguments default to reasonable values.
 
         # Gawd. Too many levels of indent here. Need to refactor into smaller routines!
         try:
-            with torch.no_grad():
-                with precision_scope("cuda"):
-                    with model.ema_scope():
-                        all_samples = list()
-                        for n in trange(iterations, desc="Sampling"):
-                            seed_everything(seed)
-                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                                uc = None
-                                if cfg_scale != 1.0:
-                                    uc = model.get_learned_conditioning(batch_size * [""])
-                                if isinstance(prompts, tuple):
-                                    prompts = list(prompts)
+            with precision_scope(self.device.type), model.ema_scope():
+                all_samples = list()
+                for n in trange(iterations, desc="Sampling"):
+                    seed_everything(seed)
+                    for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                        uc = None
+                        if cfg_scale != 1.0:
+                            uc = model.get_learned_conditioning(batch_size * [""])
+                        if isinstance(prompts, tuple):
+                            prompts = list(prompts)
 
-                                # weighted sub-prompts
-                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
-                                if len(subprompts) > 1:
-                                    # i dont know if this is correct.. but it works
-                                    c = torch.zeros_like(uc)
-                                    # get total weight for normalizing
-                                    totalWeight = sum(weights)
-                                    # normalize each "sub prompt" and add it
-                                    for i in range(0,len(subprompts)):
-                                        weight = weights[i]
-                                        if not skip_normalize:
-                                            weight = weight / totalWeight
-                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
-                                else: # just standard 1 prompt
-                                    c = model.get_learned_conditioning(prompts)
+                        # weighted sub-prompts
+                        subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                        if len(subprompts) > 1:
+                            # I don't know if this is correct, but it works
+                            c = torch.zeros_like(uc)
+                            # get total weight for normalizing
+                            totalWeight = sum(weights)
+                            # normalize each "sub prompt" and add it
+                            for i in range(0,len(subprompts)):
+                                weight = weights[i]
+                                if not skip_normalize:
+                                    weight = weight / totalWeight
+                                c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                        else: # just standard 1 prompt
+                            c = model.get_learned_conditioning(prompts)
 
-                                shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
-                                samples_ddim, _ = sampler.sample(S=steps,
-                                                                 conditioning=c,
-                                                                 batch_size=batch_size,
-                                                                 shape=shape,
-                                                                 verbose=False,
-                                                                 unconditional_guidance_scale=cfg_scale,
-                                                                 unconditional_conditioning=uc,
-                                                                 eta=ddim_eta,
-                                                                 x_T=start_code)
+                        shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
+                        samples_ddim, _ = sampler.sample(S=steps,
+                                                         conditioning=c,
+                                                         batch_size=batch_size,
+                                                         shape=shape,
+                                                         verbose=False,
+                                                         unconditional_guidance_scale=cfg_scale,
+                                                         unconditional_conditioning=uc,
+                                                         eta=ddim_eta,
+                                                         x_T=start_code)
 
-                                x_samples_ddim = model.decode_first_stage(samples_ddim)
-                                x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+                        x_samples_ddim = model.decode_first_stage(samples_ddim)
+                        x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
 
-                                if not grid:
-                                    for x_sample in x_samples_ddim:
-                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                        filename = self._unique_filename(outdir,previousname=filename,
-                                                                         seed=seed,isbatch=(batch_size>1))
-                                        assert not os.path.exists(filename)
-                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                        images.append([filename,seed])
-                                else:
-                                    all_samples.append(x_samples_ddim)
-                                    seeds.append(seed)
+                        if not grid:
+                            for x_sample in x_samples_ddim:
+                                x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                filename = self._unique_filename(outdir,previousname=filename,
+                                                                 seed=seed,isbatch=(batch_size>1))
+                                assert not os.path.exists(filename)
+                                Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                images.append([filename,seed])
+                        else:
+                            all_samples.append(x_samples_ddim)
+                            seeds.append(seed)
 
-                            image_count += 1
-                            seed = self._new_seed()
-                        if grid:
-                            images = self._make_grid(samples=all_samples,
-                                                     seeds=seeds,
-                                                     batch_size=batch_size,
-                                                     iterations=iterations,
-                                                     outdir=outdir)
+                    image_count += 1
+                    seed = self._new_seed()
+                if grid:
+                    images = self._make_grid(samples=all_samples,
+                                             seeds=seeds,
+                                             batch_size=batch_size,
+                                             iterations=iterations,
+                                             outdir=outdir)
         except KeyboardInterrupt:
             print('*interrupted*')
             print('Partial results will be returned; if --grid was requested, nothing will be returned.')
@@ -281,6 +282,7 @@ The vast majority of these arguments default to reasonable values.
         return images
 
     # There is lots of shared code between this and txt2img and should be refactored.
+    @torch.no_grad()
     def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
                 cfg_scale=None,ddim_eta=None,strength=None,embedding_path=None,skip_normalize=False):
@@ -331,7 +333,7 @@ The vast majority of these arguments default to reasonable values.
         assert os.path.isfile(init_img)
         init_image = self._load_img(init_img).to(self.device)
         init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
-        with precision_scope("cuda"):
+        with precision_scope(self.device.type):
             init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space
 
         sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False)
@@ -353,63 +355,61 @@ The vast majority of these arguments default to reasonable values.
 
         # Gawd. Too many levels of indent here. Need to refactor into smaller routines!
         try:
-            with torch.no_grad():
-                with precision_scope("cuda"):
-                    with model.ema_scope():
-                        all_samples = list()
-                        for n in trange(iterations, desc="Sampling"):
-                            seed_everything(seed)
-                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                                uc = None
-                                if cfg_scale != 1.0:
-                                    uc = model.get_learned_conditioning(batch_size * [""])
-                                if isinstance(prompts, tuple):
-                                    prompts = list(prompts)
+            with precision_scope(self.device.type), model.ema_scope():
+                all_samples = list()
+                for n in trange(iterations, desc="Sampling"):
+                    seed_everything(seed)
+                    for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                        uc = None
+                        if cfg_scale != 1.0:
+                            uc = model.get_learned_conditioning(batch_size * [""])
+                        if isinstance(prompts, tuple):
+                            prompts = list(prompts)
 
-                                # weighted sub-prompts
-                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
-                                if len(subprompts) > 1:
-                                    # i dont know if this is correct.. but it works
-                                    c = torch.zeros_like(uc)
-                                    # get total weight for normalizing
-                                    totalWeight = sum(weights)
-                                    # normalize each "sub prompt" and add it
-                                    for i in range(0,len(subprompts)):
-                                        weight = weights[i]
-                                        if not skip_normalize:
-                                            weight = weight / totalWeight
-                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
-                                else: # just standard 1 prompt
-                                    c = model.get_learned_conditioning(prompts)
+                        # weighted sub-prompts
+                        subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                        if len(subprompts) > 1:
+                            # I don't know if this is correct, but it works
+                            c = torch.zeros_like(uc)
+                            # get total weight for normalizing
+                            totalWeight = sum(weights)
+                            # normalize each "sub prompt" and add it
+                            for i in range(0,len(subprompts)):
+                                weight = weights[i]
+                                if not skip_normalize:
+                                    weight = weight / totalWeight
+                                c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                        else: # just standard 1 prompt
+                            c = model.get_learned_conditioning(prompts)
 
-                                # encode (scaled latent)
-                                z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
-                                # decode it
-                                samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
-                                                         unconditional_conditioning=uc,)
+                        # encode (scaled latent)
+                        z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
+                        # decode it
+                        samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
+                                                 unconditional_conditioning=uc,)
 
-                                x_samples = model.decode_first_stage(samples)
-                                x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
+                        x_samples = model.decode_first_stage(samples)
+                        x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
 
-                                if not grid:
-                                    for x_sample in x_samples:
-                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                        filename = self._unique_filename(outdir,previousname=filename,
-                                                                         seed=seed,isbatch=(batch_size>1))
-                                        assert not os.path.exists(filename)
-                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                        images.append([filename,seed])
-                                else:
-                                    all_samples.append(x_samples)
-                                    seeds.append(seed)
-                            image_count +=1
-                            seed = self._new_seed()
-                        if grid:
-                            images = self._make_grid(samples=all_samples,
-                                                     seeds=seeds,
-                                                     batch_size=batch_size,
-                                                     iterations=iterations,
-                                                     outdir=outdir)
+                        if not grid:
+                            for x_sample in x_samples:
+                                x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                filename = self._unique_filename(outdir,previousname=filename,
+                                                                 seed=seed,isbatch=(batch_size>1))
+                                assert not os.path.exists(filename)
+                                Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                images.append([filename,seed])
+                        else:
+                            all_samples.append(x_samples)
+                            seeds.append(seed)
+                    image_count += 1
+                    seed = self._new_seed()
+                if grid:
+                    images = self._make_grid(samples=all_samples,
+                                             seeds=seeds,
+                                             batch_size=batch_size,
+                                             iterations=iterations,
+                                             outdir=outdir)
 
         except KeyboardInterrupt:
             print('*interrupted*')
@@ -448,11 +448,13 @@ The vast majority of these arguments default to reasonable values.
         seed_everything(self.seed)
         try:
             config = OmegaConf.load(self.config)
-            self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+            self.device = torch.device(self.device) if torch.cuda.is_available() else torch.device("cpu")
             model = self._load_model_from_config(config,self.weights)
             if self.embedding_path is not None:
                 model.embedding_manager.load(self.embedding_path)
             self.model = model.to(self.device)
+            # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
+            self.model.cond_stage_model.device = self.device
         except AttributeError:
             raise SystemExit
 
@@ -489,7 +491,6 @@ The vast majority of these arguments default to reasonable values.
         sd = pl_sd["state_dict"]
         model = instantiate_from_config(config.model)
         m, u = model.load_state_dict(sd, strict=False)
-        model.cuda()
         model.eval()
         if self.full_precision:
             print('Using slower but more accurate full-precision math (--full_precision)')
diff --git a/scripts/dream.py b/scripts/dream.py
index 222949d6dd..f7452d9e48 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -60,7 +60,8 @@ def main():
                   full_precision=opt.full_precision,
                   config=config,
                   latent_diffusion_weights=opt.laion400m, # this is solely for recreating the prompt
-                  embedding_path=opt.embedding_path
+                  embedding_path=opt.embedding_path,
+                  device=opt.device
     )
 
     # make sure the output directory exists
@@ -282,10 +283,14 @@ def create_argv_parser():
                         type=str,
                         default="outputs/img-samples",
                         help="directory in which to place generated images and a log of prompts and seeds")
-    parser.add_argument('--embedding_path', type=str, help="Path to a pre-trained embedding manager checkpoint - can only be set on command line")
+    parser.add_argument('--device',
+                        '-d',
+                        type=str,
+                        default="cuda",
+                        help="device on which to run stable diffusion; defaults to cuda (torch.cuda.current_device()) if available")
     return parser
diff --git a/src/clip b/src/clip
new file mode 160000
index 0000000000..d50d76daa6
--- /dev/null
+++ b/src/clip
@@ -0,0 +1 @@
+Subproject commit d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
diff --git a/src/k-diffusion b/src/k-diffusion
new file mode 160000
index 0000000000..db57990687
--- /dev/null
+++ b/src/k-diffusion
@@ -0,0 +1 @@
+Subproject commit db5799068749bf3a6d5845120ed32df16b7d883b
diff --git a/src/taming-transformers b/src/taming-transformers
new file mode 160000
index 0000000000..24268930bf
--- /dev/null
+++ b/src/taming-transformers
@@ -0,0 +1 @@
+Subproject commit 24268930bf1dce879235a7fddd0b2355b84d7ea6
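
Taken together, these hunks replace the hard-coded CUDA assumption with a device argument that flows from the new --device flag through T2I into the samplers. A reviewer's smoke test might look like the sketch below; it is illustrative and not part of the change: the prompt, step count, and seed are made up, and it assumes the model is loaded lazily on first use, as the elided txt2img context suggests.

    import torch
    from ldm.simplet2i import T2I

    # 'cuda' is still the default; any torch device string can be passed through,
    # and the loader above falls back to CPU when CUDA is unavailable.
    t2i = T2I(device='cuda' if torch.cuda.is_available() else 'cpu')

    # Sampler buffers now stay on the chosen device, since register_buffer()
    # in ddim.py/plms.py no longer forces tensors onto torch.device("cuda").
    results = t2i.txt2img('a watercolor lighthouse at dawn', steps=10, seed=42)

    # With grid mode off, txt2img returns [filename, seed] pairs.
    for filename, seed in results:
        print(f'{filename} (seed {seed})')

The equivalent command-line check is python scripts/dream.py --device cpu, which should now get past sampler setup on a machine without CUDA rather than failing in register_buffer.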