diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..d1cbb0afe6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,133 @@ +# ignore default image save location and model symbolic link +outputs/ +models/ldm/stable-diffusion-v1/model.ckpt + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py index c7f6263816..30ce4ff803 100644 --- a/ldm/simplet2i.py +++ b/ldm/simplet2i.py @@ -113,7 +113,8 @@ The vast majority of these arguments default to reasonable values. precision='autocast', full_precision=False, strength=0.75, # default in scripts/img2img.py - latent_diffusion_weights=False # just to keep track of this parameter when regenerating prompt + latent_diffusion_weights=False, # just to keep track of this parameter when regenerating prompt + device='cuda' ): self.outdir = outdir self.batch_size = batch_size @@ -136,11 +137,13 @@ The vast majority of these arguments default to reasonable values. self.model = None # empty for now self.sampler = None self.latent_diffusion_weights=latent_diffusion_weights + self.device = device if seed is None: self.seed = self._new_seed() else: self.seed = seed + @torch.no_grad() def txt2img(self,prompt,outdir=None,batch_size=None,iterations=None, steps=None,seed=None,grid=None,individual=None,width=None,height=None, cfg_scale=None,ddim_eta=None,strength=None,init_img=None,skip_normalize=False): @@ -191,69 +194,67 @@ The vast majority of these arguments default to reasonable values. # Gawd. Too many levels of indent here. Need to refactor into smaller routines! 
         try:
-            with torch.no_grad():
-                with precision_scope("cuda"):
-                    with model.ema_scope():
-                        all_samples = list()
-                        for n in trange(iterations, desc="Sampling"):
-                            seed_everything(seed)
-                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                                uc = None
-                                if cfg_scale != 1.0:
-                                    uc = model.get_learned_conditioning(batch_size * [""])
-                                if isinstance(prompts, tuple):
-                                    prompts = list(prompts)
+            with precision_scope(self.device.type), model.ema_scope():
+                all_samples = list()
+                for n in trange(iterations, desc="Sampling"):
+                    seed_everything(seed)
+                    for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                        uc = None
+                        if cfg_scale != 1.0:
+                            uc = model.get_learned_conditioning(batch_size * [""])
+                        if isinstance(prompts, tuple):
+                            prompts = list(prompts)

-                                # weighted sub-prompts
-                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
-                                if len(subprompts) > 1:
-                                    # i dont know if this is correct.. but it works
-                                    c = torch.zeros_like(uc)
-                                    # get total weight for normalizing
-                                    totalWeight = sum(weights)
-                                    # normalize each "sub prompt" and add it
-                                    for i in range(0,len(subprompts)):
-                                        weight = weights[i]
-                                        if not skip_normalize:
-                                            weight = weight / totalWeight
-                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
-                                else: # just standard 1 prompt
-                                    c = model.get_learned_conditioning(prompts)
+                        # weighted sub-prompts
+                        subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                        if len(subprompts) > 1:
+                            # I don't know if this is correct, but it works
+                            c = torch.zeros_like(uc)
+                            # get total weight for normalizing
+                            totalWeight = sum(weights)
+                            # normalize each "sub prompt" and add it
+                            for i in range(0,len(subprompts)):
+                                weight = weights[i]
+                                if not skip_normalize:
+                                    weight = weight / totalWeight
+                                c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                        else: # just standard 1 prompt
+                            c = model.get_learned_conditioning(prompts)

-                                shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
-                                samples_ddim, _ = sampler.sample(S=steps,
-                                                                 conditioning=c,
-                                                                 batch_size=batch_size,
-                                                                 shape=shape,
-                                                                 verbose=False,
-                                                                 unconditional_guidance_scale=cfg_scale,
-                                                                 unconditional_conditioning=uc,
-                                                                 eta=ddim_eta,
-                                                                 x_T=start_code)
+                        shape = [self.latent_channels, height // self.downsampling_factor, width // self.downsampling_factor]
+                        samples_ddim, _ = sampler.sample(S=steps,
+                                                         conditioning=c,
+                                                         batch_size=batch_size,
+                                                         shape=shape,
+                                                         verbose=False,
+                                                         unconditional_guidance_scale=cfg_scale,
+                                                         unconditional_conditioning=uc,
+                                                         eta=ddim_eta,
+                                                         x_T=start_code)

-                                x_samples_ddim = model.decode_first_stage(samples_ddim)
-                                x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+                        x_samples_ddim = model.decode_first_stage(samples_ddim)
+                        x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)

-                                if not grid:
-                                    for x_sample in x_samples_ddim:
-                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                        filename = self._unique_filename(outdir,previousname=filename,
-                                                                         seed=seed,isbatch=(batch_size>1))
-                                        assert not os.path.exists(filename)
-                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                        images.append([filename,seed])
-                                else:
-                                    all_samples.append(x_samples_ddim)
-                                    seeds.append(seed)
+                        if not grid:
+                            for x_sample in x_samples_ddim:
+                                x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                filename = self._unique_filename(outdir,previousname=filename,
+                                                                 seed=seed,isbatch=(batch_size>1))
+                                assert not os.path.exists(filename)
+                                Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                images.append([filename,seed])
+                        else:
+                            all_samples.append(x_samples_ddim)
+                            seeds.append(seed)

-                            image_count += 1
-                            seed = self._new_seed()
-                        if grid:
-                            images = self._make_grid(samples=all_samples,
-                                                     seeds=seeds,
-                                                     batch_size=batch_size,
-                                                     iterations=iterations,
-                                                     outdir=outdir)
+                    image_count += 1
+                    seed = self._new_seed()
+                if grid:
+                    images = self._make_grid(samples=all_samples,
+                                             seeds=seeds,
+                                             batch_size=batch_size,
+                                             iterations=iterations,
+                                             outdir=outdir)
         except KeyboardInterrupt:
             print('*interrupted*')
             print('Partial results will be returned; if --grid was requested, nothing will be returned.')
@@ -266,6 +267,7 @@ The vast majority of these arguments default to reasonable values.
         return images

     # There is lots of shared code between this and txt2img and should be refactored.
+    @torch.no_grad()
     def img2img(self,prompt,outdir=None,init_img=None,batch_size=None,iterations=None,
                 steps=None,seed=None,grid=None,individual=None,width=None,height=None,
                 cfg_scale=None,ddim_eta=None,strength=None,skip_normalize=False):
@@ -312,7 +314,7 @@ The vast majority of these arguments default to reasonable values.
         assert os.path.isfile(init_img)
         init_image = self._load_img(init_img).to(self.device)
         init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
-        with precision_scope("cuda"):
+        with precision_scope(self.device.type):
             init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image)) # move to latent space

         sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False)
@@ -334,63 +336,61 @@ The vast majority of these arguments default to reasonable values.

         # Gawd. Too many levels of indent here. Need to refactor into smaller routines!
         try:
-            with torch.no_grad():
-                with precision_scope("cuda"):
-                    with model.ema_scope():
-                        all_samples = list()
-                        for n in trange(iterations, desc="Sampling"):
-                            seed_everything(seed)
-                            for prompts in tqdm(data, desc="data", dynamic_ncols=True):
-                                uc = None
-                                if cfg_scale != 1.0:
-                                    uc = model.get_learned_conditioning(batch_size * [""])
-                                if isinstance(prompts, tuple):
-                                    prompts = list(prompts)
+            with precision_scope(self.device.type), model.ema_scope():
+                all_samples = list()
+                for n in trange(iterations, desc="Sampling"):
+                    seed_everything(seed)
+                    for prompts in tqdm(data, desc="data", dynamic_ncols=True):
+                        uc = None
+                        if cfg_scale != 1.0:
+                            uc = model.get_learned_conditioning(batch_size * [""])
+                        if isinstance(prompts, tuple):
+                            prompts = list(prompts)

-                                # weighted sub-prompts
-                                subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
-                                if len(subprompts) > 1:
-                                    # i dont know if this is correct.. but it works
-                                    c = torch.zeros_like(uc)
-                                    # get total weight for normalizing
-                                    totalWeight = sum(weights)
-                                    # normalize each "sub prompt" and add it
-                                    for i in range(0,len(subprompts)):
-                                        weight = weights[i]
-                                        if not skip_normalize:
-                                            weight = weight / totalWeight
-                                        c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
-                                else: # just standard 1 prompt
-                                    c = model.get_learned_conditioning(prompts)
+                        # weighted sub-prompts
+                        subprompts,weights = T2I._split_weighted_subprompts(prompts[0])
+                        if len(subprompts) > 1:
+                            # I don't know if this is correct, but it works
+                            c = torch.zeros_like(uc)
+                            # get total weight for normalizing
+                            totalWeight = sum(weights)
+                            # normalize each "sub prompt" and add it
+                            for i in range(0,len(subprompts)):
+                                weight = weights[i]
+                                if not skip_normalize:
+                                    weight = weight / totalWeight
+                                c = torch.add(c,model.get_learned_conditioning(subprompts[i]), alpha=weight)
+                        else: # just standard 1 prompt
+                            c = model.get_learned_conditioning(prompts)

-                                # encode (scaled latent)
-                                z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
-                                # decode it
-                                samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
-                                                         unconditional_conditioning=uc,)
+                        # encode (scaled latent)
+                        z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(self.device))
+                        # decode it
+                        samples = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=cfg_scale,
+                                                 unconditional_conditioning=uc,)

-                                x_samples = model.decode_first_stage(samples)
-                                x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
+                        x_samples = model.decode_first_stage(samples)
+                        x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)

-                                if not grid:
-                                    for x_sample in x_samples:
-                                        x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
-                                        filename = self._unique_filename(outdir,previousname=filename,
-                                                                         seed=seed,isbatch=(batch_size>1))
-                                        assert not os.path.exists(filename)
-                                        Image.fromarray(x_sample.astype(np.uint8)).save(filename)
-                                        images.append([filename,seed])
-                                else:
-                                    all_samples.append(x_samples)
-                                    seeds.append(seed)
-                            image_count +=1
-                            seed = self._new_seed()
-                        if grid:
-                            images = self._make_grid(samples=all_samples,
-                                                     seeds=seeds,
-                                                     batch_size=batch_size,
-                                                     iterations=iterations,
-                                                     outdir=outdir)
+                        if not grid:
+                            for x_sample in x_samples:
+                                x_sample = 255. * rearrange(x_sample.cpu().numpy(), 'c h w -> h w c')
+                                filename = self._unique_filename(outdir,previousname=filename,
+                                                                 seed=seed,isbatch=(batch_size>1))
+                                assert not os.path.exists(filename)
+                                Image.fromarray(x_sample.astype(np.uint8)).save(filename)
+                                images.append([filename,seed])
+                        else:
+                            all_samples.append(x_samples)
+                            seeds.append(seed)
+                    image_count +=1
+                    seed = self._new_seed()
+                if grid:
+                    images = self._make_grid(samples=all_samples,
+                                             seeds=seeds,
+                                             batch_size=batch_size,
+                                             iterations=iterations,
+                                             outdir=outdir)

         except KeyboardInterrupt:
             print('*interrupted*')
@@ -429,7 +429,7 @@ The vast majority of these arguments default to reasonable values.
             seed_everything(self.seed)
             try:
                 config = OmegaConf.load(self.config)
-                self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+                self.device = torch.device(self.device) if torch.cuda.is_available() else torch.device("cpu")
                 model = self._load_model_from_config(config,self.weights)
                 self.model = model.to(self.device)
             except AttributeError:
@@ -458,7 +458,6 @@ The vast majority of these arguments default to reasonable values.
         sd = pl_sd["state_dict"]
         model = instantiate_from_config(config.model)
         m, u = model.load_state_dict(sd, strict=False)
-        model.cuda()
         model.eval()
         if self.full_precision:
             print('Using slower but more accurate full-precision math (--full_precision)')
diff --git a/scripts/dream.py b/scripts/dream.py
index fb8fec2384..2585cbb7fe 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -57,7 +57,8 @@ def main():
              weights=weights,
              full_precision=opt.full_precision,
              config=config,
-             latent_diffusion_weights=opt.laion400m # this is solely for recreating the prompt
+             latent_diffusion_weights=opt.laion400m, # this is solely for recreating the prompt
+             device=opt.device
    )

    # make sure the output directory exists
@@ -268,6 +269,11 @@ def create_argv_parser():
                        type=str,
                        default="outputs/img-samples",
                        help="directory in which to place generated images and a log of prompts and seeds")
+    parser.add_argument('--device',
+                        '-d',
+                        type=str,
+                        default="cuda",
+                        help="device to run stable diffusion on. defaults to cuda; falls back to cpu if CUDA is not available")
    return parser
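
A note on the @torch.no_grad() decorators added to txt2img and img2img above: torch.no_grad works both as a context manager and as a decorator, so hoisting it out of the method body is behavior-preserving while removing one level of indentation. A minimal standalone sketch (illustrative only, not part of the patch):

import torch

@torch.no_grad()
def as_decorator():
    x = torch.ones(2, requires_grad=True)
    return (x * 2).requires_grad   # False: autograd is disabled for the whole body

def as_context_manager():
    x = torch.ones(2, requires_grad=True)
    with torch.no_grad():
        return (x * 2).requires_grad   # False: identical effect, one extra indent level

assert as_decorator() is False and as_context_manager() is False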
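
Similarly, precision_scope(self.device.type) replaces the hard-coded "cuda" string. One subtlety visible in the diff: the constructor stores device as a plain string, and only the model-loading hunk converts it with torch.device(...), so the .type accesses assume the model has been loaded first. A sketch of the underlying pattern, assuming precision_scope resolves to torch.autocast or contextlib.nullcontext (its definition lies outside this diff):

import torch
from contextlib import nullcontext

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# autocast expects the device *type* string ("cuda"/"cpu"), not a torch.device,
# which is why the patch passes self.device.type rather than self.device
precision_scope = torch.autocast if device.type == "cuda" else nullcontext

with precision_scope(device.type):
    x = torch.randn(1, 4, 8, 8, device=device)
    y = x @ x.transpose(-1, -2)   # mixed precision on CUDA, plain fp32 on CPU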
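
The weighted sub-prompt branch that both hunks re-indent builds the conditioning tensor as an (optionally normalized) weighted sum of per-sub-prompt embeddings. The same logic in isolation, with get_learned_conditioning stubbed out by random tensors (the shapes are illustrative stand-ins for CLIP embeddings):

import torch

def blend(embeddings, weights, skip_normalize=False):
    total_weight = sum(weights)
    c = torch.zeros_like(embeddings[0])
    for emb, weight in zip(embeddings, weights):
        if not skip_normalize:
            weight = weight / total_weight   # make the weights sum to 1
        c = torch.add(c, emb, alpha=weight)  # c += weight * emb
    return c

embs = [torch.randn(1, 77, 768) for _ in range(2)]  # stand-in embeddings
c = blend(embs, [1.0, 3.0])                         # second prompt dominates 3:1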
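
End to end, the device choice now flows from the CLI into the class: dream.py forwards opt.device to the T2I constructor, and the loader converts the string to a torch.device, silently falling back to CPU when CUDA is unavailable. A hypothetical driver (the txt2img signature and return shape come from the diff; the name load_model for the routine containing the torch.device(...) fallback is an assumption, since its def line is not shown):

from ldm.simplet2i import T2I

t2i = T2I(device="cpu")   # dream.py passes opt.device here; the default is "cuda"
t2i.load_model()          # assumed method name; applies the CPU fallback shown above
results = t2i.txt2img(prompt="a watercolor fox", seed=42)
for filename, seed in results:   # txt2img returns [filename, seed] pairs
    print(f"{filename} (seed {seed})")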