diff --git a/environment-mac.yaml b/environment-mac.yaml
index 7bcd704c5e..44cd1efcd6 100644
--- a/environment-mac.yaml
+++ b/environment-mac.yaml
@@ -52,7 +52,7 @@ dependencies:
   - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
   - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
   - -e git+https://github.com/openai/CLIP.git@main#egg=clip
-  - -e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
+  - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
   - -e .
 variables:
   PYTORCH_ENABLE_MPS_FALLBACK: 1
diff --git a/ldm/dream/devices.py b/ldm/dream/devices.py
index 240754dd36..7a205f6616 100644
--- a/ldm/dream/devices.py
+++ b/ldm/dream/devices.py
@@ -8,4 +8,10 @@ def choose_torch_device() -> str:
         return 'mps'
     return 'cpu'
 
-
+def choose_autocast_device(device) -> str:
+    '''Returns an autocast compatible device from a torch device'''
+    device_type = device.type  # this returns 'mps' on M1
+    # autocast only supports cuda or cpu
+    if device_type not in ('cuda', 'cpu'):
+        return 'cpu'
+    return device_type
diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index ecec4fe38f..49a9fd38c8 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -8,6 +8,7 @@ import torch
 import numpy as np
 import random
 import os
+import traceback
 from omegaconf import OmegaConf
 from PIL import Image
 from tqdm import tqdm, trange
@@ -28,7 +29,7 @@ from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.image_util import InitImageResizer
-from ldm.dream.devices import choose_torch_device
+from ldm.dream.devices import choose_autocast_device, choose_torch_device
 
 """Simplified text to image API for stable diffusion/latent diffusion
 
@@ -114,26 +115,28 @@ class T2I:
     """
 
     def __init__(
-            self,
-            iterations=1,
-            steps=50,
-            seed=None,
-            cfg_scale=7.5,
-            weights='models/ldm/stable-diffusion-v1/model.ckpt',
-            config='configs/stable-diffusion/v1-inference.yaml',
-            grid=False,
-            width=512,
-            height=512,
-            sampler_name='k_lms',
-            latent_channels=4,
-            downsampling_factor=8,
-            ddim_eta=0.0,  # deterministic
-            precision='autocast',
-            full_precision=False,
-            strength=0.75,  # default in scripts/img2img.py
-            embedding_path=None,
-            # just to keep track of this parameter when regenerating prompt
-            latent_diffusion_weights=False,
+        self,
+        iterations=1,
+        steps=50,
+        seed=None,
+        cfg_scale=7.5,
+        weights='models/ldm/stable-diffusion-v1/model.ckpt',
+        config='configs/stable-diffusion/v1-inference.yaml',
+        grid=False,
+        width=512,
+        height=512,
+        sampler_name='k_lms',
+        latent_channels=4,
+        downsampling_factor=8,
+        ddim_eta=0.0,  # deterministic
+        precision='autocast',
+        full_precision=False,
+        strength=0.75,  # default in scripts/img2img.py
+        embedding_path=None,
+        device_type='cuda',
+        # just to keep track of this parameter when regenerating prompt
+        # needs to be replaced when new configuration system implemented.
+        latent_diffusion_weights=False,
     ):
         self.iterations = iterations
         self.width = width
@@ -151,11 +154,17 @@ class T2I:
         self.full_precision = full_precision
         self.strength = strength
         self.embedding_path = embedding_path
+        self.device_type = device_type
         self.model = None  # empty for now
         self.sampler = None
         self.device = None
         self.latent_diffusion_weights = latent_diffusion_weights
 
+        if device_type == 'cuda' and not torch.cuda.is_available():
+            device_type = choose_torch_device()
+            print(">> cuda not available, using device", device_type)
+        self.device = torch.device(device_type)
+
         # for VRAM usage statistics
         device_type = choose_torch_device()
         self.session_peakmem = torch.cuda.max_memory_allocated() if device_type == 'cuda' else None
@@ -312,8 +321,9 @@ class T2I:
                     callback=step_callback,
                 )
 
-            with scope(self.device.type), self.model.ema_scope():
-                for n in trange(iterations, desc='>> Generating'):
+            device_type = choose_autocast_device(self.device)
+            with scope(device_type), self.model.ema_scope():
+                for n in trange(iterations, desc='Generating'):
                     seed_everything(seed)
                     image = next(images_iterator)
                     results.append([image, seed])
@@ -346,7 +356,7 @@ class T2I:
                     )
                 except Exception as e:
                     print(
-                        f'Error running RealESRGAN - Your image was not upscaled.\n{e}'
+                        f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                     )
                 if image_callback is not None:
                     if save_original:
@@ -359,11 +369,11 @@ class T2I:
         except KeyboardInterrupt:
             print('*interrupted*')
             print(
-                'Partial results will be returned; if --grid was requested, nothing will be returned.'
+                '>> Partial results will be returned; if --grid was requested, nothing will be returned.'
             )
         except RuntimeError as e:
-            print(str(e))
-            print('Are you sure your system has an adequate NVIDIA GPU?')
+            print(traceback.format_exc(), file=sys.stderr)
+            print('>> Are you sure your system has an adequate NVIDIA GPU?')
 
         toc = time.time()
         print('>> Usage stats:')
@@ -464,7 +474,6 @@ class T2I:
         )
 
         t_enc = int(strength * steps)
-        # print(f"target t_enc is {t_enc} steps")
 
         while True:
             uc, c = self._get_uc_and_c(prompt, skip_normalize)
@@ -515,7 +524,7 @@ class T2I:
         x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
         if len(x_samples) != 1:
             raise Exception(
-                f'expected to get a single image, but got {len(x_samples)}')
+                f'>> expected to get a single image, but got {len(x_samples)}')
         x_sample = 255.0 * rearrange(
             x_samples[0].cpu().numpy(), 'c h w -> h w c'
         )
@@ -525,17 +534,12 @@ class T2I:
             self.seed = random.randrange(0, np.iinfo(np.uint32).max)
         return self.seed
 
-    def _get_device(self):
-        device_type = choose_torch_device()
-        return torch.device(device_type)
-
     def load_model(self):
         """Load and initialize the model from configuration variables passed at object creation time"""
         if self.model is None:
             seed_everything(self.seed)
             try:
                 config = OmegaConf.load(self.config)
-                self.device = self._get_device()
                 model = self._load_model_from_config(config, self.weights)
                 if self.embedding_path is not None:
                     model.embedding_manager.load(
@@ -544,12 +548,10 @@ class T2I:
                 self.model = model.to(self.device)
                 # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
                 self.model.cond_stage_model.device = self.device
-            except AttributeError:
-                import traceback
-                print(
-                    'Error loading model. Only the CUDA backend is supported', file=sys.stderr)
+            except AttributeError as e:
+                print(f'>> Error loading model. {str(e)}', file=sys.stderr)
                 print(traceback.format_exc(), file=sys.stderr)
-                raise SystemExit
+                raise SystemExit from e
 
         self._set_sampler()
diff --git a/scripts/dream.py b/scripts/dream.py
index 2e3f0707c5..1535ac386c 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -9,6 +9,7 @@ import sys
 import copy
 import warnings
 import time
+from ldm.dream.devices import choose_torch_device
 import ldm.dream.readline
 from ldm.dream.pngwriter import PngWriter, PromptFormatter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
@@ -60,6 +61,7 @@ def main():
         # this is solely for recreating the prompt
         latent_diffusion_weights=opt.laion400m,
         embedding_path=opt.embedding_path,
+        device_type=opt.device,
     )
 
     # make sure the output directory exists
@@ -346,6 +348,8 @@ def create_argv_parser():
         dest='full_precision',
         action='store_true',
         help='Use slower full precision math for calculations',
+        # MPS only functions with full precision, see https://github.com/lstein/stable-diffusion/issues/237
+        default=choose_torch_device() == 'mps',
     )
     parser.add_argument(
         '-g',
@@ -418,6 +422,13 @@ def create_argv_parser():
         default='model',
         help='Indicates the Stable Diffusion model to use.',
     )
+    parser.add_argument(
+        '--device',
+        '-d',
+        type=str,
+        default='cuda',
+        help='device to run stable diffusion on. defaults to cuda (torch.cuda.current_device()) if available',
+    )
     return parser
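
Note: a minimal sketch (not part of the patch) of how the two helpers in ldm/dream/devices.py are meant to compose. torch.autocast only accepts 'cuda' or 'cpu' as its device_type, so choose_autocast_device() falls back to 'cpu' for the autocast context while the tensors themselves stay on the MPS device. Assumes a torch build with MPS support; the tensor shape is illustrative.

    import torch
    from ldm.dream.devices import choose_torch_device, choose_autocast_device

    device = torch.device(choose_torch_device())      # 'mps' on M1, else 'cuda' or 'cpu'
    autocast_device = choose_autocast_device(device)  # 'cpu' when device.type is 'mps'

    # Sampling runs inside an autocast context, mirroring prompt2image() in ldm/simplet2i.py:
    with torch.autocast(device_type=autocast_device):
        x = torch.randn(1, 4, 64, 64, device=device)  # latents live on the chosen device
        print(x.dtype, x.device)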