Merge branch 'dream-m1' of github.com:toffaletti/stable-diffusion into toffaletti-dream-m1

* Fix conflicts with main branch changes
* Fix logic error in choose_autocast_device() that was causing crashes on CUDA systems.
Lincoln Stein 2022-09-01 17:54:01 -04:00
commit 629ca09fda
4 changed files with 60 additions and 41 deletions

View File

@@ -52,7 +52,7 @@ dependencies:
     - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
     - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
     - -e git+https://github.com/openai/CLIP.git@main#egg=clip
-    - -e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
+    - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
     - -e .
 variables:
   PYTORCH_ENABLE_MPS_FALLBACK: 1
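The dependency swap above replaces lstein's k-diffusion fork with Birch-san's `mps` branch, which is patched to run on Apple's Metal Performance Shaders backend. The PYTORCH_ENABLE_MPS_FALLBACK variable tells PyTorch to silently fall back to the CPU for any operator the MPS backend does not yet implement; it only takes effect if it is set before torch is first imported. A minimal sketch of checking that environment (assumes PyTorch >= 1.12, which introduced `torch.backends.mps`):

    import os
    # Must be set before the first `import torch` to take effect.
    os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')

    import torch

    if torch.backends.mps.is_available():
        print('MPS available; unsupported ops will fall back to CPU')
    else:
        print('MPS unavailable; running on CPU')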

View File

@@ -8,4 +8,10 @@ def choose_torch_device() -> str:
         return 'mps'
     return 'cpu'
+
+def choose_autocast_device(device) -> str:
+    '''Returns an autocast compatible device from a torch device'''
+    device_type = device.type  # this returns 'mps' on M1
+    # autocast only supports cuda or cpu
+    if device_type not in ('cuda', 'cpu'):
+        return 'cpu'
+    return device_type
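The helper exists because `torch.autocast` accepts only 'cuda' and 'cpu' as device types, so a torch device of type 'mps' has to be mapped to 'cpu' before entering the autocast context. A sketch of the intended call pattern (standalone illustration; the real caller in simplet2i.py wraps this in its `scope()` alias):

    import torch
    from ldm.dream.devices import choose_autocast_device, choose_torch_device

    device = torch.device(choose_torch_device())
    device_type = choose_autocast_device(device)  # 'cuda', or 'cpu' on M1/MPS
    with torch.autocast(device_type):
        pass  # run the sampling loop here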

View File

@@ -8,6 +8,7 @@ import torch
 import numpy as np
 import random
 import os
+import traceback
 from omegaconf import OmegaConf
 from PIL import Image
 from tqdm import tqdm, trange
@@ -28,7 +29,7 @@ from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.image_util import InitImageResizer
-from ldm.dream.devices import choose_torch_device
+from ldm.dream.devices import choose_autocast_device, choose_torch_device

 """Simplified text to image API for stable diffusion/latent diffusion
@@ -132,7 +133,9 @@ class T2I:
         full_precision=False,
         strength=0.75,  # default in scripts/img2img.py
         embedding_path=None,
+        device_type = 'cuda',
         # just to keep track of this parameter when regenerating prompt
+        # needs to be replaced when a new configuration system is implemented.
         latent_diffusion_weights=False,
     ):
         self.iterations = iterations
@@ -151,11 +154,17 @@ class T2I:
         self.full_precision = full_precision
         self.strength = strength
         self.embedding_path = embedding_path
+        self.device_type = device_type
         self.model = None  # empty for now
         self.sampler = None
         self.device = None
         self.latent_diffusion_weights = latent_diffusion_weights
+        if device_type == 'cuda' and not torch.cuda.is_available():
+            device_type = choose_torch_device()
+            print(">> cuda not available, using device", device_type)
+        self.device = torch.device(device_type)
         # for VRAM usage statistics
+        device_type = choose_torch_device()
         self.session_peakmem = torch.cuda.max_memory_allocated() if device_type == 'cuda' else None
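The constructor change above degrades gracefully: a caller who asks for 'cuda' on a machine without an NVIDIA GPU gets whatever choose_torch_device() picks instead of a hard failure. A hypothetical standalone equivalent of that fallback (`resolve_device` is an illustration, not part of the codebase):

    import torch

    def resolve_device(requested: str) -> torch.device:
        # Degrade 'cuda' to the best available backend, mirroring
        # the __init__ logic above.
        if requested == 'cuda' and not torch.cuda.is_available():
            mps = getattr(torch.backends, 'mps', None)
            requested = 'mps' if mps and mps.is_available() else 'cpu'
        return torch.device(requested)

    print(resolve_device('cuda'))  # 'mps' on Apple Silicon, else 'cpu' without CUDA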
@@ -312,8 +321,9 @@ class T2I:
                 callback=step_callback,
             )

-            with scope(self.device.type), self.model.ema_scope():
-                for n in trange(iterations, desc='>> Generating'):
+            device_type = choose_autocast_device(self.device)
+            with scope(device_type), self.model.ema_scope():
+                for n in trange(iterations, desc='Generating'):
                     seed_everything(seed)
                     image = next(images_iterator)
                     results.append([image, seed])
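Reseeding inside the loop means every image in a multi-image run is reproducible from its own recorded seed, not just from the seed of the whole run. A hedged sketch of the pattern (seed_everything comes from pytorch_lightning, as in this file's imports; the image source and the per-pass reseed are stand-ins):

    from pytorch_lightning import seed_everything

    results = []
    seed = 42
    for n in range(3):  # iterations
        seed_everything(seed)               # fix RNG state for this image
        image = f'image-from-seed-{seed}'   # stand-in for next(images_iterator)
        results.append([image, seed])
        seed += 1                           # stand-in; the real code draws a fresh seed per pass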
@@ -346,7 +356,7 @@ class T2I:
                     )
                 except Exception as e:
                     print(
-                        f'Error running RealESRGAN - Your image was not upscaled.\n{e}'
+                        f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                     )
                 if image_callback is not None:
                     if save_original:
@@ -359,11 +369,11 @@ class T2I:
         except KeyboardInterrupt:
             print('*interrupted*')
             print(
-                'Partial results will be returned; if --grid was requested, nothing will be returned.'
+                '>> Partial results will be returned; if --grid was requested, nothing will be returned.'
             )
         except RuntimeError as e:
-            print(str(e))
-            print('Are you sure your system has an adequate NVIDIA GPU?')
+            print(traceback.format_exc(), file=sys.stderr)
+            print('>> Are you sure your system has an adequate NVIDIA GPU?')

         toc = time.time()
         print('>> Usage stats:')
@@ -464,7 +474,6 @@ class T2I:
             )
         t_enc = int(strength * steps)
-        # print(f"target t_enc is {t_enc} steps")

         while True:
             uc, c = self._get_uc_and_c(prompt, skip_normalize)
@@ -515,7 +524,7 @@ class T2I:
         x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
         if len(x_samples) != 1:
             raise Exception(
-                f'expected to get a single image, but got {len(x_samples)}')
+                f'>> expected to get a single image, but got {len(x_samples)}')
         x_sample = 255.0 * rearrange(
             x_samples[0].cpu().numpy(), 'c h w -> h w c'
         )
@@ -525,17 +534,12 @@ class T2I:
             self.seed = random.randrange(0, np.iinfo(np.uint32).max)
         return self.seed

-    def _get_device(self):
-        device_type = choose_torch_device()
-        return torch.device(device_type)
-
     def load_model(self):
         """Load and initialize the model from configuration variables passed at object creation time"""
         if self.model is None:
             seed_everything(self.seed)
             try:
                 config = OmegaConf.load(self.config)
-                self.device = self._get_device()
                 model = self._load_model_from_config(config, self.weights)
                 if self.embedding_path is not None:
                     model.embedding_manager.load(
@@ -544,12 +548,10 @@ class T2I:
                 self.model = model.to(self.device)
                 # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
                 self.model.cond_stage_model.device = self.device
-            except AttributeError:
-                import traceback
-                print(
-                    'Error loading model. Only the CUDA backend is supported', file=sys.stderr)
+            except AttributeError as e:
+                print(f'>> Error loading model. {str(e)}', file=sys.stderr)
                 print(traceback.format_exc(), file=sys.stderr)
-                raise SystemExit
+                raise SystemExit from e

             self._set_sampler()
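The rewritten handler keeps the real error message, prints the full traceback to stderr via the module-level `import traceback` added earlier, and chains the SystemExit to the original exception so the cause survives. The same pattern in isolation (`load_model_somehow` is a hypothetical stand-in for the loader):

    import sys
    import traceback

    def load_model_somehow():
        raise AttributeError('missing checkpoint attribute')  # stand-in failure

    try:
        model = load_model_somehow()
    except AttributeError as e:
        print(f'>> Error loading model. {str(e)}', file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        raise SystemExit from e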

View File

@@ -9,6 +9,7 @@ import sys
 import copy
 import warnings
 import time
+from ldm.dream.devices import choose_torch_device
 import ldm.dream.readline
 from ldm.dream.pngwriter import PngWriter, PromptFormatter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
@@ -60,6 +61,7 @@ def main():
         # this is solely for recreating the prompt
         latent_diffusion_weights=opt.laion400m,
         embedding_path=opt.embedding_path,
+        device_type=opt.device
     )

     # make sure the output directory exists
@@ -346,6 +348,8 @@ def create_argv_parser():
         dest='full_precision',
         action='store_true',
         help='Use slower full precision math for calculations',
+        # MPS only functions with full precision, see https://github.com/lstein/stable-diffusion/issues/237
+        default=choose_torch_device() == 'mps',
     )
     parser.add_argument(
         '-g',
@@ -418,6 +422,13 @@ def create_argv_parser():
         default='model',
         help='Indicates the Stable Diffusion model to use.',
     )
+    parser.add_argument(
+        '--device',
+        '-d',
+        type=str,
+        default='cuda',
+        help='device to run stable diffusion on. Defaults to cuda (`torch.cuda.current_device()`) if available.',
+    )
     return parser
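Taken together, the two argparse changes give the CLI an explicit device switch while defaulting --full_precision on for MPS, which per the linked issue cannot run these models in half precision. A minimal sketch of the resulting command-line surface (standalone; the option names match the diff, the parse_args input is illustrative):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--device', '-d', type=str, default='cuda',
                        help='device to run stable diffusion on')
    parser.add_argument('--full_precision', dest='full_precision',
                        action='store_true',
                        help='Use slower full precision math for calculations')

    opt = parser.parse_args(['--device', 'mps', '--full_precision'])
    print(opt.device, opt.full_precision)  # mps True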