Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
Merge branch 'dream-m1' of github.com:toffaletti/stable-diffusion into toffaletti-dream-m1
* Fix conflicts with main branch changes
* Fix logic error in choose_autocast_device() that was causing crashes on CUDA systems.
This commit is contained in commit 629ca09fda.
@@ -52,7 +52,7 @@ dependencies:
     - -e git+https://github.com/huggingface/diffusers.git@v0.2.4#egg=diffusers
     - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
     - -e git+https://github.com/openai/CLIP.git@main#egg=clip
-    - -e git+https://github.com/lstein/k-diffusion.git@master#egg=k-diffusion
+    - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion
     - -e .
 variables:
   PYTORCH_ENABLE_MPS_FALLBACK: 1
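
For context, PYTORCH_ENABLE_MPS_FALLBACK makes PyTorch fall back to the CPU for operators the Metal (MPS) backend does not yet implement. A minimal sketch, not part of this commit, for checking that the environment matches what the `variables:` block intends:

import os
import torch

# Set by the conda environment's `variables:` block; it must be in place
# before an unsupported MPS op runs, or PyTorch raises instead of falling back.
print('MPS fallback enabled:', os.environ.get('PYTORCH_ENABLE_MPS_FALLBACK') == '1')
# Standard PyTorch API (>= 1.12) for probing the Metal backend.
print('MPS available:', torch.backends.mps.is_available())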
@@ -8,4 +8,10 @@ def choose_torch_device() -> str:
         return 'mps'
     return 'cpu'
 
+
+def choose_autocast_device(device) -> str:
+    '''Returns an autocast compatible device from a torch device'''
+    device_type = device.type # this returns 'mps' on M1
+    # autocast only supports cuda or cpu
+    if device_type not in ('cuda','cpu'):
+        return 'cpu'
+    return device_type
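
torch.autocast() accepts only 'cuda' or 'cpu' as a device type, so choose_autocast_device() maps anything else (notably 'mps') to 'cpu'. A minimal usage sketch, assuming the import path shown in the hunks below:

import torch
from ldm.dream.devices import choose_autocast_device, choose_torch_device

device = torch.device(choose_torch_device())   # 'cuda', 'mps', or 'cpu'
device_type = choose_autocast_device(device)   # 'mps' is mapped to 'cpu'
with torch.autocast(device_type):
    x = torch.ones(2, 2) + torch.ones(2, 2)    # work inside the autocast region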
@@ -8,6 +8,7 @@ import torch
 import numpy as np
 import random
 import os
+import traceback
 from omegaconf import OmegaConf
 from PIL import Image
 from tqdm import tqdm, trange
@@ -28,7 +29,7 @@ from ldm.models.diffusion.plms import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter import PngWriter
 from ldm.dream.image_util import InitImageResizer
-from ldm.dream.devices import choose_torch_device
+from ldm.dream.devices import choose_autocast_device, choose_torch_device
 
 """Simplified text to image API for stable diffusion/latent diffusion
 
@@ -114,26 +115,28 @@ class T2I:
     """
 
     def __init__(
-        self,
-        iterations=1,
-        steps=50,
-        seed=None,
-        cfg_scale=7.5,
-        weights='models/ldm/stable-diffusion-v1/model.ckpt',
-        config='configs/stable-diffusion/v1-inference.yaml',
-        grid=False,
-        width=512,
-        height=512,
-        sampler_name='k_lms',
-        latent_channels=4,
-        downsampling_factor=8,
-        ddim_eta=0.0, # deterministic
-        precision='autocast',
-        full_precision=False,
-        strength=0.75, # default in scripts/img2img.py
-        embedding_path=None,
-        # just to keep track of this parameter when regenerating prompt
-        latent_diffusion_weights=False,
+        self,
+        iterations=1,
+        steps=50,
+        seed=None,
+        cfg_scale=7.5,
+        weights='models/ldm/stable-diffusion-v1/model.ckpt',
+        config='configs/stable-diffusion/v1-inference.yaml',
+        grid=False,
+        width=512,
+        height=512,
+        sampler_name='k_lms',
+        latent_channels=4,
+        downsampling_factor=8,
+        ddim_eta=0.0, # deterministic
+        precision='autocast',
+        full_precision=False,
+        strength=0.75, # default in scripts/img2img.py
+        embedding_path=None,
+        device_type = 'cuda',
+        # just to keep track of this parameter when regenerating prompt
+        # needs to be replaced when new configuration system implemented.
+        latent_diffusion_weights=False,
     ):
         self.iterations = iterations
         self.width = width
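
A hypothetical instantiation showing the new parameter; the module path is assumed from the class shown here, not from this diff:

from ldm.simplet2i import T2I   # module path assumed

t2i = T2I(device_type='mps')    # previously the device was chosen implicitly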
@@ -151,11 +154,17 @@ class T2I:
         self.full_precision = full_precision
         self.strength = strength
         self.embedding_path = embedding_path
+        self.device_type = device_type
         self.model = None # empty for now
         self.sampler = None
         self.device = None
         self.latent_diffusion_weights = latent_diffusion_weights
 
+        if device_type == 'cuda' and not torch.cuda.is_available():
+            device_type = choose_torch_device()
+            print(">> cuda not available, using device", device_type)
+        self.device = torch.device(device_type)
+
         # for VRAM usage statistics
         device_type = choose_torch_device()
         self.session_peakmem = torch.cuda.max_memory_allocated() if device_type == 'cuda' else None
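
The fallback added to the constructor, restated in isolation (a sketch using only names visible in this diff):

import torch
from ldm.dream.devices import choose_torch_device

device_type = 'cuda'                        # the constructor's default
if device_type == 'cuda' and not torch.cuda.is_available():
    device_type = choose_torch_device()     # 'mps' on Apple Silicon, else 'cpu'
    print(">> cuda not available, using device", device_type)
device = torch.device(device_type)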
@@ -312,8 +321,9 @@ class T2I:
                 callback=step_callback,
             )
 
-        with scope(self.device.type), self.model.ema_scope():
-            for n in trange(iterations, desc='>> Generating'):
+        device_type = choose_autocast_device(self.device)
+        with scope(device_type), self.model.ema_scope():
+            for n in trange(iterations, desc='Generating'):
                 seed_everything(seed)
                 image = next(images_iterator)
                 results.append([image, seed])
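
The diff does not show how `scope` is bound; presumably it is selected elsewhere in this file from the `precision` setting, roughly as sketched below. contextlib.nullcontext ignores its argument, which is what lets `scope(device_type)` work in either case:

from contextlib import nullcontext
import torch
from torch import autocast

precision = 'autocast'   # hypothetical value; mirrors T2I's precision argument
scope = autocast if precision == 'autocast' else nullcontext

device_type = 'cpu'      # what choose_autocast_device() returns off-GPU
with scope(device_type):
    x = torch.ones(2, 2) * 2.0   # the model forward pass would go here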
@@ -346,7 +356,7 @@ class T2I:
                         )
                 except Exception as e:
                     print(
-                        f'Error running RealESRGAN - Your image was not upscaled.\n{e}'
+                        f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                     )
                 if image_callback is not None:
                     if save_original:
@@ -359,11 +369,11 @@ class T2I:
         except KeyboardInterrupt:
             print('*interrupted*')
             print(
-                'Partial results will be returned; if --grid was requested, nothing will be returned.'
+                '>> Partial results will be returned; if --grid was requested, nothing will be returned.'
             )
         except RuntimeError as e:
-            print(str(e))
-            print('Are you sure your system has an adequate NVIDIA GPU?')
+            print(traceback.format_exc(), file=sys.stderr)
+            print('>> Are you sure your system has an adequate NVIDIA GPU?')
 
         toc = time.time()
         print('>> Usage stats:')
@@ -464,7 +474,6 @@ class T2I:
         )
 
         t_enc = int(strength * steps)
         # print(f"target t_enc is {t_enc} steps")
-
         while True:
             uc, c = self._get_uc_and_c(prompt, skip_normalize)
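
A worked example of the t_enc line above, using the class defaults from earlier in this diff:

strength, steps = 0.75, 50
t_enc = int(strength * steps)   # 37: the number of denoising steps
                                # re-run on top of the encoded init image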
@@ -515,7 +524,7 @@ class T2I:
         x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0)
         if len(x_samples) != 1:
             raise Exception(
-                f'expected to get a single image, but got {len(x_samples)}')
+                f'>> expected to get a single image, but got {len(x_samples)}')
         x_sample = 255.0 * rearrange(
             x_samples[0].cpu().numpy(), 'c h w -> h w c'
         )
@@ -525,17 +534,12 @@ class T2I:
             self.seed = random.randrange(0, np.iinfo(np.uint32).max)
         return self.seed
 
-    def _get_device(self):
-        device_type = choose_torch_device()
-        return torch.device(device_type)
-
     def load_model(self):
         """Load and initialize the model from configuration variables passed at object creation time"""
         if self.model is None:
             seed_everything(self.seed)
             try:
                 config = OmegaConf.load(self.config)
-                self.device = self._get_device()
                 model = self._load_model_from_config(config, self.weights)
                 if self.embedding_path is not None:
                     model.embedding_manager.load(
@@ -544,12 +548,10 @@ class T2I:
             self.model = model.to(self.device)
             # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
             self.model.cond_stage_model.device = self.device
-        except AttributeError:
-            import traceback
-            print(
-                'Error loading model. Only the CUDA backend is supported', file=sys.stderr)
+        except AttributeError as e:
+            print(f'>> Error loading model. {str(e)}', file=sys.stderr)
             print(traceback.format_exc(), file=sys.stderr)
-            raise SystemExit
+            raise SystemExit from e
 
         self._set_sampler()
 
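
A minimal illustration, separate from the commit, of what `raise ... from e` changes: the original error is preserved as __cause__ instead of being swallowed by a bare SystemExit:

try:
    try:
        raise AttributeError('hypothetical model-loading failure')
    except AttributeError as e:
        raise SystemExit(1) from e
except SystemExit as exit_exc:
    print('cause:', repr(exit_exc.__cause__))   # the original AttributeError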
@@ -9,6 +9,7 @@ import sys
 import copy
 import warnings
 import time
+from ldm.dream.devices import choose_torch_device
 import ldm.dream.readline
 from ldm.dream.pngwriter import PngWriter, PromptFormatter
 from ldm.dream.server import DreamServer, ThreadingDreamServer
@@ -60,6 +61,7 @@ def main():
         # this is solely for recreating the prompt
         latent_diffusion_weights=opt.laion400m,
         embedding_path=opt.embedding_path,
+        device_type=opt.device
     )
 
     # make sure the output directory exists
@@ -346,6 +348,8 @@ def create_argv_parser():
         dest='full_precision',
         action='store_true',
         help='Use slower full precision math for calculations',
+        # MPS only functions with full precision, see https://github.com/lstein/stable-diffusion/issues/237
+        default=choose_torch_device() == 'mps',
     )
     parser.add_argument(
         '-g',
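
Note that this default is evaluated once, when the parser is built: on Apple Silicon choose_torch_device() returns 'mps', so --full_precision is effectively on by default there. A one-line check under that assumption:

from ldm.dream.devices import choose_torch_device

print('full precision by default:', choose_torch_device() == 'mps')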
@@ -418,6 +422,13 @@ def create_argv_parser():
         default='model',
         help='Indicates the Stable Diffusion model to use.',
     )
+    parser.add_argument(
+        '--device',
+        '-d',
+        type=str,
+        default='cuda',
+        help="device to run stable diffusion on. defaults to cuda `torch.cuda.current_device()` if available"
+    )
     return parser
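
A self-contained sketch of the new flag (the real definition lives in create_argv_parser(); the parser and invocation below are illustrative only):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--device',
    '-d',
    type=str,
    default='cuda',
    help='device to run stable diffusion on',
)
opt = parser.parse_args(['--device', 'mps'])   # e.g. `dream.py --device mps`
assert opt.device == 'mps'                     # main() then passes T2I(device_type=opt.device)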