From d176fb07cda8cc591f8c0e34b9bdc42374475dbb Mon Sep 17 00:00:00 2001
From: Mihail Dumitrescu <mihai.dumitresq@gmail.com>
Date: Sat, 17 Sep 2022 20:56:25 +0300
Subject: [PATCH] Replace --full_precision with --precision that works even if
 not specified

Allowed values are 'auto', 'float32', 'autocast' and 'float16'. If the flag is omitted or set to 'auto', a working precision is selected automatically based on the torch device.
Context: #526
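Deprecates --full_precision / -F: it is still accepted and maps to --precision=float32, but combining it with an explicit --precision raises an error.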

Tested on both cuda and cpu by running scripts/dream.py without arguments and checking that the auto configuration worked. With --precision=auto/float32/autocast/float16 it behaves as expected, either working or failing with a reasonable error. Img2Img was checked as well.
---
 .../test_regression_txt2img_dream_v1_4.sh     |  3 +-
 .github/workflows/test-dream-conda.yml        |  4 +-
 README.md                                     | 18 ++++----
 docs/features/CLI.md                          |  2 +-
 docs/features/TEXTUAL_INVERSION.md            |  4 +-
 docs/index.md                                 | 16 +++----
 docs/installation/INSTALL_MAC.md              |  4 +-
 ldm/dream/args.py                             | 18 +++++++-
 ldm/dream/devices.py                          | 29 +++++++------
 ldm/dream/generator/base.py                   |  9 ++--
 ldm/dream/generator/embiggen.py               |  4 +-
 ldm/dream/generator/img2img.py                | 12 +++---
 ldm/dream/generator/inpaint.py                | 12 +++---
 ldm/dream/generator/txt2img.py                |  6 +--
 ldm/generate.py                               | 42 +++++++++++--------
 scripts/dream.py                              |  1 +
 server/application.py                         |  2 +-
 server/containers.py                          |  4 +-
 18 files changed, 108 insertions(+), 82 deletions(-)

diff --git a/.dev_scripts/test_regression_txt2img_dream_v1_4.sh b/.dev_scripts/test_regression_txt2img_dream_v1_4.sh
index 11cbf8f14b..9326d3c311 100644
--- a/.dev_scripts/test_regression_txt2img_dream_v1_4.sh
+++ b/.dev_scripts/test_regression_txt2img_dream_v1_4.sh
@@ -5,8 +5,7 @@ SAMPLES_DIR=${OUT_DIR}
 python scripts/dream.py \
     --from_file ${PROMPT_FILE} \
     --outdir ${OUT_DIR} \
-    --sampler plms \
-    --full_precision
+    --sampler plms
 
 # original output by CompVis/stable-diffusion
 IMAGE1=".dev_scripts/images/v1_4_astronaut_rides_horse_plms_step50_seed42.png"
diff --git a/.github/workflows/test-dream-conda.yml b/.github/workflows/test-dream-conda.yml
index 3bd9b24582..6c51ebe718 100644
--- a/.github/workflows/test-dream-conda.yml
+++ b/.github/workflows/test-dream-conda.yml
@@ -85,9 +85,9 @@ jobs:
           fi
           # Utterly hacky, but I don't know how else to do this
           if [[ ${{ github.ref }} == 'refs/heads/master' ]]; then
-            time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/preflight_prompts.txt --full_precision
+            time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/preflight_prompts.txt
           elif [[ ${{ github.ref }} == 'refs/heads/development' ]]; then
-            time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/dev_prompts.txt --full_precision
+            time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/dev_prompts.txt
           fi
           mkdir -p outputs/img-samples
       - name: Archive results
diff --git a/README.md b/README.md
index 7b4ffa76a1..a0cc302e03 100644
--- a/README.md
+++ b/README.md
@@ -86,17 +86,14 @@ You wil need one of the following:
 
 - At least 6 GB of free disk space for the machine learning model, Python, and all its dependencies.
 
-> Note
->
-> If you have an Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in
-> full-precision mode as shown below.
+#### Note
 
-Similarly, specify full-precision mode on Apple M1 hardware.
-
-To run in full-precision mode, start `dream.py` with the `--full_precision` flag:
+Precision is configured automatically based on the device. If, however, you
+encounter errors like 'expected type Float but found Half' or
+'not implemented for Half', try starting `dream.py` with `--precision=float32`:
 
 ```bash
-(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision
+(ldm) ~/stable-diffusion$ python scripts/dream.py --precision=float32
 ```
 
 ### Features
@@ -125,6 +122,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag
 
 ### Latest Changes
 
+- vNEXT (TODO 2022)
+
+  - Deprecated `--full_precision` / `-F`. Simply omit it and `dream.py` will
+    auto-configure. To override the automatic choice, use e.g. `--precision=float32`.
+
 - v1.14 (11 September 2022)
 
   - Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs.
diff --git a/docs/features/CLI.md b/docs/features/CLI.md
index 5f7cdaf162..cf49f68b70 100644
--- a/docs/features/CLI.md
+++ b/docs/features/CLI.md
@@ -74,7 +74,7 @@ prompt arguments] (#list-of-prompt-arguments). Others
 | --prompt_as_dir         |     -p      | False                                            | Name output directories using the prompt text.                                                       |
 | --from_file <path>      |             | None                                             | Read list of prompts from a file. Use "-" to read from standard input                                |
 | --model <modelname>     |             | stable-diffusion-1.4                             | Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m" |
-| --full_precision        |     -F      | False                                            | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards.   |
+| --precision <pname>     |             | auto                                             | Set a specific precision. In rare cases you may need to switch to 'float32' on some video cards.     |
 | --web                   |             | False                                            | Start in web server mode                                                                             |
 | --host <ip addr>        |             | localhost                                        | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any.                |
 | --port <port>           |             | 9090                                             | Which port web server should listen for requests on.                                                 |
diff --git a/docs/features/TEXTUAL_INVERSION.md b/docs/features/TEXTUAL_INVERSION.md
index b8dbc21192..f020807a12 100644
--- a/docs/features/TEXTUAL_INVERSION.md
+++ b/docs/features/TEXTUAL_INVERSION.md
@@ -57,9 +57,7 @@ Once the model is trained, specify the trained .pt or .bin file when starting
 dream using
 
 ```bash
-python3 ./scripts/dream.py \
-        --embedding_path /path/to/embedding.pt \
-        --full_precision
+python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt
 ```
 
 Then, to utilize your subject at the dream prompt
diff --git a/docs/index.md b/docs/index.md
index bdde3cabd7..1f5a6702dc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -62,15 +62,12 @@ You wil need one of the following:
 
 ### Note
 
-If you are have a Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in
-full-precision mode as shown below.
-
-Similarly, specify full-precision mode on Apple M1 hardware.
-
-To run in full-precision mode, start `dream.py` with the `--full_precision` flag:
+Precision is configured automatically based on the device. If, however, you
+encounter errors like 'expected type Float but found Half' or
+'not implemented for Half', try starting `dream.py` with `--precision=float32`:
 
 ```bash
-(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision
+(ldm) ~/stable-diffusion$ python scripts/dream.py --precision=float32
 ```
 
 ## Features
@@ -98,6 +95,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag
 
 ## Latest Changes
 
+### vNEXT <small>(TODO 2022)</small>
+
+  - Deprecated `--full_precision` / `-F`. Simply omit it and `dream.py` will
+    auto-configure. To override the automatic choice, use e.g. `--precision=float32`.
+
 ### v1.14 <small>(11 September 2022)</small>
 
 - Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs.
diff --git a/docs/installation/INSTALL_MAC.md b/docs/installation/INSTALL_MAC.md
index 69bf78fa00..9904e05050 100644
--- a/docs/installation/INSTALL_MAC.md
+++ b/docs/installation/INSTALL_MAC.md
@@ -97,7 +97,7 @@ conda activate ldm
 python scripts/preload_models.py
 
 # run SD!
-python scripts/dream.py --full_precision  # half-precision requires autocast and won't work
+python scripts/dream.py
 
 # or run the web interface!
 python scripts/dream.py --web
@@ -453,5 +453,3 @@ Abort trap: 6
   warnings.warn('resource_tracker: There appear to be %d '
 ```
 
-Macs do not support `autocast/mixed-precision`, so you need to supply
-`--full_precision` to use float32 everywhere.
diff --git a/ldm/dream/args.py b/ldm/dream/args.py
index ada8975e96..9eba72f115 100644
--- a/ldm/dream/args.py
+++ b/ldm/dream/args.py
@@ -100,6 +100,13 @@ SAMPLER_CHOICES = [
     'plms',
 ]
 
+PRECISION_CHOICES = [
+    'auto',
+    'float32',
+    'autocast',
+    'float16',
+]
+
 # is there a way to pick this up during git commits?
 APP_ID      = 'lstein/stable-diffusion'
 APP_VERSION = 'v1.15'
@@ -322,7 +329,16 @@ class Args(object):
             '--full_precision',
             dest='full_precision',
             action='store_true',
-            help='Use more memory-intensive full precision math for calculations',
+            help='Deprecated way to set --precision=float32',
+        )
+        model_group.add_argument(
+            '--precision',
+            dest='precision',
+            type=str,
+            choices=PRECISION_CHOICES,
+            metavar='PRECISION',
+            help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}',
+            default='auto',
         )
         file_group.add_argument(
             '--from_file',
diff --git a/ldm/dream/devices.py b/ldm/dream/devices.py
index a92cfcbf60..424ae5a6d3 100644
--- a/ldm/dream/devices.py
+++ b/ldm/dream/devices.py
@@ -1,6 +1,6 @@
 import torch
 from torch import autocast
-from contextlib import contextmanager, nullcontext
+from contextlib import nullcontext
 
 def choose_torch_device() -> str:
     '''Convenience routine for guessing which GPU device to run model on'''
@@ -10,15 +10,18 @@ def choose_torch_device() -> str:
         return 'mps'
     return 'cpu'
 
-def choose_autocast_device(device):
-    '''Returns an autocast compatible device from a torch device'''
-    device_type = device.type # this returns 'mps' on M1
-    # autocast only for cuda, but GTX 16xx have issues with it
-    if device_type == 'cuda':
-        device_name = torch.cuda.get_device_name()
-        if 'GeForce GTX 1660' in device_name or 'GeForce GTX 1650' in device_name:
-            return device_type,nullcontext
-        else:
-            return device_type,autocast
-    else:
-        return 'cpu',nullcontext
+def choose_precision(device) -> str:
+    '''Returns an appropriate precision for the given torch device'''
+    if device.type == 'cuda':
+        device_name = torch.cuda.get_device_name(device)
+        if not ('GeForce GTX 1660' in device_name or 'GeForce GTX 1650' in device_name):
+            return 'float16'
+    return 'float32'
+
+def choose_autocast(precision):
+    '''Returns an autocast context or nullcontext for the given precision string'''
+    # float16 currently requires autocast to avoid errors like:
+    # 'expected scalar type Half but found Float'
+    if precision == 'autocast' or precision == 'float16':
+        return autocast
+    return nullcontext
diff --git a/ldm/dream/generator/base.py b/ldm/dream/generator/base.py
index 9bed3df719..af98dea6c2 100644
--- a/ldm/dream/generator/base.py
+++ b/ldm/dream/generator/base.py
@@ -9,13 +9,14 @@ from tqdm import tqdm, trange
 from PIL               import Image
 from einops import rearrange, repeat
 from pytorch_lightning import seed_everything
-from ldm.dream.devices import choose_autocast_device
+from ldm.dream.devices import choose_autocast
 
 downsampling = 8
 
 class Generator():
-    def __init__(self,model):
+    def __init__(self, model, precision):
         self.model               = model
+        self.precision           = precision
         self.seed                = None
         self.latent_channels     = model.channels
         self.downsampling_factor = downsampling   # BUG: should come from model or config
@@ -38,7 +39,7 @@ class Generator():
     def generate(self,prompt,init_image,width,height,iterations=1,seed=None,
                  image_callback=None, step_callback=None,
                  **kwargs):
-        device_type,scope   = choose_autocast_device(self.model.device)
+        scope = choose_autocast(self.precision)
         make_image          = self.get_make_image(
             prompt,
             init_image    = init_image,
@@ -51,7 +52,7 @@ class Generator():
         results             = []
         seed                = seed if seed else self.new_seed()
         seed, initial_noise = self.generate_initial_noise(seed, width, height)
-        with scope(device_type), self.model.ema_scope():
+        with scope(self.model.device.type), self.model.ema_scope():
             for n in trange(iterations, desc='Generating'):
                 x_T = None
                 if self.variation_amount > 0:
diff --git a/ldm/dream/generator/embiggen.py b/ldm/dream/generator/embiggen.py
index cb9c029a66..4e775a50cc 100644
--- a/ldm/dream/generator/embiggen.py
+++ b/ldm/dream/generator/embiggen.py
@@ -11,8 +11,8 @@ from ldm.models.diffusion.ddim     import DDIMSampler
 from ldm.dream.generator.img2img   import Img2Img
 
 class Embiggen(Generator):
-    def __init__(self,model):
-        super().__init__(model)
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
         self.init_latent         = None
 
     @torch.no_grad()
diff --git a/ldm/dream/generator/img2img.py b/ldm/dream/generator/img2img.py
index 6a1561db6f..f354b59138 100644
--- a/ldm/dream/generator/img2img.py
+++ b/ldm/dream/generator/img2img.py
@@ -4,15 +4,15 @@ ldm.dream.generator.img2img descends from ldm.dream.generator
 
 import torch
 import numpy as  np
-from ldm.dream.devices             import choose_autocast_device
+from ldm.dream.devices             import choose_autocast
 from ldm.dream.generator.base      import Generator
 from ldm.models.diffusion.ddim     import DDIMSampler
 
 class Img2Img(Generator):
-    def __init__(self,model):
-        super().__init__(model)
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
         self.init_latent         = None    # by get_noise()
-    
+
     @torch.no_grad()
     def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
                        conditioning,init_image,strength,step_callback=None,**kwargs):
@@ -32,8 +32,8 @@ class Img2Img(Generator):
             ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
         )
 
-        device_type,scope   = choose_autocast_device(self.model.device)
-        with scope(device_type):
+        scope = choose_autocast(self.precision)
+        with scope(self.model.device.type):
             self.init_latent = self.model.get_first_stage_encoding(
                 self.model.encode_first_stage(init_image)
             ) # move to latent space
diff --git a/ldm/dream/generator/inpaint.py b/ldm/dream/generator/inpaint.py
index 1b25a658b4..248be93bdf 100644
--- a/ldm/dream/generator/inpaint.py
+++ b/ldm/dream/generator/inpaint.py
@@ -5,15 +5,15 @@ ldm.dream.generator.inpaint descends from ldm.dream.generator
 import torch
 import numpy as  np
 from einops import rearrange, repeat
-from ldm.dream.devices             import choose_autocast_device
+from ldm.dream.devices             import choose_autocast
 from ldm.dream.generator.img2img   import Img2Img
 from ldm.models.diffusion.ddim     import DDIMSampler
 
 class Inpaint(Img2Img):
-    def __init__(self,model):
+    def __init__(self, model, precision):
         self.init_latent = None
-        super().__init__(model)
-    
+        super().__init__(model, precision)
+
     @torch.no_grad()
     def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
                        conditioning,init_image,mask_image,strength,
@@ -38,8 +38,8 @@ class Inpaint(Img2Img):
                 ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
             )
 
-        device_type,scope   = choose_autocast_device(self.model.device)
-        with scope(device_type):
+        scope = choose_autocast(self.precision)
+        with scope(self.model.device.type):
             self.init_latent = self.model.get_first_stage_encoding(
                 self.model.encode_first_stage(init_image)
             ) # move to latent space
diff --git a/ldm/dream/generator/txt2img.py b/ldm/dream/generator/txt2img.py
index d4cd25cb51..0c77705a1c 100644
--- a/ldm/dream/generator/txt2img.py
+++ b/ldm/dream/generator/txt2img.py
@@ -7,9 +7,9 @@ import numpy as  np
 from ldm.dream.generator.base import Generator
 
 class Txt2Img(Generator):
-    def __init__(self,model):
-        super().__init__(model)
-    
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
+
     @torch.no_grad()
     def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
                        conditioning,width,height,step_callback=None,**kwargs):
diff --git a/ldm/generate.py b/ldm/generate.py
index a470648cdc..75c1c0a393 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -29,7 +29,7 @@ from ldm.models.diffusion.plms     import PLMSSampler
 from ldm.models.diffusion.ksampler import KSampler
 from ldm.dream.pngwriter           import PngWriter
 from ldm.dream.image_util          import InitImageResizer
-from ldm.dream.devices             import choose_torch_device
+from ldm.dream.devices             import choose_torch_device, choose_precision
 from ldm.dream.conditioning        import get_uc_and_c
 
 def fix_func(orig):
@@ -104,7 +104,7 @@ gr = Generate(
           # these values are set once and shouldn't be changed
           conf        = path to configuration file ('configs/models.yaml')
           model       = symbolic name of the model in the configuration file
-          full_precision = False
+          precision   = float precision to be used
 
           # this value is sticky and maintained between generation calls
           sampler_name   = ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms']  // k_lms
@@ -130,6 +130,7 @@ class Generate:
             sampler_name          = 'k_lms',
             ddim_eta              = 0.0,  # deterministic
             full_precision        = False,
+            precision             = 'auto',
             # these are deprecated; if present they override values in the conf file
             weights               = None,
             config                = None,
@@ -145,7 +146,7 @@ class Generate:
         self.cfg_scale      = 7.5
         self.sampler_name   = sampler_name
         self.ddim_eta       = 0.0    # same seed always produces same image
-        self.full_precision = True if choose_torch_device() == 'mps' else full_precision
+        self.precision      = precision
         self.strength       = 0.75
         self.seamless       = False
         self.embedding_path = embedding_path
@@ -162,6 +163,14 @@ class Generate:
         # it wasn't actually doing anything. This logic could be reinstated.
         device_type = choose_torch_device()
         self.device = torch.device(device_type)
+        if full_precision:
+            if self.precision != 'auto':
+              raise ValueError('Remove --full_precision / -F if using --precision')
+            print('Please remove deprecated --full_precision / -F')
+            print('If auto config does not work you can use --precision=float32')
+            self.precision = 'float32'
+        if self.precision == 'auto':
+            self.precision = choose_precision(self.device)
 
         # for VRAM usage statistics
         self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda else None
@@ -440,25 +449,25 @@ class Generate:
     def _make_img2img(self):
         if not self.generators.get('img2img'):
             from ldm.dream.generator.img2img import Img2Img
-            self.generators['img2img'] = Img2Img(self.model)
+            self.generators['img2img'] = Img2Img(self.model, self.precision)
         return self.generators['img2img']
-    
+
     def _make_embiggen(self):
         if not self.generators.get('embiggen'):
             from ldm.dream.generator.embiggen import Embiggen
-            self.generators['embiggen'] = Embiggen(self.model)
+            self.generators['embiggen'] = Embiggen(self.model, self.precision)
         return self.generators['embiggen']
 
     def _make_txt2img(self):
         if not self.generators.get('txt2img'):
             from ldm.dream.generator.txt2img import Txt2Img
-            self.generators['txt2img'] = Txt2Img(self.model)
+            self.generators['txt2img'] = Txt2Img(self.model, self.precision)
         return self.generators['txt2img']
 
     def _make_inpaint(self):
         if not self.generators.get('inpaint'):
             from ldm.dream.generator.inpaint import Inpaint
-            self.generators['inpaint'] = Inpaint(self.model)
+            self.generators['inpaint'] = Inpaint(self.model, self.precision)
         return self.generators['inpaint']
 
     def load_model(self):
@@ -469,7 +478,7 @@ class Generate:
                 model = self._load_model_from_config(self.config, self.weights)
                 if self.embedding_path is not None:
                     model.embedding_manager.load(
-                        self.embedding_path, self.full_precision
+                        self.embedding_path, self.precision == 'float32' or self.precision == 'autocast'
                     )
                 self.model = model.to(self.device)
                 # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
@@ -619,16 +628,13 @@ class Generate:
         sd    = pl_sd['state_dict']
         model = instantiate_from_config(c.model)
         m, u  = model.load_state_dict(sd, strict=False)
-        
-        if self.full_precision:
-            print(
-                '>> Using slower but more accurate full-precision math (--full_precision)'
-            )
+
+        if self.precision == 'float16':
+            print('Using faster float16 precision')
+            model.to(torch.float16)
         else:
-            print(
-                '>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.'
-            )
-            model.half()
+            print('Using more accurate float32 precision')
+
         model.to(self.device)
         model.eval()
 
diff --git a/scripts/dream.py b/scripts/dream.py
index d11c87dc67..f4bd347bc6 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -54,6 +54,7 @@ def main():
             sampler_name   = opt.sampler_name,
             embedding_path = opt.embedding_path,
             full_precision = opt.full_precision,
+            precision      = opt.precision,
         )
     except (FileNotFoundError, IOError, KeyError) as e:
         print(f'{e}. Aborting.')
diff --git a/server/application.py b/server/application.py
index 2501f4b63d..58725637a7 100644
--- a/server/application.py
+++ b/server/application.py
@@ -119,7 +119,7 @@ def main():
   #     "height": height,
   #     "sampler_name": opt.sampler_name,
   #     "weights": weights,
-  #     "full_precision": opt.full_precision,
+  #     "precision": opt.precision,
   #     "config": config,
   #     "grid": opt.grid,
   #     "latent_diffusion_weights": opt.laion400m,
diff --git a/server/containers.py b/server/containers.py
index a3318c5ff0..f1e246482f 100644
--- a/server/containers.py
+++ b/server/containers.py
@@ -23,14 +23,14 @@ class Container(containers.DeclarativeContainer):
     model          = config.model,
     sampler_name   = config.sampler_name,
     embedding_path = config.embedding_path,
-    full_precision = config.full_precision
+    precision      = config.precision
     # config = config.model.config,
 
     # width = config.model.width,
     # height = config.model.height,
     # sampler_name = config.model.sampler_name,
     # weights = config.model.weights,
-    # full_precision = config.model.full_precision,
+    # precision = config.model.precision,
     # grid = config.model.grid,
     # seamless = config.model.seamless,
     # embedding_path = config.model.embedding_path,