From d176fb07cda8cc591f8c0e34b9bdc42374475dbb Mon Sep 17 00:00:00 2001 From: Mihail Dumitrescu Date: Sat, 17 Sep 2022 20:56:25 +0300 Subject: [PATCH] Replace --full_precision with --precision that works even if not specified Allowed values are 'auto', 'float32', 'autocast', 'float16'. If not specified or 'auto' a working precision is automatically selected based on the torch device. Context: #526 Deprecated --full_precision / -F Tested on both cuda and cpu by calling scripts/dream.py without arguments and checked the auto configuration worked. With --precision=auto/float32/autocast/float16 it performs as expected, either working or failing with a reasonable error. Also checked Img2Img. --- .../test_regression_txt2img_dream_v1_4.sh | 3 +- .github/workflows/test-dream-conda.yml | 4 +- README.md | 18 ++++---- docs/features/CLI.md | 2 +- docs/features/TEXTUAL_INVERSION.md | 4 +- docs/index.md | 16 +++---- docs/installation/INSTALL_MAC.md | 4 +- ldm/dream/args.py | 18 +++++++- ldm/dream/devices.py | 29 +++++++------ ldm/dream/generator/base.py | 9 ++-- ldm/dream/generator/embiggen.py | 4 +- ldm/dream/generator/img2img.py | 12 +++--- ldm/dream/generator/inpaint.py | 12 +++--- ldm/dream/generator/txt2img.py | 6 +-- ldm/generate.py | 42 +++++++++++-------- scripts/dream.py | 1 + server/application.py | 2 +- server/containers.py | 4 +- 18 files changed, 108 insertions(+), 82 deletions(-) diff --git a/.dev_scripts/test_regression_txt2img_dream_v1_4.sh b/.dev_scripts/test_regression_txt2img_dream_v1_4.sh index 11cbf8f14b..9326d3c311 100644 --- a/.dev_scripts/test_regression_txt2img_dream_v1_4.sh +++ b/.dev_scripts/test_regression_txt2img_dream_v1_4.sh @@ -5,8 +5,7 @@ SAMPLES_DIR=${OUT_DIR} python scripts/dream.py \ --from_file ${PROMPT_FILE} \ --outdir ${OUT_DIR} \ - --sampler plms \ - --full_precision + --sampler plms # original output by CompVis/stable-diffusion IMAGE1=".dev_scripts/images/v1_4_astronaut_rides_horse_plms_step50_seed42.png" diff --git a/.github/workflows/test-dream-conda.yml b/.github/workflows/test-dream-conda.yml index 3bd9b24582..6c51ebe718 100644 --- a/.github/workflows/test-dream-conda.yml +++ b/.github/workflows/test-dream-conda.yml @@ -85,9 +85,9 @@ jobs: fi # Utterly hacky, but I don't know how else to do this if [[ ${{ github.ref }} == 'refs/heads/master' ]]; then - time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/preflight_prompts.txt --full_precision + time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/preflight_prompts.txt elif [[ ${{ github.ref }} == 'refs/heads/development' ]]; then - time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/dev_prompts.txt --full_precision + time ${{ steps.vars.outputs.PYTHON_BIN }} scripts/dream.py --from_file tests/dev_prompts.txt fi mkdir -p outputs/img-samples - name: Archive results diff --git a/README.md b/README.md index 7b4ffa76a1..a0cc302e03 100644 --- a/README.md +++ b/README.md @@ -86,17 +86,14 @@ You wil need one of the following: - At least 6 GB of free disk space for the machine learning model, Python, and all its dependencies. -> Note -> -> If you have an Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in -> full-precision mode as shown below. +#### Note -Similarly, specify full-precision mode on Apple M1 hardware. - -To run in full-precision mode, start `dream.py` with the `--full_precision` flag: +Precision is auto configured based on the device. If however you encounter +errors like 'expected type Float but found Half' or 'not implemented for Half' +you can try starting `dream.py` with the `--precision=float32` flag: ```bash -(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision +(ldm) ~/stable-diffusion$ python scripts/dream.py --precision=float32 ``` ### Features @@ -125,6 +122,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag ### Latest Changes +- vNEXT (TODO 2022) + + - Deprecated `--full_precision` / `-F`. Simply omit it and `dream.py` will auto + configure. To switch away from auto use the new flag like `--precision=float32`. + - v1.14 (11 September 2022) - Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs. diff --git a/docs/features/CLI.md b/docs/features/CLI.md index 5f7cdaf162..cf49f68b70 100644 --- a/docs/features/CLI.md +++ b/docs/features/CLI.md @@ -74,7 +74,7 @@ prompt arguments] (#list-of-prompt-arguments). Others | --prompt_as_dir | -p | False | Name output directories using the prompt text. | | --from_file | | None | Read list of prompts from a file. Use "-" to read from standard input | | --model | | stable-diffusion-1.4 | Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m" | -| --full_precision | -F | False | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards. | +| --precision | | auto | Set to a specific precision. Rare but you may need to switch to 'float32' on some video cards. | | --web | | False | Start in web server mode | | --host | | localhost | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. | | --port | | 9090 | Which port web server should listen for requests on. | diff --git a/docs/features/TEXTUAL_INVERSION.md b/docs/features/TEXTUAL_INVERSION.md index b8dbc21192..f020807a12 100644 --- a/docs/features/TEXTUAL_INVERSION.md +++ b/docs/features/TEXTUAL_INVERSION.md @@ -57,9 +57,7 @@ Once the model is trained, specify the trained .pt or .bin file when starting dream using ```bash -python3 ./scripts/dream.py \ - --embedding_path /path/to/embedding.pt \ - --full_precision +python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt ``` Then, to utilize your subject at the dream prompt diff --git a/docs/index.md b/docs/index.md index bdde3cabd7..1f5a6702dc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -62,15 +62,12 @@ You wil need one of the following: ### Note -If you are have a Nvidia 10xx series card (e.g. the 1080ti), please run the dream script in -full-precision mode as shown below. - -Similarly, specify full-precision mode on Apple M1 hardware. - -To run in full-precision mode, start `dream.py` with the `--full_precision` flag: +Precision is auto configured based on the device. If however you encounter +errors like 'expected type Float but found Half' or 'not implemented for Half' +you can try starting `dream.py` with the `--precision=float32` flag: ```bash -(ldm) ~/stable-diffusion$ python scripts/dream.py --full_precision +(ldm) ~/stable-diffusion$ python scripts/dream.py --precision=float32 ``` ## Features @@ -98,6 +95,11 @@ To run in full-precision mode, start `dream.py` with the `--full_precision` flag ## Latest Changes +### vNEXT (TODO 2022) + + - Deprecated `--full_precision` / `-F`. Simply omit it and `dream.py` will auto + configure. To switch away from auto use the new flag like `--precision=float32`. + ### v1.14 (11 September 2022) - Memory optimizations for small-RAM cards. 512x512 now possible on 4 GB GPUs. diff --git a/docs/installation/INSTALL_MAC.md b/docs/installation/INSTALL_MAC.md index 69bf78fa00..9904e05050 100644 --- a/docs/installation/INSTALL_MAC.md +++ b/docs/installation/INSTALL_MAC.md @@ -97,7 +97,7 @@ conda activate ldm python scripts/preload_models.py # run SD! -python scripts/dream.py --full_precision # half-precision requires autocast and won't work +python scripts/dream.py # or run the web interface! python scripts/dream.py --web @@ -453,5 +453,3 @@ Abort trap: 6 warnings.warn('resource_tracker: There appear to be %d ' ``` -Macs do not support `autocast/mixed-precision`, so you need to supply -`--full_precision` to use float32 everywhere. diff --git a/ldm/dream/args.py b/ldm/dream/args.py index ada8975e96..9eba72f115 100644 --- a/ldm/dream/args.py +++ b/ldm/dream/args.py @@ -100,6 +100,13 @@ SAMPLER_CHOICES = [ 'plms', ] +PRECISION_CHOICES = [ + 'auto', + 'float32', + 'autocast', + 'float16', +] + # is there a way to pick this up during git commits? APP_ID = 'lstein/stable-diffusion' APP_VERSION = 'v1.15' @@ -322,7 +329,16 @@ class Args(object): '--full_precision', dest='full_precision', action='store_true', - help='Use more memory-intensive full precision math for calculations', + help='Deprecated way to set --precision=float32', + ) + model_group.add_argument( + '--precision', + dest='precision', + type=str, + choices=PRECISION_CHOICES, + metavar='PRECISION', + help=f'Set model precision. Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}', + default='auto', ) file_group.add_argument( '--from_file', diff --git a/ldm/dream/devices.py b/ldm/dream/devices.py index a92cfcbf60..424ae5a6d3 100644 --- a/ldm/dream/devices.py +++ b/ldm/dream/devices.py @@ -1,6 +1,6 @@ import torch from torch import autocast -from contextlib import contextmanager, nullcontext +from contextlib import nullcontext def choose_torch_device() -> str: '''Convenience routine for guessing which GPU device to run model on''' @@ -10,15 +10,18 @@ def choose_torch_device() -> str: return 'mps' return 'cpu' -def choose_autocast_device(device): - '''Returns an autocast compatible device from a torch device''' - device_type = device.type # this returns 'mps' on M1 - # autocast only for cuda, but GTX 16xx have issues with it - if device_type == 'cuda': - device_name = torch.cuda.get_device_name() - if 'GeForce GTX 1660' in device_name or 'GeForce GTX 1650' in device_name: - return device_type,nullcontext - else: - return device_type,autocast - else: - return 'cpu',nullcontext +def choose_precision(device) -> str: + '''Returns an appropriate precision for the given torch device''' + if device.type == 'cuda': + device_name = torch.cuda.get_device_name(device) + if not ('GeForce GTX 1660' in device_name or 'GeForce GTX 1650' in device_name): + return 'float16' + return 'float32' + +def choose_autocast(precision): + '''Returns an autocast context or nullcontext for the given precision string''' + # float16 currently requires autocast to avoid errors like: + # 'expected scalar type Half but found Float' + if precision == 'autocast' or precision == 'float16': + return autocast + return nullcontext diff --git a/ldm/dream/generator/base.py b/ldm/dream/generator/base.py index 9bed3df719..af98dea6c2 100644 --- a/ldm/dream/generator/base.py +++ b/ldm/dream/generator/base.py @@ -9,13 +9,14 @@ from tqdm import tqdm, trange from PIL import Image from einops import rearrange, repeat from pytorch_lightning import seed_everything -from ldm.dream.devices import choose_autocast_device +from ldm.dream.devices import choose_autocast downsampling = 8 class Generator(): - def __init__(self,model): + def __init__(self, model, precision): self.model = model + self.precision = precision self.seed = None self.latent_channels = model.channels self.downsampling_factor = downsampling # BUG: should come from model or config @@ -38,7 +39,7 @@ class Generator(): def generate(self,prompt,init_image,width,height,iterations=1,seed=None, image_callback=None, step_callback=None, **kwargs): - device_type,scope = choose_autocast_device(self.model.device) + scope = choose_autocast(self.precision) make_image = self.get_make_image( prompt, init_image = init_image, @@ -51,7 +52,7 @@ class Generator(): results = [] seed = seed if seed else self.new_seed() seed, initial_noise = self.generate_initial_noise(seed, width, height) - with scope(device_type), self.model.ema_scope(): + with scope(self.model.device.type), self.model.ema_scope(): for n in trange(iterations, desc='Generating'): x_T = None if self.variation_amount > 0: diff --git a/ldm/dream/generator/embiggen.py b/ldm/dream/generator/embiggen.py index cb9c029a66..4e775a50cc 100644 --- a/ldm/dream/generator/embiggen.py +++ b/ldm/dream/generator/embiggen.py @@ -11,8 +11,8 @@ from ldm.models.diffusion.ddim import DDIMSampler from ldm.dream.generator.img2img import Img2Img class Embiggen(Generator): - def __init__(self,model): - super().__init__(model) + def __init__(self, model, precision): + super().__init__(model, precision) self.init_latent = None @torch.no_grad() diff --git a/ldm/dream/generator/img2img.py b/ldm/dream/generator/img2img.py index 6a1561db6f..f354b59138 100644 --- a/ldm/dream/generator/img2img.py +++ b/ldm/dream/generator/img2img.py @@ -4,15 +4,15 @@ ldm.dream.generator.img2img descends from ldm.dream.generator import torch import numpy as np -from ldm.dream.devices import choose_autocast_device +from ldm.dream.devices import choose_autocast from ldm.dream.generator.base import Generator from ldm.models.diffusion.ddim import DDIMSampler class Img2Img(Generator): - def __init__(self,model): - super().__init__(model) + def __init__(self, model, precision): + super().__init__(model, precision) self.init_latent = None # by get_noise() - + @torch.no_grad() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, conditioning,init_image,strength,step_callback=None,**kwargs): @@ -32,8 +32,8 @@ class Img2Img(Generator): ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False ) - device_type,scope = choose_autocast_device(self.model.device) - with scope(device_type): + scope = choose_autocast(self.precision) + with scope(self.model.device.type): self.init_latent = self.model.get_first_stage_encoding( self.model.encode_first_stage(init_image) ) # move to latent space diff --git a/ldm/dream/generator/inpaint.py b/ldm/dream/generator/inpaint.py index 1b25a658b4..248be93bdf 100644 --- a/ldm/dream/generator/inpaint.py +++ b/ldm/dream/generator/inpaint.py @@ -5,15 +5,15 @@ ldm.dream.generator.inpaint descends from ldm.dream.generator import torch import numpy as np from einops import rearrange, repeat -from ldm.dream.devices import choose_autocast_device +from ldm.dream.devices import choose_autocast from ldm.dream.generator.img2img import Img2Img from ldm.models.diffusion.ddim import DDIMSampler class Inpaint(Img2Img): - def __init__(self,model): + def __init__(self, model, precision): self.init_latent = None - super().__init__(model) - + super().__init__(model, precision) + @torch.no_grad() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, conditioning,init_image,mask_image,strength, @@ -38,8 +38,8 @@ class Inpaint(Img2Img): ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False ) - device_type,scope = choose_autocast_device(self.model.device) - with scope(device_type): + scope = choose_autocast(self.precision) + with scope(self.model.device.type): self.init_latent = self.model.get_first_stage_encoding( self.model.encode_first_stage(init_image) ) # move to latent space diff --git a/ldm/dream/generator/txt2img.py b/ldm/dream/generator/txt2img.py index d4cd25cb51..0c77705a1c 100644 --- a/ldm/dream/generator/txt2img.py +++ b/ldm/dream/generator/txt2img.py @@ -7,9 +7,9 @@ import numpy as np from ldm.dream.generator.base import Generator class Txt2Img(Generator): - def __init__(self,model): - super().__init__(model) - + def __init__(self, model, precision): + super().__init__(model, precision) + @torch.no_grad() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, conditioning,width,height,step_callback=None,**kwargs): diff --git a/ldm/generate.py b/ldm/generate.py index a470648cdc..75c1c0a393 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -29,7 +29,7 @@ from ldm.models.diffusion.plms import PLMSSampler from ldm.models.diffusion.ksampler import KSampler from ldm.dream.pngwriter import PngWriter from ldm.dream.image_util import InitImageResizer -from ldm.dream.devices import choose_torch_device +from ldm.dream.devices import choose_torch_device, choose_precision from ldm.dream.conditioning import get_uc_and_c def fix_func(orig): @@ -104,7 +104,7 @@ gr = Generate( # these values are set once and shouldn't be changed conf = path to configuration file ('configs/models.yaml') model = symbolic name of the model in the configuration file - full_precision = False + precision = float precision to be used # this value is sticky and maintained between generation calls sampler_name = ['ddim', 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms', 'plms'] // k_lms @@ -130,6 +130,7 @@ class Generate: sampler_name = 'k_lms', ddim_eta = 0.0, # deterministic full_precision = False, + precision = 'auto', # these are deprecated; if present they override values in the conf file weights = None, config = None, @@ -145,7 +146,7 @@ class Generate: self.cfg_scale = 7.5 self.sampler_name = sampler_name self.ddim_eta = 0.0 # same seed always produces same image - self.full_precision = True if choose_torch_device() == 'mps' else full_precision + self.precision = precision self.strength = 0.75 self.seamless = False self.embedding_path = embedding_path @@ -162,6 +163,14 @@ class Generate: # it wasn't actually doing anything. This logic could be reinstated. device_type = choose_torch_device() self.device = torch.device(device_type) + if full_precision: + if self.precision != 'auto': + raise ValueError('Remove --full_precision / -F if using --precision') + print('Please remove deprecated --full_precision / -F') + print('If auto config does not work you can use --precision=float32') + self.precision = 'float32' + if self.precision == 'auto': + self.precision = choose_precision(self.device) # for VRAM usage statistics self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda else None @@ -440,25 +449,25 @@ class Generate: def _make_img2img(self): if not self.generators.get('img2img'): from ldm.dream.generator.img2img import Img2Img - self.generators['img2img'] = Img2Img(self.model) + self.generators['img2img'] = Img2Img(self.model, self.precision) return self.generators['img2img'] - + def _make_embiggen(self): if not self.generators.get('embiggen'): from ldm.dream.generator.embiggen import Embiggen - self.generators['embiggen'] = Embiggen(self.model) + self.generators['embiggen'] = Embiggen(self.model, self.precision) return self.generators['embiggen'] def _make_txt2img(self): if not self.generators.get('txt2img'): from ldm.dream.generator.txt2img import Txt2Img - self.generators['txt2img'] = Txt2Img(self.model) + self.generators['txt2img'] = Txt2Img(self.model, self.precision) return self.generators['txt2img'] def _make_inpaint(self): if not self.generators.get('inpaint'): from ldm.dream.generator.inpaint import Inpaint - self.generators['inpaint'] = Inpaint(self.model) + self.generators['inpaint'] = Inpaint(self.model, self.precision) return self.generators['inpaint'] def load_model(self): @@ -469,7 +478,7 @@ class Generate: model = self._load_model_from_config(self.config, self.weights) if self.embedding_path is not None: model.embedding_manager.load( - self.embedding_path, self.full_precision + self.embedding_path, self.precision == 'float32' or self.precision == 'autocast' ) self.model = model.to(self.device) # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here @@ -619,16 +628,13 @@ class Generate: sd = pl_sd['state_dict'] model = instantiate_from_config(c.model) m, u = model.load_state_dict(sd, strict=False) - - if self.full_precision: - print( - '>> Using slower but more accurate full-precision math (--full_precision)' - ) + + if self.precision == 'float16': + print('Using faster float16 precision') + model.to(torch.float16) else: - print( - '>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.' - ) - model.half() + print('Using more accurate float32 precision') + model.to(self.device) model.eval() diff --git a/scripts/dream.py b/scripts/dream.py index d11c87dc67..f4bd347bc6 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -54,6 +54,7 @@ def main(): sampler_name = opt.sampler_name, embedding_path = opt.embedding_path, full_precision = opt.full_precision, + precision = opt.precision, ) except (FileNotFoundError, IOError, KeyError) as e: print(f'{e}. Aborting.') diff --git a/server/application.py b/server/application.py index 2501f4b63d..58725637a7 100644 --- a/server/application.py +++ b/server/application.py @@ -119,7 +119,7 @@ def main(): # "height": height, # "sampler_name": opt.sampler_name, # "weights": weights, - # "full_precision": opt.full_precision, + # "precision": opt.precision, # "config": config, # "grid": opt.grid, # "latent_diffusion_weights": opt.laion400m, diff --git a/server/containers.py b/server/containers.py index a3318c5ff0..f1e246482f 100644 --- a/server/containers.py +++ b/server/containers.py @@ -23,14 +23,14 @@ class Container(containers.DeclarativeContainer): model = config.model, sampler_name = config.sampler_name, embedding_path = config.embedding_path, - full_precision = config.full_precision + precision = config.precision # config = config.model.config, # width = config.model.width, # height = config.model.height, # sampler_name = config.model.sampler_name, # weights = config.model.weights, - # full_precision = config.model.full_precision, + # precision = config.model.precision, # grid = config.model.grid, # seamless = config.model.seamless, # embedding_path = config.model.embedding_path,