From a5fb8469ed314f8527377b25fd6ce291d782a996 Mon Sep 17 00:00:00 2001
From: xra
Date: Fri, 19 Aug 2022 17:23:43 +0900
Subject: [PATCH] use half precision for reduced memory usage & faster speed

This allows users with 6 GB and 8 GB cards to run 512x512, and enables
even larger resolutions on bigger GPUs.

I compared the output in Beyond Compare: there are minor differences
detected at tolerance 3, but side by side they are not perceptible.
---
 ldm/simplet2i.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ldm/simplet2i.py b/ldm/simplet2i.py
index adcbd6372a..796a99396b 100644
--- a/ldm/simplet2i.py
+++ b/ldm/simplet2i.py
@@ -256,6 +256,8 @@ class T2I:
 
         model = self.load_model()  # will instantiate the model or return it from cache
 
+        precision_scope = autocast if self.precision=="autocast" else nullcontext
+
         # grid and individual are mutually exclusive, with individual taking priority.
         # not necessary, but needed for compatability with dream bot
         if (grid is None):
@@ -279,7 +281,8 @@ class T2I:
             assert os.path.isfile(init_img)
             init_image = self._load_img(init_img).to(self.device)
             init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
-            init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space
+            with precision_scope("cuda"):
+                init_latent = model.get_first_stage_encoding(model.encode_first_stage(init_image))  # move to latent space
 
             sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False)
 
@@ -292,7 +295,6 @@ class T2I:
             t_enc = int(strength * steps)
             print(f"target t_enc is {t_enc} steps")
 
-        precision_scope = autocast if self.precision=="autocast" else nullcontext
 
         images = list()
         seeds = list()
@@ -401,6 +403,7 @@ class T2I:
         m, u = model.load_state_dict(sd, strict=False)
         model.cuda()
         model.eval()
+        model.half()
         return model
 
     def _load_img(self,path):
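
For reference, the precision pattern this patch relies on can be shown as a
short standalone sketch, not part of the patch itself: model.half() stores the
weights in fp16 (roughly halving VRAM), while the autocast/nullcontext switch
lets the caller opt back into full-precision execution per forward pass. The
sketch uses a stand-in nn.Conv2d instead of the Stable Diffusion model and a
hard-coded precision flag instead of self.precision, and assumes PyTorch 1.10+
for torch.autocast.

# Minimal sketch of the precision pattern introduced above (illustrative only).
from contextlib import nullcontext

import torch
from torch import autocast, nn

precision = "autocast"                     # stand-in for self.precision
device = "cuda" if torch.cuda.is_available() else "cpu"

model = nn.Conv2d(3, 8, kernel_size=3).to(device).eval()
if device == "cuda":
    model.half()                           # fp16 weights roughly halve VRAM

# Same switch the patch hoists to the top of the method: autocast when
# requested, otherwise a no-op context manager.
precision_scope = autocast if precision == "autocast" else nullcontext

image = torch.randn(1, 3, 64, 64, device=device)
if device == "cuda":
    image = image.half()                   # inputs must match the fp16 weights

with precision_scope(device), torch.no_grad():
    latent = model(image)                  # runs in fp16 via autocast or .half()
print(latent.dtype)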