From dd2aedacaf27d8fe750a342c310bc88de5311931 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Wed, 7 Sep 2022 13:23:53 -0400
Subject: [PATCH] report VRAM usage stats during initial model loading (#419)

---
 ldm/generate.py  | 24 ++++++++++++++++++++++++
 scripts/dream.py |  4 ----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/ldm/generate.py b/ldm/generate.py
index 9ba72c3676..ebcbfb8752 100644
--- a/ldm/generate.py
+++ b/ldm/generate.py
@@ -501,12 +501,22 @@ class Generate:
 
     def _load_model_from_config(self, config, ckpt):
         print(f'>> Loading model from {ckpt}')
+
+        # for usage statistics
+        device_type = choose_torch_device()
+        if device_type == 'cuda':
+            torch.cuda.reset_peak_memory_stats()
+        tic = time.time()
+
+        # this does the work
         pl_sd = torch.load(ckpt, map_location='cpu')
         sd = pl_sd['state_dict']
         model = instantiate_from_config(config.model)
         m, u = model.load_state_dict(sd, strict=False)
         model.to(self.device)
         model.eval()
+
+
         if self.full_precision:
             print(
                 '>> Using slower but more accurate full-precision math (--full_precision)'
@@ -516,6 +526,20 @@ class Generate:
                 '>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.'
             )
             model.half()
+
+        # usage statistics
+        toc = time.time()
+        print(
+            f'>> Model loaded in', '%4.2fs' % (toc - tic)
+        )
+        if device_type == 'cuda':
+            print(
+                '>> Max VRAM used to load the model:',
+                '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
+                '\n>> Current VRAM usage:'
+                '%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
+            )
+
         return model
 
     def _load_img(self, path, width, height, fit=False):
diff --git a/scripts/dream.py b/scripts/dream.py
index 11ab809890..aad7bb4b2f 100755
--- a/scripts/dream.py
+++ b/scripts/dream.py
@@ -91,11 +91,7 @@ def main():
         print(">> changed to seamless tiling mode")
 
     # preload the model
-    tic = time.time()
     t2i.load_model()
-    print(
-        f'>> model loaded in', '%4.2fs' % (time.time() - tic)
-    )
 
     if not infile:
         print(
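
For reference, the measurement pattern the patch introduces can be exercised on its own: reset the CUDA peak-allocation counter before the work, then read torch.cuda.max_memory_allocated() and torch.cuda.memory_allocated() after it. The sketch below is a minimal standalone illustration, not code from the patch; report_load_stats and the dummy tensor allocation (standing in for actual model loading) are hypothetical.

    import time
    import torch

    def report_load_stats(load_fn):
        # Reset the peak counter so max_memory_allocated() reflects
        # only the allocations made by load_fn, not earlier work.
        if torch.cuda.is_available():
            torch.cuda.reset_peak_memory_stats()
        tic = time.time()

        result = load_fn()   # the timed, memory-tracked work

        toc = time.time()
        print('>> Model loaded in', '%4.2fs' % (toc - tic))
        if torch.cuda.is_available():
            print(
                '>> Max VRAM used to load the model:',
                '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
                '\n>> Current VRAM usage:',
                '%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
            )
        return result

    # Hypothetical stand-in for model loading: a ~0.4 GB CUDA tensor.
    if torch.cuda.is_available():
        model = report_load_stats(lambda: torch.zeros(100_000_000, device='cuda'))

Peak usage can exceed current usage because the checkpoint's state dict is loaded and then freed once its tensors are copied into the model, which is why the patch reports both numbers.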