From a10815554480ba9f266c1c97722b623291d06bf7 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Mon, 8 May 2023 21:47:03 -0400
Subject: [PATCH] added StALKeR779's great model size calculating routine

---
 .../backend/model_management/model_cache.py | 72 +++++++++++++------
 1 file changed, 50 insertions(+), 22 deletions(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index 04af1f43f3..173fd87623 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -21,15 +21,14 @@
 import gc
 import hashlib
 import warnings
 from collections import Counter
-from enum import Enum,auto
+from enum import Enum
 from pathlib import Path
-from psutil import Process
 from typing import Dict, Sequence, Union, Tuple, types
 
 import torch
 import safetensors.torch
-from diffusers import StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel
+from diffusers import DiffusionPipeline, StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel
 from diffusers import logging as diffusers_logging
 from diffusers.pipelines.stable_diffusion.safety_checker import \
     StableDiffusionSafetyChecker
@@ -84,7 +83,7 @@ class ModelStatus(Enum):
 # After loading, we will know it exactly.
 # Sizes are in Gigs, estimated for float16; double for float32
 SIZE_GUESSTIMATE = {
-    SDModelType.diffusers: 2.5,
+    SDModelType.diffusers: 2.2,
     SDModelType.vae: 0.35,
     SDModelType.text_encoder: 0.5,
     SDModelType.tokenizer: 0.001,
@@ -255,17 +254,18 @@ class ModelCache(object):
 
         # clean memory to make MemoryUsage() more accurate
         gc.collect()
-        with MemoryUsage() as usage:
-            model = self._load_model_from_storage(
-                repo_id_or_path=repo_id_or_path,
-                model_class=model_type.value,
-                subfolder=subfolder,
-                revision=revision,
-                legacy_info=legacy_info,
-            )
-            logger.debug(f'Actual memory used to load model: {(usage.mem_used/GIG):.2f} GB')
-            self.model_sizes[key] = usage.mem_used  # remember size of this model for cache cleansing
-            self.current_cache_size += usage.mem_used  # increment size of the cache
+        model = self._load_model_from_storage(
+            repo_id_or_path=repo_id_or_path,
+            model_class=model_type.value,
+            subfolder=subfolder,
+            revision=revision,
+            legacy_info=legacy_info,
+        )
+
+        if mem_used := self.calc_model_size(model):
+            logger.debug(f'CPU RAM used for load: {(mem_used/GIG):.2f} GB')
+            self.model_sizes[key] = mem_used  # remember size of this model for cache cleansing
+            self.current_cache_size += mem_used  # increment size of the cache
 
         # this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
         if model_type==SDModelType.diffusers and attach_model_part[0]:
@@ -308,7 +308,10 @@ class ModelCache(object):
             cache._offload_unlocked_models()
             if model.device != cache.execution_device:
                 cache.logger.debug(f'Moving {key} into {cache.execution_device}')
-                model.to(cache.execution_device)  # move into GPU
+                with VRAMUsage() as mem:
+                    model.to(cache.execution_device)  # move into GPU
+                cache.logger.debug(f'GPU VRAM used for load: {(mem.vram_used/GIG):.2f} GB')
+                cache.model_sizes[key] = mem.vram_used  # more accurate size
             cache.logger.debug(f'Locking {key} in {cache.execution_device}')
             cache._print_cuda_stats()
         else:
@@ -479,8 +482,9 @@ class ModelCache(object):
         '''
         # silence transformer and diffuser warnings
         with SilenceWarnings():
+            # !!! NOTE: conversion should not happen here, but in ModelManager
            if self.is_legacy_ckpt(repo_id_or_path):
-                model = model_class(self._load_ckpt_from_storage(repo_id_or_path, legacy_info))
+                model = self._load_ckpt_from_storage(repo_id_or_path, legacy_info)
             else:
                 model = self._load_diffusers_from_storage(
                     repo_id_or_path,
@@ -608,6 +612,30 @@ class ModelCache(object):
             raise KeyError(f"Revision '{revision}' not found in {repo_id}")
         return desired_revisions[0].target_commit
 
+    @staticmethod
+    def calc_model_size(model)->int:
+        if isinstance(model,DiffusionPipeline):
+            return ModelCache._calc_pipeline(model)
+        elif isinstance(model,torch.nn.Module):
+            return ModelCache._calc_model(model)
+        else:
+            return None
+
+    @staticmethod
+    def _calc_pipeline(pipeline)->int:
+        res = 0
+        for submodel_key in pipeline.components.keys():
+            submodel = getattr(pipeline, submodel_key)
+            if submodel is not None and isinstance(submodel, torch.nn.Module):
+                res += ModelCache._calc_model(submodel)
+        return res
+
+    @staticmethod
+    def _calc_model(model)->int:
+        mem_params = sum([param.nelement()*param.element_size() for param in model.parameters()])
+        mem_bufs = sum([buf.nelement()*buf.element_size() for buf in model.buffers()])
+        mem = mem_params + mem_bufs # in bytes
+        return mem
 
 class SilenceWarnings(object):
     def __init__(self):
@@ -624,14 +652,14 @@
         diffusers_logging.set_verbosity(self.diffusers_verbosity)
         warnings.simplefilter('default')
 
-class MemoryUsage(object):
+class VRAMUsage(object):
     def __init__(self):
-        self.vms = None
-        self.mem_used = 0
+        self.vram = None
+        self.vram_used = 0
 
     def __enter__(self):
-        self.vms = Process().memory_info().vms
+        self.vram = torch.cuda.memory_allocated()
        return self
 
     def __exit__(self, *args):
-        self.mem_used = Process().memory_info().vms - self.vms
+        self.vram_used = torch.cuda.memory_allocated() - self.vram
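
Reviewer note (illustration only, not part of the patch): calc_model_size() sums nelement() * element_size() over a model's parameters and buffers, walking pipeline.components when given a DiffusionPipeline, while VRAMUsage measures the torch.cuda.memory_allocated() delta around the move onto the execution device. A minimal standalone sketch of the same two measurements follows; the calc_module_size() helper, the GIG constant, and the toy torch.nn.Sequential model are hypothetical stand-ins, not names from the patch.

# standalone sketch -- helper name and toy model are illustrative only
import torch

GIG = 2 ** 30

def calc_module_size(module: torch.nn.Module) -> int:
    """Bytes held by a module's parameters and buffers (same accounting as _calc_model above)."""
    mem_params = sum(p.nelement() * p.element_size() for p in module.parameters())
    mem_bufs = sum(b.nelement() * b.element_size() for b in module.buffers())
    return mem_params + mem_bufs

if __name__ == '__main__':
    model = torch.nn.Sequential(
        torch.nn.Linear(1024, 4096),
        torch.nn.BatchNorm1d(4096),  # batch norm contributes buffers (running stats) as well as parameters
        torch.nn.Linear(4096, 1024),
    )
    print(f'CPU-side size: {calc_module_size(model)/GIG:.4f} GB')

    # Same idea as VRAMUsage: measure the allocation delta around the move onto the GPU.
    if torch.cuda.is_available():
        before = torch.cuda.memory_allocated()
        model.to('cuda')
        print(f'VRAM used for load: {(torch.cuda.memory_allocated() - before)/GIG:.4f} GB')

The byte count is exact for whatever dtype the weights are already stored in, which is why the patch can record a measured per-model size after loading instead of relying on the SIZE_GUESSTIMATE table alone.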