added StALKeR779's great model size calculating routine

Lincoln Stein 2023-05-08 21:47:03 -04:00
parent c15b49c805
commit a108155544


@@ -21,15 +21,14 @@ import gc
 import hashlib
 import warnings
 from collections import Counter
-from enum import Enum,auto
+from enum import Enum
 from pathlib import Path
-from psutil import Process
 from typing import Dict, Sequence, Union, Tuple, types
 import torch
 import safetensors.torch
-from diffusers import StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel
+from diffusers import DiffusionPipeline, StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel
 from diffusers import logging as diffusers_logging
 from diffusers.pipelines.stable_diffusion.safety_checker import \
     StableDiffusionSafetyChecker
@ -84,7 +83,7 @@ class ModelStatus(Enum):
# After loading, we will know it exactly. # After loading, we will know it exactly.
# Sizes are in Gigs, estimated for float16; double for float32 # Sizes are in Gigs, estimated for float16; double for float32
SIZE_GUESSTIMATE = { SIZE_GUESSTIMATE = {
SDModelType.diffusers: 2.5, SDModelType.diffusers: 2.2,
SDModelType.vae: 0.35, SDModelType.vae: 0.35,
SDModelType.text_encoder: 0.5, SDModelType.text_encoder: 0.5,
SDModelType.tokenizer: 0.001, SDModelType.tokenizer: 0.001,
@@ -255,7 +254,6 @@ class ModelCache(object):
         # clean memory to make MemoryUsage() more accurate
         gc.collect()
-        with MemoryUsage() as usage:
         model = self._load_model_from_storage(
             repo_id_or_path=repo_id_or_path,
             model_class=model_type.value,
@@ -263,9 +261,11 @@ class ModelCache(object):
             revision=revision,
             legacy_info=legacy_info,
         )
-        logger.debug(f'Actual memory used to load model: {(usage.mem_used/GIG):.2f} GB')
-        self.model_sizes[key] = usage.mem_used  # remember size of this model for cache cleansing
-        self.current_cache_size += usage.mem_used  # increment size of the cache
+        if mem_used := self.calc_model_size(model):
+            logger.debug(f'CPU RAM used for load: {(mem_used/GIG):.2f} GB')
+            self.model_sizes[key] = mem_used  # remember size of this model for cache cleansing
+            self.current_cache_size += mem_used  # increment size of the cache

         # this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
         if model_type==SDModelType.diffusers and attach_model_part[0]:
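
Note (not part of the diff): calc_model_size(), added near the bottom of this commit, returns None for objects it cannot measure, so the walrus-guarded block above simply skips the cache bookkeeping in that case. A standalone sketch of the guard, using hypothetical names:

from typing import Optional
import torch

def calc_size(obj) -> Optional[int]:
    # stand-in for ModelCache.calc_model_size(); None means "could not measure"
    if isinstance(obj, torch.nn.Module):
        return sum(p.nelement() * p.element_size() for p in obj.parameters())
    return None

model_sizes = {}
current_cache_size = 0
if mem_used := calc_size("not a module"):   # None is falsy, so nothing is recorded
    model_sizes["key"] = mem_used
    current_cache_size += mem_used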
@@ -308,7 +308,10 @@ class ModelCache(object):
             cache._offload_unlocked_models()
             if model.device != cache.execution_device:
                 cache.logger.debug(f'Moving {key} into {cache.execution_device}')
+                with VRAMUsage() as mem:
                     model.to(cache.execution_device)  # move into GPU
+                cache.logger.debug(f'GPU VRAM used for load: {(mem.vram_used/GIG):.2f} GB')
+                cache.model_sizes[key] = mem.vram_used  # more accurate size
                 cache.logger.debug(f'Locking {key} in {cache.execution_device}')
                 cache._print_cuda_stats()
             else:
@ -479,8 +482,9 @@ class ModelCache(object):
''' '''
# silence transformer and diffuser warnings # silence transformer and diffuser warnings
with SilenceWarnings(): with SilenceWarnings():
# !!! NOTE: conversion should not happen here, but in ModelManager
if self.is_legacy_ckpt(repo_id_or_path): if self.is_legacy_ckpt(repo_id_or_path):
model = model_class(self._load_ckpt_from_storage(repo_id_or_path, legacy_info)) model = self._load_ckpt_from_storage(repo_id_or_path, legacy_info)
else: else:
model = self._load_diffusers_from_storage( model = self._load_diffusers_from_storage(
repo_id_or_path, repo_id_or_path,
@@ -608,6 +612,30 @@ class ModelCache(object):
             raise KeyError(f"Revision '{revision}' not found in {repo_id}")
         return desired_revisions[0].target_commit

+    @staticmethod
+    def calc_model_size(model)->int:
+        if isinstance(model,DiffusionPipeline):
+            return ModelCache._calc_pipeline(model)
+        elif isinstance(model,torch.nn.Module):
+            return ModelCache._calc_model(model)
+        else:
+            return None
+
+    @staticmethod
+    def _calc_pipeline(pipeline)->int:
+        res = 0
+        for submodel_key in pipeline.components.keys():
+            submodel = getattr(pipeline, submodel_key)
+            if submodel is not None and isinstance(submodel, torch.nn.Module):
+                res += ModelCache._calc_model(submodel)
+        return res
+
+    @staticmethod
+    def _calc_model(model)->int:
+        mem_params = sum([param.nelement()*param.element_size() for param in model.parameters()])
+        mem_bufs = sum([buf.nelement()*buf.element_size() for buf in model.buffers()])
+        mem = mem_params + mem_bufs  # in bytes
+        return mem
+
 class SilenceWarnings(object):
     def __init__(self):
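
For reference, a minimal standalone sketch (assumed usage, not part of the diff) of the arithmetic _calc_model() performs; the 2**30 divisor assumes GIG is one gibibyte:

import torch

def model_size_bytes(m: torch.nn.Module) -> int:
    # parameters and buffers each contribute nelement() * element_size() bytes
    params = sum(p.nelement() * p.element_size() for p in m.parameters())
    bufs = sum(b.nelement() * b.element_size() for b in m.buffers())
    return params + bufs

demo = torch.nn.Sequential(
    torch.nn.Conv2d(4, 320, kernel_size=3, padding=1),
    torch.nn.BatchNorm2d(320),   # running stats live in buffers, so they are counted too
)
print(f'{model_size_bytes(demo) / 2**30:.4f} GB')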
@@ -624,14 +652,14 @@ class SilenceWarnings(object):
         diffusers_logging.set_verbosity(self.diffusers_verbosity)
         warnings.simplefilter('default')

-class MemoryUsage(object):
+class VRAMUsage(object):
     def __init__(self):
-        self.vms = None
-        self.mem_used = 0
+        self.vram = None
+        self.vram_used = 0

     def __enter__(self):
-        self.vms = Process().memory_info().vms
+        self.vram = torch.cuda.memory_allocated()
         return self

     def __exit__(self, *args):
-        self.mem_used = Process().memory_info().vms - self.vms
+        self.vram_used = torch.cuda.memory_allocated() - self.vram
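
A minimal usage sketch (assumed caller code, not part of the diff) for the reworked context manager: it brackets a .to() call and reports the growth in torch.cuda.memory_allocated(), which the locking path above then stores as the more accurate model size. VRAMUsage is assumed to be importable from the patched module:

import torch

if torch.cuda.is_available():
    with VRAMUsage() as mem:
        model = torch.nn.Linear(4096, 4096).half().to('cuda')   # roughly 32 MB of float16 weights
    print(f'GPU VRAM used for load: {mem.vram_used / 2**30:.2f} GB')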