added reporting of memory used to load models

Lincoln Stein 2023-08-15 21:56:19 -04:00
parent ec10aca91e
commit f9958de6be
2 changed files with 26 additions and 17 deletions


@@ -256,22 +256,27 @@ class InvocationStatsService(InvocationStatsServiceBase):
logger.info(f"Graph stats: {graph_id}")
logger.info("Node Calls Seconds VRAM Used")
for node_type, stats in self._stats[graph_id].nodes.items():
logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:7.3f}s {stats.max_vram:4.2f}G")
logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:7.3f}s {stats.max_vram:4.3f}G")
total_time += stats.time_used
logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:7.3f}s")
logger.info("RAM used: " + "%4.2fG" % stats.ram_used + f" (delta={stats.ram_changed:4.2f}G)")
if torch.cuda.is_available():
logger.info("VRAM used (all processes): " + "%4.2fG" % (torch.cuda.memory_allocated() / GIG))
cache_stats = self._cache_stats[graph_id]
hwm = cache_stats.high_watermark / GIG
tot = cache_stats.cache_size / GIG
loaded = sum([v for v in cache_stats.loaded_model_sizes.values()]) / GIG
logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:7.3f}s")
logger.info(
"RAM used by InvokeAI process: " + "%4.2fG" % stats.ram_used + f" (delta={stats.ram_changed:4.2f}G)"
)
logger.info(f"RAM used to load models: {loaded:4.2f}G")
if torch.cuda.is_available():
logger.info("VRAM in use: " + "%4.3fG" % (torch.cuda.memory_allocated() / GIG))
logger.info("RAM cache statistics:")
logger.info(f" Model cache hits: {cache_stats.hits}")
logger.info(f" Model cache misses: {cache_stats.misses}")
logger.info(f" Models cached: {cache_stats.in_cache}")
logger.info(f" Models cleared from cache: {cache_stats.cleared}")
hwm = cache_stats.high_watermark / GIG
tot = cache_stats.cache_size / GIG
logger.info(f" Cache RAM usage: {hwm:4.2f}/{tot:4.2f}G")
logger.info(f" Cache high water mark: {hwm:4.2f}/{tot:4.2f}G")
completed.add(graph_id)
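The diff view above strips indentation and the +/- markers, so the shape of the new summary is easier to see in a standalone sketch. The snippet below is illustrative only: it fills a CacheStats instance (the dataclass added in the second file of this commit) with made-up numbers and reproduces the arithmetic the added lines use to report loaded-model memory and the cache high water mark.

from dataclasses import dataclass, field
from typing import Dict

GIG = 1073741824  # bytes per GiB, matching the constant used in this commit


@dataclass
class CacheStats:
    hits: int = 0
    misses: int = 0
    high_watermark: int = 0
    in_cache: int = 0
    cleared: int = 0
    cache_size: int = 0
    loaded_model_sizes: Dict[str, int] = field(default_factory=dict)


# Made-up values purely for illustration; the key names are hypothetical.
cache_stats = CacheStats(
    hits=3,
    misses=1,
    high_watermark=5 * GIG,
    in_cache=2,
    cleared=0,
    cache_size=6 * GIG,
    loaded_model_sizes={"main/unet": 3 * GIG, "main/vae": 1 * GIG},
)

hwm = cache_stats.high_watermark / GIG
tot = cache_stats.cache_size / GIG
loaded = sum(cache_stats.loaded_model_sizes.values()) / GIG

print(f"RAM used to load models: {loaded:4.2f}G")           # 4.00G
print(f"   Cache high water mark: {hwm:4.2f}/{tot:4.2f}G")  # 5.00/6.00G

In the service itself these values go through logger.info rather than print; the computation is the same.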


@@ -21,7 +21,7 @@ import os
import sys
import hashlib
from contextlib import suppress
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Union, types, Optional, Type, Any
@@ -43,12 +43,14 @@ GIG = 1073741824
@dataclass
class CacheStats(object):
hits: int = 0
misses: int = 0
high_watermark: int = 0
in_cache: int = 0
cleared: int = 0
cache_size: int = 0
hits: int = 0 # cache hits
misses: int = 0 # cache misses
high_watermark: int = 0 # amount of cache used
in_cache: int = 0 # number of models in cache
cleared: int = 0 # number of models cleared to make space
cache_size: int = 0 # total size of cache
# {submodel_key => size}
loaded_model_sizes: Dict[str, int] = field(default_factory=dict)
class ModelLocker(object):
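The import change in the earlier hunk (dataclass becoming dataclass, field) exists to support the new dict-typed attribute: a dataclass field cannot take a bare mutable default such as = {}, so field(default_factory=dict) is required. A minimal illustration, separate from the commit itself:

from dataclasses import dataclass, field
from typing import Dict


# Writing `sizes: Dict[str, int] = {}` here would raise
# "ValueError: mutable default <class 'dict'> for field sizes is not allowed: use default_factory"
# at class-definition time.
@dataclass
class Example:
    sizes: Dict[str, int] = field(default_factory=dict)


a, b = Example(), Example()
a.sizes["unet"] = 123
print(b.sizes)  # {} -- default_factory gives each instance its own dict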
@@ -194,7 +196,6 @@ class ModelCache(object):
model_type=model_type,
submodel_type=submodel,
)
# TODO: lock for no copies on simultaneous calls?
cache_entry = self._cached_models.get(key, None)
if cache_entry is None:
@@ -219,11 +220,14 @@ class ModelCache(object):
else:
if self.stats:
self.stats.hits += 1
self.stats.cache_size = self.max_cache_size * GIG
if self.stats:
self.stats.cache_size = self.max_cache_size * GIG
self.stats.high_watermark = max(self.stats.high_watermark, self._cache_size())
self.stats.in_cache = len(self._cached_models)
self.stats.loaded_model_sizes[key] = max(
self.stats.loaded_model_sizes.get(key, 0), model_info.get_size(submodel)
)
with suppress(Exception):
self._cache_stack.remove(key)
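With the indentation stripped, the last hunk is easy to misread: the stats bookkeeping appears to move out of the cache-hit branch so it runs on misses as well as hits, and the interesting new piece is the per-key running maximum, which keeps repeated loads of the same submodel from being double-counted in the "RAM used to load models" total. A self-contained sketch of that update (key names and sizes are invented):

from typing import Dict

GIG = 1073741824

loaded_model_sizes: Dict[str, int] = {}


def record_load(key: str, size_in_bytes: int) -> None:
    # Keep the largest size ever observed for this key, mirroring the
    # max(...) update added to ModelCache.get_model in this commit.
    loaded_model_sizes[key] = max(loaded_model_sizes.get(key, 0), size_in_bytes)


record_load("main/unet", 3_400_000_000)
record_load("main/unet", 3_400_000_000)  # repeat load of the same model: no double counting
record_load("main/vae", 350_000_000)
print(f"{sum(loaded_model_sizes.values()) / GIG:4.2f}G")  # 3.49G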