report RAM and RAM cache statistics

Lincoln Stein 2023-08-15 21:00:30 -04:00
parent a4b029d03c
commit ec10aca91e
4 changed files with 115 additions and 36 deletions

View File

@@ -43,6 +43,11 @@ import invokeai.backend.util.logging as logger
from ..invocations.baseinvocation import BaseInvocation
from .graph import GraphExecutionState
from .item_storage import ItemStorageABC
+from .model_manager_service import ModelManagerService
+from invokeai.backend.model_management.model_cache import CacheStats
+# size of GIG in bytes
+GIG = 1073741824
class InvocationStatsServiceBase(ABC):
@@ -84,7 +89,8 @@ class InvocationStatsServiceBase(ABC):
pass
@abstractmethod
-def update_invocation_stats(self,
+def update_invocation_stats(
+self,
graph_id: str,
invocation_type: str,
time_used: float,
@@ -119,6 +125,9 @@ class NodeStats:
calls: int = 0
time_used: float = 0.0 # seconds
max_vram: float = 0.0 # GB
+cache_hits: int = 0
+cache_misses: int = 0
+cache_high_watermark: int = 0
@dataclass
@@ -137,36 +146,50 @@ class InvocationStatsService(InvocationStatsServiceBase):
self.graph_execution_manager = graph_execution_manager
# {graph_id => NodeLog}
self._stats: Dict[str, NodeLog] = {}
+self._cache_stats: Dict[str, CacheStats] = {}
class StatsContext:
-def __init__(self, invocation: BaseInvocation, graph_id: str, collector: "InvocationStatsServiceBase"):
+"""Context manager for collecting statistics."""
+def __init__(
+self,
+invocation: BaseInvocation,
+graph_id: str,
+model_manager: ModelManagerService,
+collector: "InvocationStatsServiceBase",
+):
+"""Initialize statistics for this run."""
self.invocation = invocation
self.collector = collector
self.graph_id = graph_id
self.start_time = 0
-self.ram_info = None
+self.ram_used = 0
+self.model_manager = model_manager
def __enter__(self):
self.start_time = time.time()
if torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
-self.ram_info = psutil.virtual_memory()
+self.ram_used = psutil.Process().memory_info().rss
+self.model_manager.collect_cache_stats(self.collector._cache_stats[self.graph_id])
def __exit__(self, *args):
+"""Called on exit from the context."""
+ram_used = psutil.Process().memory_info().rss
self.collector.update_invocation_stats(
-graph_id = self.graph_id,
-invocation_type = self.invocation.type,
-time_used = time.time() - self.start_time,
-vram_used = torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0.0,
-ram_used = psutil.virtual_memory().used / 1e9,
-ram_changed = (psutil.virtual_memory().used - self.ram_info.used) / 1e9,
+graph_id=self.graph_id,
+invocation_type=self.invocation.type,
+time_used=time.time() - self.start_time,
+vram_used=torch.cuda.max_memory_allocated() / GIG if torch.cuda.is_available() else 0.0,
+ram_used=ram_used / GIG,
+ram_changed=(ram_used - self.ram_used) / GIG,
)
def collect_stats(
self,
invocation: BaseInvocation,
graph_execution_state_id: str,
+model_manager: ModelManagerService,
) -> StatsContext:
"""
Return a context object that will capture the statistics.
@@ -175,7 +198,8 @@ class InvocationStatsService(InvocationStatsServiceBase):
"""
if not self._stats.get(graph_execution_state_id): # first time we're seeing this
self._stats[graph_execution_state_id] = NodeLog()
-return self.StatsContext(invocation, graph_execution_state_id, self)
+self._cache_stats[graph_execution_state_id] = CacheStats()
+return self.StatsContext(invocation, graph_execution_state_id, model_manager, self)
def reset_all_stats(self):
"""Zero all statistics"""
@@ -188,7 +212,8 @@ class InvocationStatsService(InvocationStatsServiceBase):
except KeyError:
logger.warning(f"Attempted to clear statistics for unknown graph {graph_execution_id}")
-def update_invocation_stats(self,
+def update_invocation_stats(
+self,
graph_id: str,
invocation_type: str,
time_used: float,
@@ -218,7 +243,7 @@ class InvocationStatsService(InvocationStatsServiceBase):
def log_stats(self):
"""
Send the statistics to the system logger at the info level.
-Stats will only be printed if when the execution of the graph
+Stats will only be printed when the execution of the graph
is complete.
"""
completed = set()
@@ -235,11 +260,21 @@ class InvocationStatsService(InvocationStatsServiceBase):
total_time += stats.time_used
logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:7.3f}s")
-logger.info("Current RAM used: " + "%4.2fG" % stats.ram_used + f" (delta={stats.ram_changed:4.2f}G)")
+logger.info("RAM used: " + "%4.2fG" % stats.ram_used + f" (delta={stats.ram_changed:4.2f}G)")
if torch.cuda.is_available():
-logger.info("Current VRAM used: " + "%4.2fG" % (torch.cuda.memory_allocated() / 1e9))
+logger.info("VRAM used (all processes): " + "%4.2fG" % (torch.cuda.memory_allocated() / GIG))
+cache_stats = self._cache_stats[graph_id]
+logger.info("RAM cache statistics:")
+logger.info(f" Model cache hits: {cache_stats.hits}")
+logger.info(f" Model cache misses: {cache_stats.misses}")
+logger.info(f" Models cached: {cache_stats.in_cache}")
+logger.info(f" Models cleared from cache: {cache_stats.cleared}")
+hwm = cache_stats.high_watermark / GIG
+tot = cache_stats.cache_size / GIG
+logger.info(f" Cache RAM usage: {hwm:4.2f}/{tot:4.2f}G")
completed.add(graph_id)
for graph_id in completed:
del self._stats[graph_id]
+del self._cache_stats[graph_id]
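
A note on the measurement change above: StatsContext now samples the process's own resident set size (psutil.Process().memory_info().rss) on entry and exit instead of the machine-wide psutil.virtual_memory(), so the logged RAM figure and its delta describe the InvokeAI process rather than everything running on the host. Below is a minimal, self-contained sketch of that pattern; RamProbe and the trailing allocation are illustrative only, while the psutil/time calls and the GIG conversion are the ones the diff uses.

    import time
    import psutil

    GIG = 1073741824  # bytes per GB, as in the diff

    class RamProbe:
        """Record process RSS on entry, report usage and delta in GB on exit."""
        def __enter__(self):
            self.start_time = time.time()
            self.ram_used = psutil.Process().memory_info().rss
            return self

        def __exit__(self, *args):
            ram_used = psutil.Process().memory_info().rss
            print(
                f"time={time.time() - self.start_time:.3f}s "
                f"ram={ram_used / GIG:4.2f}G delta={(ram_used - self.ram_used) / GIG:4.2f}G"
            )

    with RamProbe():
        buf = bytearray(512 * 1024 * 1024)  # ~0.5 GB allocation to make the delta visible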

View File

@@ -22,6 +22,7 @@ from invokeai.backend.model_management import (
ModelNotFoundException,
)
from invokeai.backend.model_management.model_search import FindModels
+from invokeai.backend.model_management.model_cache import CacheStats
import torch
from invokeai.app.models.exceptions import CanceledException
@@ -276,6 +277,13 @@ class ModelManagerServiceBase(ABC):
"""
pass
+@abstractmethod
+def collect_cache_stats(self, cache_stats: CacheStats):
+"""
+Reset model cache statistics: subsequent cache hits, misses and memory usage will be recorded in the given CacheStats object.
+"""
+pass
@abstractmethod
def commit(self, conf_file: Optional[Path] = None) -> None:
"""
@@ -500,6 +508,12 @@ class ModelManagerService(ModelManagerServiceBase):
self.logger.debug(f"convert model {model_name}")
return self.mgr.convert_model(model_name, base_model, model_type, convert_dest_directory)
+def collect_cache_stats(self, cache_stats: CacheStats):
+"""
+Reset model cache statistics: subsequent cache hits, misses and memory usage will be recorded in the given CacheStats object.
+"""
+self.mgr.cache.stats = cache_stats
def commit(self, conf_file: Optional[Path] = None):
"""
Write current configuration out to the indicated file.
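
The new service method is deliberately thin: it installs the caller-owned CacheStats object on the underlying RAM cache (self.mgr.cache.stats = cache_stats), and the cache then mutates that same object in place. A simplified sketch of the sharing pattern follows, using made-up stand-ins (FakeCache, FakeModelManagerService) rather than the real classes:

    from dataclasses import dataclass

    @dataclass
    class CacheStats:          # same shape as the dataclass added in model_cache.py
        hits: int = 0
        misses: int = 0

    class FakeCache:
        def __init__(self):
            self.stats = None  # a CacheStats installed by the service, or None

        def get_model(self, key: str, already_cached: bool):
            if self.stats:     # only count when someone asked for statistics
                if already_cached:
                    self.stats.hits += 1
                else:
                    self.stats.misses += 1

    class FakeModelManagerService:
        def __init__(self, cache: FakeCache):
            self.cache = cache

        def collect_cache_stats(self, cache_stats: CacheStats):
            self.cache.stats = cache_stats  # mirrors ModelManagerService.collect_cache_stats

    stats = CacheStats()
    service = FakeModelManagerService(FakeCache())
    service.collect_cache_stats(stats)
    service.cache.get_model("vae", already_cached=False)
    service.cache.get_model("vae", already_cached=True)
    print(stats)  # CacheStats(hits=1, misses=1)

Because the object is shared rather than copied, the stats service can read the hit and miss counts after the graph completes without any further calls into the model manager.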

View File

@@ -86,7 +86,9 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
# Invoke
try:
-with statistics.collect_stats(invocation, graph_execution_state.id):
+graph_id = graph_execution_state.id
+model_manager = self.__invoker.services.model_manager
+with statistics.collect_stats(invocation, graph_id, model_manager):
outputs = invocation.invoke(
InvocationContext(
services=self.__invoker.services,

View File

@@ -21,12 +21,12 @@ import os
import sys
import hashlib
from contextlib import suppress
+from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Union, types, Optional, Type, Any
import torch
-import logging
import invokeai.backend.util.logging as logger
from .models import BaseModelType, ModelType, SubModelType, ModelBase
@@ -41,6 +41,16 @@ DEFAULT_MAX_VRAM_CACHE_SIZE = 2.75
GIG = 1073741824
+@dataclass
+class CacheStats(object):
+hits: int = 0
+misses: int = 0
+high_watermark: int = 0
+in_cache: int = 0
+cleared: int = 0
+cache_size: int = 0
class ModelLocker(object):
"Forward declaration"
pass
@@ -115,6 +125,9 @@ class ModelCache(object):
self.sha_chunksize = sha_chunksize
self.logger = logger
+# used for stats collection
+self.stats = None
self._cached_models = dict()
self._cache_stack = list()
@@ -188,6 +201,8 @@ class ModelCache(object):
self.logger.info(
f"Loading model {model_path}, type {base_model.value}:{model_type.value}{':'+submodel.value if submodel else ''}"
)
+if self.stats:
+self.stats.misses += 1
# this will remove older cached models until
# there is sufficient room to load the requested model
@@ -201,6 +216,14 @@ class ModelCache(object):
cache_entry = _CacheRecord(self, model, mem_used)
self._cached_models[key] = cache_entry
+else:
+if self.stats:
+self.stats.hits += 1
+self.stats.cache_size = self.max_cache_size * GIG
+if self.stats:
+self.stats.high_watermark = max(self.stats.high_watermark, self._cache_size())
+self.stats.in_cache = len(self._cached_models)
with suppress(Exception):
self._cache_stack.remove(key)
@@ -280,14 +303,14 @@ class ModelCache(object):
"""
Given the HF repo id or path to a model on disk, returns a unique
hash. Works for legacy checkpoint files, HF models on disk, and HF repo IDs
:param model_path: Path to model file/directory on disk.
"""
return self._local_model_hash(model_path)
def cache_size(self) -> float:
-"Return the current size of the cache, in GB"
-current_cache_size = sum([m.size for m in self._cached_models.values()])
-return current_cache_size / GIG
+"""Return the current size of the cache, in GB."""
+return self._cache_size() / GIG
def _has_cuda(self) -> bool:
return self.execution_device.type == "cuda"
@@ -310,12 +333,15 @@ class ModelCache(object):
f"Current VRAM/RAM usage: {vram}/{ram}; cached_models/loaded_models/locked_models/ = {cached_models}/{loaded_models}/{locked_models}"
)
+def _cache_size(self) -> int:
+return sum([m.size for m in self._cached_models.values()])
def _make_cache_room(self, model_size):
# calculate how much memory this model will require
# multiplier = 2 if self.precision==torch.float32 else 1
bytes_needed = model_size
maximum_size = self.max_cache_size * GIG # stored in GB, convert to bytes
-current_size = sum([m.size for m in self._cached_models.values()])
+current_size = self._cache_size()
if current_size + bytes_needed > maximum_size:
self.logger.debug(
@@ -364,6 +390,8 @@ class ModelCache(object):
f"Unloading model {model_key} to free {(model_size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
)
current_size -= cache_entry.size
+if self.stats:
+self.stats.cleared += 1
del self._cache_stack[pos]
del self._cached_models[model_key]
del cache_entry
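
Taken together, the cache-side changes mean every get_model() call increments either hits or misses, high_watermark only ratchets upward toward the largest _cache_size() observed, and _make_cache_room() counts evictions in cleared. A short sketch of how the raw byte counts read once converted with GIG; all numbers are illustrative, only the CacheStats fields and the GIG constant come from this diff, and the import assumes the invokeai package is installed:

    from invokeai.backend.model_management.model_cache import CacheStats

    GIG = 1073741824  # same constant the cache uses

    stats = CacheStats(hits=7, misses=2, in_cache=3, cleared=1,
                       high_watermark=4 * GIG, cache_size=6 * GIG)

    hit_rate = stats.hits / (stats.hits + stats.misses)  # ~0.78 for these numbers
    print(f"Model cache hits: {stats.hits}, misses: {stats.misses} (hit rate {hit_rate:.0%})")
    print(f"Cache RAM usage: {stats.high_watermark / GIG:4.2f}/{stats.cache_size / GIG:4.2f}G")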