From fd7b84241988a57d7b9dae24798cc0bb475dd92c Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 1 Aug 2023 17:44:09 -0400 Subject: [PATCH 1/6] add execution stat reporting after each invocation --- invokeai/app/services/invocation_stats.py | 115 ++++++++++++++++++++++ invokeai/app/services/processor.py | 43 ++++---- 2 files changed, 139 insertions(+), 19 deletions(-) create mode 100644 invokeai/app/services/invocation_stats.py diff --git a/invokeai/app/services/invocation_stats.py b/invokeai/app/services/invocation_stats.py new file mode 100644 index 0000000000..8d41b60d49 --- /dev/null +++ b/invokeai/app/services/invocation_stats.py @@ -0,0 +1,115 @@ +# Copyright 2023 Lincoln D. Stein +"""Utility to collect execution time and GPU usage stats on invocations in flight""" + +""" +Usage: +statistics = InvocationStats() # keep track of performance metrics +... +with statistics.collect_stats(invocation, graph_execution_state): + outputs = invocation.invoke( + InvocationContext( + services=self.__invoker.services, + graph_execution_state_id=graph_execution_state.id, + ) + ) +... +statistics.log_stats() + +Typical output: +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> Node Calls Seconds +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> main_model_loader 1 0.006s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> clip_skip 1 0.005s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> compel 2 0.351s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> rand_int 1 0.001s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> range_of_size 1 0.001s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> iterate 1 0.001s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> metadata_accumulator 1 0.002s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> noise 1 0.002s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> t2l 1 3.117s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> l2i 1 0.377s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> TOTAL: 3.865s +[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> Max VRAM used for execution: 3.12G. +[2023-08-01 17:34:44,586]::[InvokeAI]::INFO --> Current VRAM utilization 2.31G. +""" + +import time +from typing import Dict, List + +import torch + +from .graph import GraphExecutionState +from .invocation_queue import InvocationQueueItem +from ..invocations.baseinvocation import BaseInvocation + +import invokeai.backend.util.logging as logger + +class InvocationStats(): + """Accumulate performance information about a running graph. Collects time spent in each node, + as well as the maximum and current VRAM utilisation for CUDA systems""" + + def __init__(self): + self._stats: Dict[str, int] = {} + + class StatsContext(): + def __init__(self, invocation: BaseInvocation, collector): + self.invocation = invocation + self.collector = collector + self.start_time = 0 + + def __enter__(self): + self.start_time = time.time() + + def __exit__(self, *args): + self.collector.log_time(self.invocation.type, time.time() - self.start_time) + + def collect_stats(self, + invocation: BaseInvocation, + graph_execution_state: GraphExecutionState, + ) -> StatsContext: + """ + Return a context object that will capture the statistics. + :param invocation: BaseInvocation object from the current graph. + :param graph_execution_state: GraphExecutionState object from the current session. 
+ """ + if len(graph_execution_state.executed)==0: # new graph is starting + self.reset_stats() + self._current_graph_state = graph_execution_state + sc = self.StatsContext(invocation, self) + return self.StatsContext(invocation, self) + + def reset_stats(self): + """Zero the statistics. Ordinarily called internally.""" + if torch.cuda.is_available(): + torch.cuda.reset_peak_memory_stats() + self._stats: Dict[str, List[int, float]] = {} + + + def log_time(self, invocation_type: str, time_used: float): + """ + Add timing information on execution of a node. Usually + used internally. + :param invocation_type: String literal type of the node + :param time_used: Floating point seconds used by node's exection + """ + if not self._stats.get(invocation_type): + self._stats[invocation_type] = [0, 0.0] + self._stats[invocation_type][0] += 1 + self._stats[invocation_type][1] += time_used + + def log_stats(self): + """ + Send the statistics to the system logger at the info level. + Stats will only be printed if when the execution of the graph + is complete. + """ + if self._current_graph_state.is_complete(): + logger.info('Node Calls Seconds') + for node_type, (calls, time_used) in self._stats.items(): + logger.info(f'{node_type:<20} {calls:>5} {time_used:4.3f}s') + + total_time = sum([ticks for _,ticks in self._stats.values()]) + logger.info(f'TOTAL: {total_time:4.3f}s') + if torch.cuda.is_available(): + logger.info('Max VRAM used for execution: '+'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9)) + logger.info('Current VRAM utilization '+'%4.2fG' % (torch.cuda.memory_allocated() / 1e9)) + diff --git a/invokeai/app/services/processor.py b/invokeai/app/services/processor.py index 50fe217e05..a43e2878ac 100644 --- a/invokeai/app/services/processor.py +++ b/invokeai/app/services/processor.py @@ -5,6 +5,7 @@ from threading import Event, Thread, BoundedSemaphore from ..invocations.baseinvocation import InvocationContext from .invocation_queue import InvocationQueueItem from .invoker import InvocationProcessorABC, Invoker +from .invocation_stats import InvocationStats from ..models.exceptions import CanceledException import invokeai.backend.util.logging as logger @@ -35,6 +36,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): def __process(self, stop_event: Event): try: self.__threadLimit.acquire() + statistics = InvocationStats() # keep track of performance metrics while not stop_event.is_set(): try: queue_item: InvocationQueueItem = self.__invoker.services.queue.get() @@ -83,30 +85,32 @@ class DefaultInvocationProcessor(InvocationProcessorABC): # Invoke try: - outputs = invocation.invoke( - InvocationContext( - services=self.__invoker.services, - graph_execution_state_id=graph_execution_state.id, + with statistics.collect_stats(invocation, graph_execution_state): + outputs = invocation.invoke( + InvocationContext( + services=self.__invoker.services, + graph_execution_state_id=graph_execution_state.id, + ) ) - ) - # Check queue to see if this is canceled, and skip if so - if self.__invoker.services.queue.is_canceled(graph_execution_state.id): - continue + # Check queue to see if this is canceled, and skip if so + if self.__invoker.services.queue.is_canceled(graph_execution_state.id): + continue - # Save outputs and history - graph_execution_state.complete(invocation.id, outputs) + # Save outputs and history + graph_execution_state.complete(invocation.id, outputs) - # Save the state changes - self.__invoker.services.graph_execution_manager.set(graph_execution_state) + # Save the state 
changes + self.__invoker.services.graph_execution_manager.set(graph_execution_state) - # Send complete event - self.__invoker.services.events.emit_invocation_complete( - graph_execution_state_id=graph_execution_state.id, - node=invocation.dict(), - source_node_id=source_node_id, - result=outputs.dict(), - ) + # Send complete event + self.__invoker.services.events.emit_invocation_complete( + graph_execution_state_id=graph_execution_state.id, + node=invocation.dict(), + source_node_id=source_node_id, + result=outputs.dict(), + ) + statistics.log_stats() except KeyboardInterrupt: pass @@ -161,3 +165,4 @@ class DefaultInvocationProcessor(InvocationProcessorABC): pass # Log something? KeyboardInterrupt is probably not going to be seen by the processor finally: self.__threadLimit.release() + From 8a4e5f73aa6e914494885438a560a7d6694b6ce2 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 1 Aug 2023 19:39:42 -0400 Subject: [PATCH 2/6] reset stats on exception --- invokeai/app/services/invocation_stats.py | 38 +++++++++++------------ invokeai/app/services/processor.py | 4 +-- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/invokeai/app/services/invocation_stats.py b/invokeai/app/services/invocation_stats.py index 8d41b60d49..24a5662647 100644 --- a/invokeai/app/services/invocation_stats.py +++ b/invokeai/app/services/invocation_stats.py @@ -43,14 +43,15 @@ from ..invocations.baseinvocation import BaseInvocation import invokeai.backend.util.logging as logger -class InvocationStats(): + +class InvocationStats: """Accumulate performance information about a running graph. Collects time spent in each node, as well as the maximum and current VRAM utilisation for CUDA systems""" def __init__(self): self._stats: Dict[str, int] = {} - - class StatsContext(): + + class StatsContext: def __init__(self, invocation: BaseInvocation, collector): self.invocation = invocation self.collector = collector @@ -61,17 +62,18 @@ class InvocationStats(): def __exit__(self, *args): self.collector.log_time(self.invocation.type, time.time() - self.start_time) - - def collect_stats(self, - invocation: BaseInvocation, - graph_execution_state: GraphExecutionState, - ) -> StatsContext: + + def collect_stats( + self, + invocation: BaseInvocation, + graph_execution_state: GraphExecutionState, + ) -> StatsContext: """ Return a context object that will capture the statistics. :param invocation: BaseInvocation object from the current graph. :param graph_execution_state: GraphExecutionState object from the current session. """ - if len(graph_execution_state.executed)==0: # new graph is starting + if len(graph_execution_state.executed) == 0: # new graph is starting self.reset_stats() self._current_graph_state = graph_execution_state sc = self.StatsContext(invocation, self) @@ -83,7 +85,6 @@ class InvocationStats(): torch.cuda.reset_peak_memory_stats() self._stats: Dict[str, List[int, float]] = {} - def log_time(self, invocation_type: str, time_used: float): """ Add timing information on execution of a node. Usually @@ -95,7 +96,7 @@ class InvocationStats(): self._stats[invocation_type] = [0, 0.0] self._stats[invocation_type][0] += 1 self._stats[invocation_type][1] += time_used - + def log_stats(self): """ Send the statistics to the system logger at the info level. @@ -103,13 +104,12 @@ class InvocationStats(): is complete. 
""" if self._current_graph_state.is_complete(): - logger.info('Node Calls Seconds') + logger.info("Node Calls Seconds") for node_type, (calls, time_used) in self._stats.items(): - logger.info(f'{node_type:<20} {calls:>5} {time_used:4.3f}s') - - total_time = sum([ticks for _,ticks in self._stats.values()]) - logger.info(f'TOTAL: {total_time:4.3f}s') + logger.info(f"{node_type:<20} {calls:>5} {time_used:4.3f}s") + + total_time = sum([ticks for _, ticks in self._stats.values()]) + logger.info(f"TOTAL: {total_time:4.3f}s") if torch.cuda.is_available(): - logger.info('Max VRAM used for execution: '+'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9)) - logger.info('Current VRAM utilization '+'%4.2fG' % (torch.cuda.memory_allocated() / 1e9)) - + logger.info("Max VRAM used for execution: " + "%4.2fG" % (torch.cuda.max_memory_allocated() / 1e9)) + logger.info("Current VRAM utilization " + "%4.2fG" % (torch.cuda.memory_allocated() / 1e9)) diff --git a/invokeai/app/services/processor.py b/invokeai/app/services/processor.py index a43e2878ac..e9511aa283 100644 --- a/invokeai/app/services/processor.py +++ b/invokeai/app/services/processor.py @@ -116,6 +116,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): pass except CanceledException: + statistics.reset_stats() pass except Exception as e: @@ -137,7 +138,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): error_type=e.__class__.__name__, error=error, ) - + statistics.reset_stats() pass # Check queue to see if this is canceled, and skip if so @@ -165,4 +166,3 @@ class DefaultInvocationProcessor(InvocationProcessorABC): pass # Log something? KeyboardInterrupt is probably not going to be seen by the processor finally: self.__threadLimit.release() - From 8fc75a71ee20dd96ceb59f9a4a904125d71e8ad6 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 2 Aug 2023 18:10:52 -0400 Subject: [PATCH 3/6] integrate correctly into app API and add features - Create abstract base class InvocationStatsServiceBase - Store InvocationStatsService in the InvocationServices object - Collect and report stats on simultaneous graph execution independently for each graph id - Track VRAM usage for each node - Handle cancellations and other exceptions gracefully --- invokeai/app/api/dependencies.py | 3 +- invokeai/app/services/invocation_services.py | 3 + invokeai/app/services/invocation_stats.py | 222 ++++++++++++++----- invokeai/app/services/processor.py | 23 +- 4 files changed, 182 insertions(+), 69 deletions(-) diff --git a/invokeai/app/api/dependencies.py b/invokeai/app/api/dependencies.py index d609ce3be2..b25009c8c9 100644 --- a/invokeai/app/api/dependencies.py +++ b/invokeai/app/api/dependencies.py @@ -2,7 +2,6 @@ from typing import Optional from logging import Logger -import os from invokeai.app.services.board_image_record_storage import ( SqliteBoardImageRecordStorage, ) @@ -30,6 +29,7 @@ from ..services.invoker import Invoker from ..services.processor import DefaultInvocationProcessor from ..services.sqlite import SqliteItemStorage from ..services.model_manager_service import ModelManagerService +from ..services.invocation_stats import InvocationStatsService from .events import FastAPIEventService @@ -128,6 +128,7 @@ class ApiDependencies: graph_execution_manager=graph_execution_manager, processor=DefaultInvocationProcessor(), configuration=config, + performance_statistics=InvocationStatsService(graph_execution_manager), logger=logger, ) diff --git a/invokeai/app/services/invocation_services.py b/invokeai/app/services/invocation_services.py 
index 8af17c7643..d7d9aae024 100644 --- a/invokeai/app/services/invocation_services.py +++ b/invokeai/app/services/invocation_services.py @@ -32,6 +32,7 @@ class InvocationServices: logger: "Logger" model_manager: "ModelManagerServiceBase" processor: "InvocationProcessorABC" + performance_statistics: "InvocationStatsServiceBase" queue: "InvocationQueueABC" def __init__( @@ -47,6 +48,7 @@ class InvocationServices: logger: "Logger", model_manager: "ModelManagerServiceBase", processor: "InvocationProcessorABC", + performance_statistics: "InvocationStatsServiceBase", queue: "InvocationQueueABC", ): self.board_images = board_images @@ -61,4 +63,5 @@ class InvocationServices: self.logger = logger self.model_manager = model_manager self.processor = processor + self.performance_statistics = performance_statistics self.queue = queue diff --git a/invokeai/app/services/invocation_stats.py b/invokeai/app/services/invocation_stats.py index 24a5662647..aca1dba550 100644 --- a/invokeai/app/services/invocation_stats.py +++ b/invokeai/app/services/invocation_stats.py @@ -3,99 +3,196 @@ """ Usage: -statistics = InvocationStats() # keep track of performance metrics -... -with statistics.collect_stats(invocation, graph_execution_state): - outputs = invocation.invoke( - InvocationContext( - services=self.__invoker.services, - graph_execution_state_id=graph_execution_state.id, - ) - ) -... + +statistics = InvocationStatsService(graph_execution_manager) +with statistics.collect_stats(invocation, graph_execution_state.id): + ... execute graphs... statistics.log_stats() Typical output: -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> Node Calls Seconds -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> main_model_loader 1 0.006s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> clip_skip 1 0.005s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> compel 2 0.351s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> rand_int 1 0.001s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> range_of_size 1 0.001s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> iterate 1 0.001s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> metadata_accumulator 1 0.002s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> noise 1 0.002s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> t2l 1 3.117s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> l2i 1 0.377s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> TOTAL: 3.865s -[2023-08-01 17:34:44,585]::[InvokeAI]::INFO --> Max VRAM used for execution: 3.12G. -[2023-08-01 17:34:44,586]::[InvokeAI]::INFO --> Current VRAM utilization 2.31G. 
+[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Graph stats: c7764585-9c68-4d9d-a199-55e8186790f3 +[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Node Calls Seconds VRAM Used +[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> main_model_loader 1 0.005s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> clip_skip 1 0.004s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> compel 2 0.512s 0.26G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> rand_int 1 0.001s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> range_of_size 1 0.001s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> iterate 1 0.001s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> metadata_accumulator 1 0.002s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> noise 1 0.002s 0.01G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> t2l 1 3.541s 1.93G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> l2i 1 0.679s 0.58G +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> TOTAL GRAPH EXECUTION TIME: 4.749s +[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> Current VRAM utilization 0.01G + +The abstract base class for this class is InvocationStatsServiceBase. An implementing class which +writes to the system log is stored in InvocationServices.performance_statistics. """ import time -from typing import Dict, List +from abc import ABC, abstractmethod +from contextlib import AbstractContextManager +from dataclasses import dataclass, field +from typing import Dict import torch -from .graph import GraphExecutionState -from .invocation_queue import InvocationQueueItem -from ..invocations.baseinvocation import BaseInvocation - import invokeai.backend.util.logging as logger +from ..invocations.baseinvocation import BaseInvocation +from .graph import GraphExecutionState +from .item_storage import ItemStorageABC -class InvocationStats: + +class InvocationStatsServiceBase(ABC): + "Abstract base class for recording node memory/time performance statistics" + + @abstractmethod + def __init__(self, graph_execution_manager: ItemStorageABC["GraphExecutionState"]): + """ + Initialize the InvocationStatsService and reset counters to zero + :param graph_execution_manager: Graph execution manager for this session + """ + pass + + @abstractmethod + def collect_stats( + self, + invocation: BaseInvocation, + graph_execution_state_id: str, + ) -> AbstractContextManager: + """ + Return a context object that will capture the statistics on the execution + of invocaation. Use with: to place around the part of the code that executes the invocation. + :param invocation: BaseInvocation object from the current graph. + :param graph_execution_state: GraphExecutionState object from the current session. + """ + pass + + @abstractmethod + def reset_stats(self, graph_execution_state_id: str): + """ + Reset all statistics for the indicated graph + :param graph_execution_state_id + """ + pass + + @abstractmethod + def reset_all_stats(self): + """Zero all statistics""" + pass + + @abstractmethod + def update_invocation_stats( + self, + graph_id: str, + invocation_type: str, + time_used: float, + vram_used: float, + ): + """ + Add timing information on execution of a node. Usually + used internally. 
+ :param graph_id: ID of the graph that is currently executing + :param invocation_type: String literal type of the node + :param time_used: Time used by node's exection (sec) + :param vram_used: Maximum VRAM used during exection (GB) + """ + pass + + @abstractmethod + def log_stats(self): + """ + Write out the accumulated statistics to the log or somewhere else. + """ + pass + + +@dataclass +class NodeStats: + """Class for tracking execution stats of an invocation node""" + + calls: int = 0 + time_used: float = 0.0 # seconds + max_vram: float = 0.0 # GB + + +@dataclass +class NodeLog: + """Class for tracking node usage""" + + # {node_type => NodeStats} + nodes: Dict[str, NodeStats] = field(default_factory=dict) + + +class InvocationStatsService(InvocationStatsServiceBase): """Accumulate performance information about a running graph. Collects time spent in each node, as well as the maximum and current VRAM utilisation for CUDA systems""" - def __init__(self): - self._stats: Dict[str, int] = {} + def __init__(self, graph_execution_manager: ItemStorageABC["GraphExecutionState"]): + self.graph_execution_manager = graph_execution_manager + # {graph_id => NodeLog} + self._stats: Dict[str, NodeLog] = {} class StatsContext: - def __init__(self, invocation: BaseInvocation, collector): + def __init__(self, invocation: BaseInvocation, graph_id: str, collector: "InvocationStatsServiceBase"): self.invocation = invocation self.collector = collector + self.graph_id = graph_id self.start_time = 0 def __enter__(self): self.start_time = time.time() + if torch.cuda.is_available(): + torch.cuda.reset_peak_memory_stats() def __exit__(self, *args): - self.collector.log_time(self.invocation.type, time.time() - self.start_time) + self.collector.update_invocation_stats( + self.graph_id, + self.invocation.type, + time.time() - self.start_time, + torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0.0, + ) def collect_stats( self, invocation: BaseInvocation, - graph_execution_state: GraphExecutionState, + graph_execution_state_id: str, ) -> StatsContext: """ Return a context object that will capture the statistics. :param invocation: BaseInvocation object from the current graph. :param graph_execution_state: GraphExecutionState object from the current session. """ - if len(graph_execution_state.executed) == 0: # new graph is starting - self.reset_stats() - self._current_graph_state = graph_execution_state - sc = self.StatsContext(invocation, self) - return self.StatsContext(invocation, self) + if not self._stats.get(graph_execution_state_id): # first time we're seeing this + self._stats[graph_execution_state_id] = NodeLog() + return self.StatsContext(invocation, graph_execution_state_id, self) - def reset_stats(self): - """Zero the statistics. Ordinarily called internally.""" - if torch.cuda.is_available(): - torch.cuda.reset_peak_memory_stats() - self._stats: Dict[str, List[int, float]] = {} + def reset_all_stats(self): + """Zero all statistics""" + self._stats = {} - def log_time(self, invocation_type: str, time_used: float): + def reset_stats(self, graph_execution_id: str): + """Zero the statistics for the indicated graph.""" + try: + self._stats.pop(graph_execution_id) + except KeyError: + logger.warning(f"Attempted to clear statistics for unknown graph {graph_execution_id}") + + def update_invocation_stats(self, graph_id: str, invocation_type: str, time_used: float, vram_used: float): """ Add timing information on execution of a node. Usually used internally. 
+ :param graph_id: ID of the graph that is currently executing :param invocation_type: String literal type of the node :param time_used: Floating point seconds used by node's exection """ - if not self._stats.get(invocation_type): - self._stats[invocation_type] = [0, 0.0] - self._stats[invocation_type][0] += 1 - self._stats[invocation_type][1] += time_used + if not self._stats[graph_id].nodes.get(invocation_type): + self._stats[graph_id].nodes[invocation_type] = NodeStats() + stats = self._stats[graph_id].nodes[invocation_type] + stats.calls += 1 + stats.time_used += time_used + stats.max_vram = max(stats.max_vram, vram_used) def log_stats(self): """ @@ -103,13 +200,24 @@ class InvocationStats: Stats will only be printed if when the execution of the graph is complete. """ - if self._current_graph_state.is_complete(): - logger.info("Node Calls Seconds") - for node_type, (calls, time_used) in self._stats.items(): - logger.info(f"{node_type:<20} {calls:>5} {time_used:4.3f}s") + completed = set() + for graph_id, node_log in self._stats.items(): + current_graph_state = self.graph_execution_manager.get(graph_id) + if not current_graph_state.is_complete(): + continue - total_time = sum([ticks for _, ticks in self._stats.values()]) - logger.info(f"TOTAL: {total_time:4.3f}s") + total_time = 0 + logger.info(f"Graph stats: {graph_id}") + logger.info("Node Calls Seconds VRAM Used") + for node_type, stats in self._stats[graph_id].nodes.items(): + logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:4.3f}s {stats.max_vram:4.2f}G") + total_time += stats.time_used + + logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:4.3f}s") if torch.cuda.is_available(): - logger.info("Max VRAM used for execution: " + "%4.2fG" % (torch.cuda.max_memory_allocated() / 1e9)) logger.info("Current VRAM utilization " + "%4.2fG" % (torch.cuda.memory_allocated() / 1e9)) + + completed.add(graph_id) + + for graph_id in completed: + del self._stats[graph_id] diff --git a/invokeai/app/services/processor.py b/invokeai/app/services/processor.py index e9511aa283..41170a304b 100644 --- a/invokeai/app/services/processor.py +++ b/invokeai/app/services/processor.py @@ -1,15 +1,15 @@ import time import traceback -from threading import Event, Thread, BoundedSemaphore - -from ..invocations.baseinvocation import InvocationContext -from .invocation_queue import InvocationQueueItem -from .invoker import InvocationProcessorABC, Invoker -from .invocation_stats import InvocationStats -from ..models.exceptions import CanceledException +from threading import BoundedSemaphore, Event, Thread import invokeai.backend.util.logging as logger +from ..invocations.baseinvocation import InvocationContext +from ..models.exceptions import CanceledException +from .invocation_queue import InvocationQueueItem +from .invocation_stats import InvocationStatsServiceBase +from .invoker import InvocationProcessorABC, Invoker + class DefaultInvocationProcessor(InvocationProcessorABC): __invoker_thread: Thread @@ -36,7 +36,8 @@ class DefaultInvocationProcessor(InvocationProcessorABC): def __process(self, stop_event: Event): try: self.__threadLimit.acquire() - statistics = InvocationStats() # keep track of performance metrics + statistics: InvocationStatsServiceBase = self.__invoker.services.performance_statistics + while not stop_event.is_set(): try: queue_item: InvocationQueueItem = self.__invoker.services.queue.get() @@ -85,7 +86,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): # Invoke try: - with statistics.collect_stats(invocation, 
graph_execution_state): + with statistics.collect_stats(invocation, graph_execution_state.id): outputs = invocation.invoke( InvocationContext( services=self.__invoker.services, @@ -116,7 +117,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): pass except CanceledException: - statistics.reset_stats() + statistics.reset_stats(graph_execution_state.id) pass except Exception as e: @@ -138,7 +139,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC): error_type=e.__class__.__name__, error=error, ) - statistics.reset_stats() + statistics.reset_stats(graph_execution_state.id) pass # Check queue to see if this is canceled, and skip if so From 3fc789a7eeb9efa6255e331ab7dda03097c451d6 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 2 Aug 2023 18:31:10 -0400 Subject: [PATCH 4/6] fix unit tests --- tests/nodes/test_graph_execution_state.py | 9 ++++++--- tests/nodes/test_invoker.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/nodes/test_graph_execution_state.py b/tests/nodes/test_graph_execution_state.py index e0ee120b54..248bc6fee1 100644 --- a/tests/nodes/test_graph_execution_state.py +++ b/tests/nodes/test_graph_execution_state.py @@ -16,6 +16,7 @@ from invokeai.app.invocations.baseinvocation import ( from invokeai.app.invocations.collections import RangeInvocation from invokeai.app.invocations.math import AddInvocation, MultiplyInvocation from invokeai.app.services.invocation_services import InvocationServices +from invokeai.app.services.invocation_stats import InvocationStatsService from invokeai.app.services.graph import ( Graph, CollectInvocation, @@ -41,6 +42,9 @@ def simple_graph(): @pytest.fixture def mock_services() -> InvocationServices: # NOTE: none of these are actually called by the test invocations + graph_execution_manager = SqliteItemStorage[GraphExecutionState]( + filename=sqlite_memory, table_name="graph_executions" + ) return InvocationServices( model_manager=None, # type: ignore events=TestEventService(), @@ -51,9 +55,8 @@ def mock_services() -> InvocationServices: board_images=None, # type: ignore queue=MemoryInvocationQueue(), graph_library=SqliteItemStorage[LibraryGraph](filename=sqlite_memory, table_name="graphs"), - graph_execution_manager=SqliteItemStorage[GraphExecutionState]( - filename=sqlite_memory, table_name="graph_executions" - ), + graph_execution_manager=graph_execution_manager, + performance_statistics=InvocationStatsService(graph_execution_manager), processor=DefaultInvocationProcessor(), configuration=None, # type: ignore ) diff --git a/tests/nodes/test_invoker.py b/tests/nodes/test_invoker.py index 8eba6d468f..5985c7e8bb 100644 --- a/tests/nodes/test_invoker.py +++ b/tests/nodes/test_invoker.py @@ -11,6 +11,7 @@ from invokeai.app.services.processor import DefaultInvocationProcessor from invokeai.app.services.sqlite import SqliteItemStorage, sqlite_memory from invokeai.app.services.invoker import Invoker from invokeai.app.services.invocation_services import InvocationServices +from invokeai.app.services.invocation_stats import InvocationStatsService from invokeai.app.services.graph import ( Graph, GraphExecutionState, @@ -34,6 +35,9 @@ def simple_graph(): @pytest.fixture def mock_services() -> InvocationServices: # NOTE: none of these are actually called by the test invocations + graph_execution_manager = SqliteItemStorage[GraphExecutionState]( + filename=sqlite_memory, table_name="graph_executions" + ) return InvocationServices( model_manager=None, # type: ignore events=TestEventService(), @@ 
-44,10 +48,9 @@ def mock_services() -> InvocationServices: board_images=None, # type: ignore queue=MemoryInvocationQueue(), graph_library=SqliteItemStorage[LibraryGraph](filename=sqlite_memory, table_name="graphs"), - graph_execution_manager=SqliteItemStorage[GraphExecutionState]( - filename=sqlite_memory, table_name="graph_executions" - ), + graph_execution_manager=graph_execution_manager, processor=DefaultInvocationProcessor(), + performance_statistics=InvocationStatsService(graph_execution_manager), configuration=None, # type: ignore ) From 921ccad04d2b903617cd4dd6d15c4f7fa8fe9c66 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 2 Aug 2023 18:41:43 -0400 Subject: [PATCH 5/6] added stats service to the cli_app startup --- invokeai/app/cli_app.py | 2 ++ scripts/dream.py | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/invokeai/app/cli_app.py b/invokeai/app/cli_app.py index bad95bb559..4558c9219f 100644 --- a/invokeai/app/cli_app.py +++ b/invokeai/app/cli_app.py @@ -37,6 +37,7 @@ from invokeai.app.services.image_record_storage import SqliteImageRecordStorage from invokeai.app.services.images import ImageService, ImageServiceDependencies from invokeai.app.services.resource_name import SimpleNameService from invokeai.app.services.urls import LocalUrlService +from invokeai.app.services.invocation_stats import InvocationStatsService from .services.default_graphs import default_text_to_image_graph_id, create_system_graphs from .services.latent_storage import DiskLatentsStorage, ForwardCacheLatentsStorage @@ -311,6 +312,7 @@ def invoke_cli(): graph_library=SqliteItemStorage[LibraryGraph](filename=db_location, table_name="graphs"), graph_execution_manager=graph_execution_manager, processor=DefaultInvocationProcessor(), + performance_statistics=InvocationStatsService(graph_execution_manager), logger=logger, configuration=config, ) diff --git a/scripts/dream.py b/scripts/dream.py index 12176db41e..d2735046a4 100755 --- a/scripts/dream.py +++ b/scripts/dream.py @@ -2,10 +2,12 @@ # Copyright (c) 2022 Lincoln D. 
Stein (https://github.com/lstein) import warnings -from invokeai.frontend.CLI import invokeai_command_line_interface as main warnings.warn( "dream.py is being deprecated, please run invoke.py for the " "new UI/API or legacy_api.py for the old API", DeprecationWarning, ) -main() + +from invokeai.app.cli_app import invoke_cli + +invoke_cli() From d2bddf7f9161d8c18dda60ebebeedc35a0dd1a78 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 3 Aug 2023 08:47:56 -0400 Subject: [PATCH 6/6] tweak formatting to accommodate longer runtimes --- invokeai/app/services/invocation_stats.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/app/services/invocation_stats.py b/invokeai/app/services/invocation_stats.py index aca1dba550..50320a6611 100644 --- a/invokeai/app/services/invocation_stats.py +++ b/invokeai/app/services/invocation_stats.py @@ -208,12 +208,12 @@ class InvocationStatsService(InvocationStatsServiceBase): total_time = 0 logger.info(f"Graph stats: {graph_id}") - logger.info("Node Calls Seconds VRAM Used") + logger.info("Node Calls Seconds VRAM Used") for node_type, stats in self._stats[graph_id].nodes.items(): - logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:4.3f}s {stats.max_vram:4.2f}G") + logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:7.3f}s {stats.max_vram:4.2f}G") total_time += stats.time_used - logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:4.3f}s") + logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:7.3f}s") if torch.cuda.is_available(): logger.info("Current VRAM utilization " + "%4.2fG" % (torch.cuda.memory_allocated() / 1e9))
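
A minimal usage sketch of the service added by this patch series (not part of the patches themselves). It assumes an InvokeAI checkout that includes the final invocation_stats.py above; FakeInvocation, FakeGraphState, and FakeGraphExecutionManager are stand-ins invented only for this illustration -- in the application the processor passes real BaseInvocation objects and the SqliteItemStorage-backed graph execution manager instead.

    import time

    from invokeai.app.services.invocation_stats import InvocationStatsService


    class FakeGraphState:
        def is_complete(self) -> bool:
            return True  # pretend the graph has finished so log_stats() reports it


    class FakeGraphExecutionManager:
        # Stand-in for the graph execution manager; only get() is needed by log_stats().
        def get(self, graph_id: str) -> FakeGraphState:
            return FakeGraphState()


    class FakeInvocation:
        type = "compel"  # node-type string that will appear in the stats table


    stats = InvocationStatsService(FakeGraphExecutionManager())

    # Time one simulated node execution for graph "graph-1"; the processor wraps
    # invocation.invoke(...) in exactly this kind of context manager.
    with stats.collect_stats(FakeInvocation(), "graph-1"):
        time.sleep(0.1)  # stand-in for the real node's invoke() call

    # Writes the per-node call/time/VRAM table and the graph total to the InvokeAI
    # logger, then discards the accumulated stats for the completed graph.
    stats.log_stats()

Because stats are keyed by graph execution id, several graphs can run through the same service concurrently and each is reported (and cleared) independently when it completes, which is what the processor relies on after patch 3.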