make model manager v2 ready for PR review

- Replace legacy model manager service with the v2 manager.

- Update invocations to use new load interface.

- Fixed many, but not all, type-checking errors in the invocations. Most
  were unrelated to the model manager.

- Updated routes. All the new routes live under the route tag
  `model_manager_v2`. To avoid confusion with the old routes,
  they have the URL prefix `/api/v2/models`. The old routes
  have been de-registered.

- Added a pytest for the loader.

- Updated documentation in contributing/MODEL_MANAGER.md
This commit is contained in:
Lincoln Stein
2024-02-10 18:09:45 -05:00
committed by psychedelicious
parent 2b1dc74080
commit 94e8d1b6d5
36 changed files with 680 additions and 435 deletions

View File

@ -43,8 +43,10 @@ class InvocationStatsService(InvocationStatsServiceBase):
@contextmanager
def collect_stats(self, invocation: BaseInvocation, graph_execution_state_id: str) -> Iterator[None]:
# This handles the case where the model manager has not been initialized, which
# happens during some tests.
services = self._invoker.services
if services.model_records is None or services.model_records.loader is None:
if services.model_manager is None or services.model_manager.load is None:
yield None
if not self._stats.get(graph_execution_state_id):
# First time we're seeing this graph_execution_state_id.
@ -60,9 +62,8 @@ class InvocationStatsService(InvocationStatsServiceBase):
if torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
# TO DO [LS]: clean up loader service - shouldn't be an attribute of model records
assert services.model_records.loader is not None
services.model_records.loader.ram_cache.stats = self._cache_stats[graph_execution_state_id]
assert services.model_manager.load is not None
services.model_manager.load.ram_cache.stats = self._cache_stats[graph_execution_state_id]
try:
# Let the invocation run.