From b9e9087dbe04b728aad9f11d1c1aeca40228d297 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Sun, 14 May 2023 18:09:38 -0400
Subject: [PATCH] do not manage GPU for pipelines if sequential_offload is True

---
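Reviewer note (kept below the "---" cut so it stays out of the commit
message): a minimal sketch of how the new attach_model_parts argument is
meant to be called. The ModelCache constructor arguments are illustrative
only; each entry is a (SDModelType, repo_id_or_path, subfolder) tuple, and
subfolder may be None.

    from invokeai.backend.model_management.model_cache import (ModelCache,
                                                                SDModelType)

    cache = ModelCache()  # illustrative; real construction may take device/size args

    parts = {
        (SDModelType.Vae,  'stabilityai/sd-vae-ft-mse', None),
        (SDModelType.UNet, 'runwayml/stable-diffusion-v1-5', 'unet'),
    }

    with cache.get_model('runwayml/stable-diffusion-v1-5',
                         attach_model_parts=parts) as pipeline:
        # the external VAE and UNet remain attached to the pipeline (and on
        # the execution device) for the duration of this context
        ...
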
 .../backend/model_management/model_cache.py   | 52 +++++++++++++++----
 .../backend/model_management/model_manager.py | 17 +++---
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index aee9cbeb49..c214e9ea48 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -21,13 +21,14 @@ import gc
 import hashlib
 import warnings
 from collections import Counter
+from contextlib import suppress
 from enum import Enum
 from pathlib import Path
-from typing import Dict, Sequence, Union, Tuple, types, Optional
+from typing import Dict, Sequence, Union, Set, Tuple, types, Optional
 
 import torch
 import safetensors.torch
-
+
 from diffusers import DiffusionPipeline, StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel, ConfigMixin
 from diffusers import logging as diffusers_logging
 from diffusers.pipelines.stable_diffusion.safety_checker import \
@@ -87,6 +88,16 @@ MODEL_CLASSES = {
     SDModelType.TextualInversion: TIType,
 }
 
+DIFFUSERS_PARTS = {
+    SDModelType.Vae,
+    SDModelType.TextEncoder,
+    SDModelType.Tokenizer,
+    SDModelType.UNet,
+    SDModelType.Scheduler,
+    SDModelType.SafetyChecker,
+    SDModelType.FeatureExtractor,
+}
+
 class ModelStatus(Enum):
     unknown='unknown'
     not_loaded='not loaded'
@@ -169,7 +180,7 @@ class ModelCache(object):
             subfolder: Path = None,
             submodel: SDModelType = None,
             revision: str = None,
-            attach_model_part: Tuple[SDModelType, str] = (None, None),
+            attach_model_parts: Optional[Set[Tuple[SDModelType, str, Optional[str]]]] = None,
             gpu_load: bool = True,
     ) -> ModelLocker:  # ?? what does it return
         '''
@@ -213,15 +224,18 @@ class ModelCache(object):
 
            pipeline_context = cache.get_model(
                  'runwayml/stable-diffusion-v1-5',
-                 attach_model_part=(SDModelType.Vae,'stabilityai/sd-vae-ft-mse')
+                 attach_model_parts=set([
+                     (SDModelType.Vae, 'stabilityai/sd-vae-ft-mse', None),
+                     (SDModelType.UNet, 'runwayml/stable-diffusion-v1-5', 'unet'),  # (type, ID, subfolder)
+                 ])
                  )
 
         The model will be locked into GPU VRAM for the duration of the context.
         :param repo_id_or_path: either the HuggingFace repo_id or a Path to a local model
         :param model_type: An SDModelType enum indicating the type of the (parent) model
         :param subfolder: name of a subfolder in which the model can be found, e.g. "vae"
         :param submodel: an SDModelType enum indicating the model part to return, e.g. SDModelType.Vae
-        :param attach_model_part: load and attach a diffusers model component. Pass a tuple of format (SDModelType,repo_id)
+        :param attach_model_parts: load and attach one or more diffusers model components. Pass a set of tuples of the form (SDModelType, repo_id_or_path, subfolder); subfolder may be None
         :param revision: model revision
         :param gpu_load: load the model into GPU [default True]
         '''
@@ -274,8 +288,9 @@ class ModelCache(object):
                 self.current_cache_size += mem_used   # increment size of the cache
 
             # this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
-            if model_type == SDModelType.Diffusers and attach_model_part[0]:
-                self.attach_part(model, *attach_model_part)
+            if model_type == SDModelType.Diffusers and attach_model_parts:
+                for attach_model_part in attach_model_parts:
+                    self.attach_part(model, *attach_model_part)
 
             self.stack.append(key)          # add to LRU cache
             self.models[key] = model        # keep copy of model in dict
@@ -320,11 +335,12 @@
                 if model.device != cache.execution_device:
                     cache.logger.debug(f'Moving {key} into {cache.execution_device}')
                     with VRAMUsage() as mem:
-                        model.to(cache.execution_device)  # move into GPU
+                        self._to(model, cache.execution_device)
+                        # model.to(cache.execution_device)  # move into GPU
+
                     cache.logger.debug(f'GPU VRAM used for load: {(mem.vram_used/GIG):.2f} GB')
                     cache.model_sizes[key] = mem.vram_used  # more accurate size
-
                 cache.logger.debug(f'Locking {key} in {cache.execution_device}')
                 cache._print_cuda_stats()
             else:
@@ -332,7 +348,8 @@
             # move it into CPU if it is in GPU and not locked
             if hasattr(model, 'to') and (key in cache.loaded_models and cache.locked_models[key] == 0):
-                model.to(cache.storage_device)
+                self._to(model, cache.storage_device)
+                # model.to(cache.storage_device)
                 cache.loaded_models.remove(key)
             return model
@@ -347,6 +364,18 @@
             cache._offload_unlocked_models()
             cache._print_cuda_stats()
 
+        def _to(self, model, device):
+            # if set, sequential offload will take care of GPU management for diffusers
+            if self.cache.sequential_offload and isinstance(model, StableDiffusionGeneratorPipeline):
+                return
+
+            self.cache.logger.debug(f'Moving model to {device}')
+            model.to(device)
+            if isinstance(model, MODEL_CLASSES[SDModelType.Diffusers]):
+                for part in DIFFUSERS_PARTS:
+                    with suppress(Exception):
+                        getattr(model, part).to(device)
+
     def attach_part(
             self,
             diffusers_model: StableDiffusionPipeline,
@@ -366,7 +395,8 @@
             model_type=part_type,
             subfolder=subfolder,
         )
-        part.to(diffusers_model.device)
+        if hasattr(part, 'to'):
+            part.to(diffusers_model.device)
         setattr(diffusers_model, part_type, part)
         self.logger.debug(f'Attached {part_type} {part_id}')
 
diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py
index 3c15bb1bfb..c45494386e 100644
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -146,6 +146,7 @@ from typing import Callable, Optional, List, Tuple, Union, types
 import safetensors
 import safetensors.torch
 import torch
+from diffusers import AutoencoderKL
 from huggingface_hub import scan_cache_dir
 from omegaconf import OmegaConf
 from omegaconf.dictconfig import DictConfig
@@ -157,7 +158,7 @@ from invokeai.backend.util import download_with_resume
 from ..util import CUDA_DEVICE
 
 from .model_cache import (ModelCache, ModelLocker, ModelStatus, SDModelType,
-                          SilenceWarnings)
+                          SilenceWarnings, DIFFUSERS_PARTS)
 
 # We are only starting to number the config file with release 3.
 # The config file version doesn't have to start at release version, but it will help
@@ -375,12 +376,14 @@
         # to support the traditional way of attaching a VAE
         # to a model, we hacked in `attach_model_part`
         # TODO: generalize this
-        vae = (None, None)
+        external_parts = set()
         if model_type == SDModelType.Diffusers:
-            with suppress(Exception):
-                vae_id = mconfig.vae.get('path') or mconfig.vae.get('repo_id')
-                vae_subfolder = mconfig.vae.get('subfolder')
-                vae = (SDModelType.Vae, vae_id, vae_subfolder)
+            for part in DIFFUSERS_PARTS:
+                with suppress(Exception):
+                    if part_config := mconfig.get(part):
+                        part_id = part_config.get('path') or part_config.get('repo_id')
+                        part_subfolder = part_config.get('subfolder')
+                        external_parts.add((part, part_id, part_subfolder))
 
         model_context = self.cache.get_model(
             location,
@@ -388,7 +391,7 @@
             revision = revision,
             subfolder = subfolder,
             submodel = submodel,
-            attach_model_part = vae,
+            attach_model_parts = external_parts,
         )
 
         # in case we need to communicate information about this
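
Note: the early return in the new _to() helper is there because diffusers'
sequential CPU offload registers accelerate hooks that stream each submodule
to the GPU only while it executes; calling .to() on the whole pipeline would
pull every weight into VRAM at once and fight those hooks. A sketch of the
interaction (illustrative; assumes a CUDA device and a diffusers version
that provides enable_sequential_cpu_offload):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        'runwayml/stable-diffusion-v1-5',
        torch_dtype=torch.float16,
    )
    pipe.enable_sequential_cpu_offload()  # accelerate now manages device placement
    # pipe.to('cuda')  # skipped: would defeat (or error under) the offload hooks
    image = pipe('a photo of an astronaut riding a horse').images[0]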