From b9e9087dbe04b728aad9f11d1c1aeca40228d297 Mon Sep 17 00:00:00 2001
From: Lincoln Stein
Date: Sun, 14 May 2023 18:09:38 -0400
Subject: [PATCH] do not manage GPU for pipelines if sequential_offload is True

---
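Reviewer note (kept below the "---" cut so it stays out of the commit
message): a minimal sketch of how the new attach_model_parts argument is
meant to be called. The ModelCache constructor arguments are illustrative
only; each entry is a (SDModelType, repo_id_or_path, subfolder) tuple, and
subfolder may be None.

    from invokeai.backend.model_management.model_cache import (ModelCache,
                                                                SDModelType)

    cache = ModelCache()  # illustrative; real construction may take device/size args

    parts = {
        (SDModelType.Vae,  'stabilityai/sd-vae-ft-mse', None),
        (SDModelType.UNet, 'runwayml/stable-diffusion-v1-5', 'unet'),
    }

    with cache.get_model('runwayml/stable-diffusion-v1-5',
                         attach_model_parts=parts) as pipeline:
        # the external VAE and UNet remain attached to the pipeline (and on
        # the execution device) for the duration of this context
        ...
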
 .../backend/model_management/model_cache.py   | 52 +++++++++++++++----
 .../backend/model_management/model_manager.py | 17 +++---
 2 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/invokeai/backend/model_management/model_cache.py b/invokeai/backend/model_management/model_cache.py
index aee9cbeb49..c214e9ea48 100644
--- a/invokeai/backend/model_management/model_cache.py
+++ b/invokeai/backend/model_management/model_cache.py
@@ -21,13 +21,14 @@ import gc
 import hashlib
 import warnings
 from collections import Counter
+from contextlib import suppress
 from enum import Enum
 from pathlib import Path
-from typing import Dict, Sequence, Union, Tuple, types, Optional
+from typing import Dict, Sequence, Union, Set, Tuple, types, Optional
 
 import torch
 import safetensors.torch
-
+
 from diffusers import DiffusionPipeline, StableDiffusionPipeline, AutoencoderKL, SchedulerMixin, UNet2DConditionModel, ConfigMixin
 from diffusers import logging as diffusers_logging
 from diffusers.pipelines.stable_diffusion.safety_checker import \
@@ -87,6 +88,16 @@ MODEL_CLASSES = {
     SDModelType.TextualInversion: TIType,
 }
 
+DIFFUSERS_PARTS = {
+    SDModelType.Vae,
+    SDModelType.TextEncoder,
+    SDModelType.Tokenizer,
+    SDModelType.UNet,
+    SDModelType.Scheduler,
+    SDModelType.SafetyChecker,
+    SDModelType.FeatureExtractor,
+}
+
 class ModelStatus(Enum):
     unknown='unknown'
     not_loaded='not loaded'
@@ -169,7 +180,7 @@ class ModelCache(object):
             subfolder: Path = None,
             submodel: SDModelType = None,
             revision: str = None,
-            attach_model_part: Tuple[SDModelType, str] = (None, None),
+            attach_model_parts: Optional[Set[Tuple[SDModelType, str, Optional[str]]]] = None,
             gpu_load: bool = True,
     ) -> ModelLocker:  # ?? what does it return
         '''
@@ -213,15 +224,18 @@ class ModelCache(object):
 
            pipeline_context = cache.get_model(
                  'runwayml/stable-diffusion-v1-5',
-                 attach_model_part=(SDModelType.Vae,'stabilityai/sd-vae-ft-mse')
+                 attach_model_parts=set([
+                     (SDModelType.Vae, 'stabilityai/sd-vae-ft-mse', None),
+                     (SDModelType.UNet, 'runwayml/stable-diffusion-v1-5', 'unet'),  # (type, ID, subfolder)
+                 ])
                  )
 
         The model will be locked into GPU VRAM for the duration of the context.
         :param repo_id_or_path: either the HuggingFace repo_id or a Path to a local model
         :param model_type: An SDModelType enum indicating the type of the (parent) model
         :param subfolder: name of a subfolder in which the model can be found, e.g. "vae"
         :param submodel: an SDModelType enum indicating the model part to return, e.g. SDModelType.Vae
-        :param attach_model_part: load and attach a diffusers model component. Pass a tuple of format (SDModelType,repo_id)
+        :param attach_model_parts: load and attach one or more diffusers model components. Pass a set of tuples of the form (SDModelType, repo_id_or_path, subfolder); subfolder may be None
         :param revision: model revision
         :param gpu_load: load the model into GPU [default True]
         '''
@@ -274,8 +288,9 @@ class ModelCache(object):
                 self.current_cache_size += mem_used   # increment size of the cache
 
             # this is a bit of legacy work needed to support the old-style "load this diffuser with custom VAE"
-            if model_type == SDModelType.Diffusers and attach_model_part[0]:
-                self.attach_part(model, *attach_model_part)
+            if model_type == SDModelType.Diffusers and attach_model_parts:
+                for attach_model_part in attach_model_parts:
+                    self.attach_part(model, *attach_model_part)
 
             self.stack.append(key)          # add to LRU cache
             self.models[key] = model        # keep copy of model in dict
@@ -320,11 +335,12 @@
                 if model.device != cache.execution_device:
                     cache.logger.debug(f'Moving {key} into {cache.execution_device}')
                     with VRAMUsage() as mem:
-                        model.to(cache.execution_device)  # move into GPU
+                        self._to(model, cache.execution_device)
+                        # model.to(cache.execution_device)  # move into GPU
+
                     cache.logger.debug(f'GPU VRAM used for load: {(mem.vram_used/GIG):.2f} GB')
                     cache.model_sizes[key] = mem.vram_used  # more accurate size
-
                 cache.logger.debug(f'Locking {key} in {cache.execution_device}')
                 cache._print_cuda_stats()
             else:
@@ -332,7 +348,8 @@
             # move it into CPU if it is in GPU and not locked
             if hasattr(model, 'to') and (key in cache.loaded_models and cache.locked_models[key] == 0):
-                model.to(cache.storage_device)
+                self._to(model, cache.storage_device)
+                # model.to(cache.storage_device)
                 cache.loaded_models.remove(key)
             return model
@@ -347,6 +364,18 @@
             cache._offload_unlocked_models()
             cache._print_cuda_stats()
 
+        def _to(self, model, device):
+            # if set, sequential offload will take care of GPU management for diffusers
+            if self.cache.sequential_offload and isinstance(model, StableDiffusionGeneratorPipeline):
+                return
+
+            self.cache.logger.debug(f'Moving model to {device}')
+            model.to(device)
+            if isinstance(model, MODEL_CLASSES[SDModelType.Diffusers]):
+                for part in DIFFUSERS_PARTS:
+                    with suppress(Exception):
+                        getattr(model, part).to(device)
+
     def attach_part(
             self,
             diffusers_model: StableDiffusionPipeline,
@@ -366,7 +395,8 @@
             model_type=part_type,
             subfolder=subfolder,
         )
-        part.to(diffusers_model.device)
+        if hasattr(part, 'to'):
+            part.to(diffusers_model.device)
         setattr(diffusers_model, part_type, part)
         self.logger.debug(f'Attached {part_type} {part_id}')
 
diff --git a/invokeai/backend/model_management/model_manager.py b/invokeai/backend/model_management/model_manager.py
index 3c15bb1bfb..c45494386e 100644
--- a/invokeai/backend/model_management/model_manager.py
+++ b/invokeai/backend/model_management/model_manager.py
@@ -146,6 +146,7 @@ from typing import Callable, Optional, List, Tuple, Union, types
 import safetensors
 import safetensors.torch
 import torch
+from diffusers import AutoencoderKL
 from huggingface_hub import scan_cache_dir
 from omegaconf import OmegaConf
 from omegaconf.dictconfig import DictConfig
@@ -157,7 +158,7 @@ from invokeai.backend.util import download_with_resume
 from ..util import CUDA_DEVICE
 
 from .model_cache import (ModelCache, ModelLocker, ModelStatus, SDModelType,
-                          SilenceWarnings)
+                          SilenceWarnings, DIFFUSERS_PARTS)
 
 # We are only starting to number the config file with release 3.
 # The config file version doesn't have to start at release version, but it will help
@@ -375,12 +376,14 @@
         # to support the traditional way of attaching a VAE
         # to a model, we hacked in `attach_model_part`
         # TODO: generalize this
-        vae = (None, None)
+        external_parts = set()
         if model_type == SDModelType.Diffusers:
-            with suppress(Exception):
-                vae_id = mconfig.vae.get('path') or mconfig.vae.get('repo_id')
-                vae_subfolder = mconfig.vae.get('subfolder')
-                vae = (SDModelType.Vae, vae_id, vae_subfolder)
+            for part in DIFFUSERS_PARTS:
+                with suppress(Exception):
+                    if part_config := mconfig.get(part):
+                        part_id = part_config.get('path') or part_config.get('repo_id')
+                        part_subfolder = part_config.get('subfolder')
+                        external_parts.add((part, part_id, part_subfolder))
 
         model_context = self.cache.get_model(
             location,
@@ -388,7 +391,7 @@
             revision = revision,
             subfolder = subfolder,
             submodel = submodel,
-            attach_model_part = vae,
+            attach_model_parts = external_parts,
         )
 
         # in case we need to communicate information about this
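
Note: the early return in the new _to() helper is there because diffusers'
sequential CPU offload registers accelerate hooks that stream each submodule
to the GPU only while it executes; calling .to() on the whole pipeline would
pull every weight into VRAM at once and fight those hooks. A sketch of the
interaction (illustrative; assumes a CUDA device and a diffusers version
that provides enable_sequential_cpu_offload):

    import torch
    from diffusers import StableDiffusionPipeline

    pipe = StableDiffusionPipeline.from_pretrained(
        'runwayml/stable-diffusion-v1-5',
        torch_dtype=torch.float16,
    )
    pipe.enable_sequential_cpu_offload()  # accelerate now manages device placement
    # pipe.to('cuda')  # skipped: would defeat (or error under) the offload hooks
    image = pipe('a photo of an astronaut riding a horse').images[0]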