Mirror of https://github.com/invoke-ai/InvokeAI (synced 2024-08-30 20:32:17 +00:00)
new OffloadingDevice loads one model at a time, on demand (#2596)
* new OffloadingDevice loads one model at a time, on demand
* fixup! new OffloadingDevice loads one model at a time, on demand
* fix(prompt_to_embeddings): call the text encoder directly instead of its forward method, allowing any associated hooks to run with it
* more attempts to get things on the right device from the offloader
* more attempts to get things on the right device from the offloader
* make offloading methods an explicit part of the pipeline interface
* inlining some calls where device is only used once
* ensure model group is ready after pipeline.to is called
* fixup! Strategize slicing based on free [V]RAM (#2572)
* doc(offloading): docstrings for offloading.ModelGroup
* doc(offloading): docstrings for offloading-related pipeline methods
* refactor(offloading): s/SimpleModelGroup/FullyLoadedModelGroup
* refactor(offloading): s/HotSeatModelGroup/LazilyLoadedModelGroup to frame it in the same terms as "FullyLoadedModelGroup"

---------

Co-authored-by: Damian Stewart <null@damianstewart.com>
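The refactors above center on a ModelGroup abstraction with two concrete strategies: FullyLoadedModelGroup (every installed model stays on the execution device) and LazilyLoadedModelGroup (at most one model resident, loaded on demand). The diff below does not include offloading.py, so the following is only a minimal sketch of that shape; the method names and signatures are assumptions, not the actual API.

    # Sketch only: the class names come from the commit message; the
    # method names and signatures here are assumptions, not InvokeAI's
    # actual offloading.py API.
    from abc import ABC, abstractmethod

    import torch


    class ModelGroup(ABC):
        """Manages device placement for a group of torch.nn.Module models."""

        def __init__(self, execution_device: torch.device):
            self.execution_device = execution_device

        @abstractmethod
        def install(self, *models: torch.nn.Module) -> None:
            """Take over device placement for the given models."""

        @abstractmethod
        def ready(self) -> None:
            """Make the group executable on the execution device."""

        @abstractmethod
        def offload_current(self) -> None:
            """Move whatever is resident off the execution device."""


    class FullyLoadedModelGroup(ModelGroup):
        """Keeps every installed model on the execution device."""


    class LazilyLoadedModelGroup(ModelGroup):
        """Keeps at most one model resident, loading each on demand."""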
@@ -25,8 +25,6 @@ import torch
 import transformers
 from diffusers import AutoencoderKL
 from diffusers import logging as dlogging
-from diffusers.utils.logging import (get_verbosity, set_verbosity,
-                                     set_verbosity_error)
 from huggingface_hub import scan_cache_dir
 from omegaconf import OmegaConf
 from omegaconf.dictconfig import DictConfig
@@ -49,9 +47,10 @@ class ModelManager(object):
     def __init__(
         self,
         config: OmegaConf,
-        device_type: str = "cpu",
+        device_type: str | torch.device = "cpu",
         precision: str = "float16",
         max_loaded_models=DEFAULT_MAX_MODELS,
+        sequential_offload=False,
     ):
         """
         Initialize with the path to the models.yaml config file,
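With the widened device_type annotation and the new sequential_offload flag, constructing a manager might look like the sketch below. The config path is a placeholder and the import path is an assumption based on this file's ldm.invoke package.

    # Usage sketch; "configs/models.yaml" is a placeholder, and the
    # import path is assumed from this file's package, not shown in
    # the diff.
    import torch
    from omegaconf import OmegaConf

    from ldm.invoke.model_manager import ModelManager

    config = OmegaConf.load("configs/models.yaml")
    manager = ModelManager(
        config,
        device_type=torch.device("cuda"),  # str | torch.device both accepted now
        precision="float16",
        sequential_offload=True,  # load submodels into VRAM one at a time
    )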
@@ -69,6 +68,7 @@ class ModelManager(object):
         self.models = {}
         self.stack = []  # this is an LRU FIFO
         self.current_model = None
+        self.sequential_offload = sequential_offload

     def valid_model(self, model_name: str) -> bool:
         """
@ -529,7 +529,10 @@ class ModelManager(object):
|
||||
dlogging.set_verbosity(verbosity)
|
||||
assert pipeline is not None, OSError(f'"{name_or_path}" could not be loaded')
|
||||
|
||||
pipeline.to(self.device)
|
||||
if self.sequential_offload:
|
||||
pipeline.enable_offload_submodels(self.device)
|
||||
else:
|
||||
pipeline.to(self.device)
|
||||
|
||||
model_hash = self._diffuser_sha256(name_or_path)
|
||||
|
||||
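The diff only shows the call site of enable_offload_submodels, not its body. The mechanism it names, keeping at most one submodel on the execution device and loading each just before it runs, can be sketched with PyTorch forward pre-hooks. This is an illustration of the idea under those assumptions, not the PR's implementation.

    # Illustration only: on-demand submodel placement via forward
    # pre-hooks. Not the actual enable_offload_submodels implementation.
    import torch


    def sequential_offload(submodels: list[torch.nn.Module],
                           execution_device: torch.device) -> None:
        state = {"resident": None}  # at most one submodel on the device

        def pre_hook(module, args):
            if state["resident"] is not module:
                if state["resident"] is not None:
                    state["resident"].to("cpu")  # evict the previous submodel
                module.to(execution_device)      # load the one about to run
                state["resident"] = module
            # keep inputs on the same device as the module's weights
            return tuple(a.to(execution_device) if torch.is_tensor(a) else a
                         for a in args)

        for m in submodels:
            m.to("cpu")  # start everything off the execution device
            m.register_forward_pre_hook(pre_hook)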
@@ -748,7 +751,6 @@ class ModelManager(object):
         into models.yaml.
         """
         new_config = None
-        import transformers

         from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser

@@ -995,12 +997,12 @@ class ModelManager(object):
         if self.device == "cpu":
             return model

-        # diffusers really really doesn't like us moving a float16 model onto CPU
-        verbosity = get_verbosity()
-        set_verbosity_error()
+        if isinstance(model, StableDiffusionGeneratorPipeline):
+            model.offload_all()
+            return model
+
         model.cond_stage_model.device = "cpu"
         model.to("cpu")
-        set_verbosity(verbosity)

         for submodel in ("first_stage_model", "cond_stage_model", "model"):
             try:
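For diffusers pipelines, offloading now goes through the model group instead of a bare model.to("cpu"), which is why the get_verbosity/set_verbosity_error workaround (silencing diffusers' complaint about float16 weights on CPU) could be deleted along with its imports in the first hunk. offload_all() is not shown in this diff; a hypothetical shape of its effect:

    # Hypothetical shape of offload_all(); per the commit message the
    # real method delegates to the pipeline's ModelGroup rather than
    # iterating attributes directly.
    def offload_all(self) -> None:
        for submodel in (self.unet, self.vae, self.text_encoder):
            submodel.to("cpu")  # hand the weights back to system RAM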
@@ -1013,6 +1015,10 @@ class ModelManager(object):
         if self.device == "cpu":
             return model

+        if isinstance(model, StableDiffusionGeneratorPipeline):
+            model.ready()
+            return model
+
         model.to(self.device)
         model.cond_stage_model.device = self.device

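ready() is the counterpart to offload_all(): it makes the model group executable again before generation, matching the commit message's "ensure model group is ready after pipeline.to is called". A hedged lifecycle sketch, assuming a pipeline already in hand; the call style is illustrative, not taken from the diff:

    # Lifecycle sketch; names and call style are illustrative.
    pipeline.ready()        # restore submodels to the execution device
    result = pipeline(prompt)  # run generation as usual
    pipeline.offload_all()  # park submodels off the device until next use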
@ -1163,7 +1169,7 @@ class ModelManager(object):
|
||||
strategy.execute()
|
||||
|
||||
@staticmethod
|
||||
def _abs_path(path: Union(str, Path)) -> Path:
|
||||
def _abs_path(path: str | Path) -> Path:
|
||||
if path is None or Path(path).is_absolute():
|
||||
return path
|
||||
return Path(Globals.root, path).resolve()
|
||||
|
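The _abs_path change also fixes a latent bug: typing.Union is subscripted with brackets, not called, so the old annotation Union(str, Path) raises a TypeError the moment the function definition is evaluated. Either valid spelling below works; the PR picks the PEP 604 form, consistent with the str | torch.device annotation above (which implies Python 3.10+).

    # The two valid spellings of the same annotation; Union(str, Path)
    # with parentheses is a call and fails with TypeError at definition
    # time.
    from pathlib import Path
    from typing import Union

    def _abs_path_classic(path: Union[str, Path]) -> Path: ...  # typing form
    def _abs_path_pep604(path: str | Path) -> Path: ...         # PEP 604, 3.10+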