fixup config_default; patch TorchDevice to work dynamically

Lincoln Stein 2024-04-15 22:15:50 -04:00
parent 7dd93cb810
commit f7436f3bae
13 changed files with 102 additions and 52 deletions

View File

@ -5,7 +5,6 @@ from logging import Logger
import torch
import invokeai.backend.util.devices # horrible hack
from invokeai.app.services.object_serializer.object_serializer_disk import ObjectSerializerDisk
from invokeai.app.services.object_serializer.object_serializer_forward_cache import ObjectSerializerForwardCache
from invokeai.app.services.shared.sqlite.sqlite_util import init_db
@ -104,7 +103,7 @@ class ApiDependencies:
)
# horrible hack - remove
invokeai.backend.util.devices.RAM_CACHE = model_manager.load.ram_cache
names = SimpleNameService()
session_processor = DefaultSessionProcessor()
session_queue = SqliteSessionQueue(db=db)

View File

@ -68,16 +68,19 @@ class CompelInvocation(BaseInvocation):
tokenizer_model = tokenizer_info.model
assert isinstance(tokenizer_model, CLIPTokenizer)
text_encoder_info = context.models.load(self.clip.text_encoder)
text_encoder_model = text_encoder_info.model
assert isinstance(text_encoder_model, CLIPTextModel)
def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
for lora in self.clip.loras:
lora_info = context.models.load(lora.lora)
assert isinstance(lora_info.model, LoRAModelRaw)
with lora_info as model:
yield (model, lora.weight)
yield (lora_info.model, lora.weight)
del lora_info
return
# loras = [(context.models.get(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras]
ti_list = generate_ti_list(self.prompt, text_encoder_info.config.base, context)
with (
@ -136,7 +139,8 @@ class SDXLPromptInvocationBase:
tokenizer_model = tokenizer_info.model
assert isinstance(tokenizer_model, CLIPTokenizer)
text_encoder_info = context.models.load(clip_field.text_encoder)
assert isinstance(text_encoder_info.model, (CLIPTextModel, CLIPTextModelWithProjection))
text_encoder_model = text_encoder_info.model
assert isinstance(text_encoder_model, (CLIPTextModel, CLIPTextModelWithProjection))
# return zero on empty
if prompt == "" and zero_on_empty:
@ -195,11 +199,11 @@ class SDXLPromptInvocationBase:
requires_pooled=get_pooled,
)
conjunction = Compel.parse_prompt_string(prompt)
conjunction = Compel.parse_prompt_string(prompt)
if context.config.get().log_tokenization:
# TODO: better logging for and syntax
log_tokenization_for_conjunction(conjunction, tokenizer)
if context.config.get().log_tokenization:
# TODO: better logging for and syntax
log_tokenization_for_conjunction(conjunction, tokenizer)
# TODO: ask for optimizations? to not run text_encoder twice
c, _options = compel.build_conditioning_tensor_for_conjunction(conjunction)

View File

@ -4,7 +4,7 @@ import math
from contextlib import ExitStack
from functools import singledispatchmethod
from typing import Any, Iterator, List, Literal, Optional, Tuple, Union
import threading
import einops
import numpy as np
import numpy.typing as npt
@ -525,11 +525,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
guidance_scale=self.cfg_scale,
guidance_rescale_multiplier=self.cfg_rescale_multiplier,
)
if conditioning_data.unconditioned_embeddings.embeds.device != conditioning_data.text_embeddings.embeds.device:
print(f'DEBUG; ERROR uc={conditioning_data.unconditioned_embeddings.embeds.device} c={conditioning_data.text_embeddings.embeds.device} unet={unet.device}, tid={threading.current_thread().ident}')
return conditioning_data
def create_pipeline(
@ -899,6 +894,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
mask = mask.to(device=unet.device, dtype=unet.dtype)
if masked_latents is not None:
masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,

View File

@ -31,7 +31,7 @@ ATTENTION_TYPE = Literal["auto", "normal", "xformers", "sliced", "torch-sdp"]
ATTENTION_SLICE_SIZE = Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]
LOG_FORMAT = Literal["plain", "color", "syslog", "legacy"]
LOG_LEVEL = Literal["debug", "info", "warning", "error", "critical"]
CONFIG_SCHEMA_VERSION = "4.0.1"
CONFIG_SCHEMA_VERSION = "4.0.2"
def get_default_ram_cache_size() -> float:
@ -101,9 +101,9 @@ class InvokeAIAppConfig(BaseSettings):
ram: Maximum memory amount used by memory model cache for rapid switching (GB).
convert_cache: Maximum size of on-disk converted models cache (GB).
log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda:0`, `cuda:1`, `cuda:2`, `cuda:3`, `cuda:4`, `cuda:5`, `cuda:6`, `cuda:7`, `cuda:8`, `mps`
devices: List of execution devices to use in a multi-GPU environment; will override default device selected.
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`
device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda:0`, `cuda:1`, `cuda:2`, `cuda:3`, `cuda:4`, `cuda:5`, `cuda:6`, `cuda:7`, `mps`
devices: List of execution devices; will override default device selected.
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast`
sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
attention_type: Attention type.<br>Valid values: `auto`, `normal`, `xformers`, `sliced`, `torch-sdp`
attention_slice_size: Slice size, valid when attention_type=="sliced".<br>Valid values: `auto`, `balanced`, `max`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`
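(Not part of the diff: a hedged sketch of how the updated `device`, `devices`, and `precision` settings read back through `get_config()`; the exact field types are assumptions based on the descriptions above.)

from invokeai.app.services.config.config_default import get_config

config = get_config()
print(config.device)     # single preferred device: "auto", "cpu", "cuda:0", ..., "mps"
print(config.devices)    # assumed: optional list of device strings; when set, overrides `device` for multi-GPU runs
print(config.precision)  # "auto" resolves to a precision appropriate for the hardware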
@ -366,9 +366,9 @@ def migrate_v3_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
# `max_cache_size` was renamed to `ram` some time in v3, but both names were used
if k == "max_cache_size" and "ram" not in category_dict:
parsed_config_dict["ram"] = v
# `max_vram_cache_size` was renamed to `vram` some time in v3, but both names were used
if k == "max_vram_cache_size" and "vram" not in category_dict:
parsed_config_dict["vram"] = v
# vram was removed in v4.0.2
if k in ["vram", "max_vram_cache_size", "lazy_offload"]:
continue
# autocast was removed in v4.0.1
if k == "precision" and v == "autocast":
parsed_config_dict["precision"] = "auto"
@ -416,6 +416,25 @@ def migrate_v4_0_0_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig
return config
def migrate_v4_0_1_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
"""Migrate v4.0.1 config dictionary to a current config object.
Args:
config_dict: A dictionary of settings from a v4.0.1 config file.
Returns:
An instance of `InvokeAIAppConfig` with the migrated settings.
"""
parsed_config_dict: dict[str, Any] = {}
for k, v in config_dict.items():
if k not in ["vram", "lazy_offload"]:
parsed_config_dict[k] = v
if k == "schema_version":
parsed_config_dict[k] = CONFIG_SCHEMA_VERSION
config = DefaultInvokeAIAppConfig.model_validate(parsed_config_dict)
return config
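(Illustrative only, not part of the diff: feeding a v4.0.1 settings dict through the new migration should drop the retired keys and bump the schema version.)

old_settings = {
    "schema_version": "4.0.1",
    "ram": 12.0,
    "vram": 0.5,            # retired in 4.0.2
    "lazy_offload": True,   # retired in 4.0.2
    "precision": "float16",
}
migrated = migrate_v4_0_1_config_dict(old_settings)
# Expected: `ram` and `precision` carry over, `vram`/`lazy_offload` are dropped,
# and the stored schema version is now CONFIG_SCHEMA_VERSION ("4.0.2").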
def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
"""Load and migrate a config file to the latest version.
@ -447,6 +466,10 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
loaded_config_dict = migrate_v4_0_0_config_dict(loaded_config_dict)
loaded_config_dict.write_file(config_path)
elif loaded_config_dict["schema_version"] == "4.0.1":
loaded_config_dict = migrate_v4_0_1_config_dict(loaded_config_dict)
loaded_config_dict.write_file(config_path)
# Attempt to load as a v4 config file
try:
# Meta is not included in the model fields, so we need to validate it separately

View File

@ -1,14 +1,11 @@
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
"""Implementation of ModelManagerServiceBase."""
from typing import Optional
import torch
from typing_extensions import Self
from invokeai.app.services.invoker import Invoker
from invokeai.backend.model_manager.load import ModelCache, ModelConvertCache, ModelLoaderRegistry
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.logging import InvokeAILogger
from ..config import InvokeAIAppConfig
@ -89,8 +86,6 @@ class ModelManagerService(ModelManagerServiceBase):
max_cache_size=app_config.ram,
logger=logger,
execution_devices=execution_devices,
max_vram_cache_size=app_config.vram,
lazy_offloading=app_config.lazy_offload,
)
convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
loader = ModelLoadService(

View File

@ -1,6 +1,6 @@
import threading
from queue import Queue
from typing import TYPE_CHECKING, Optional, TypeVar
import threading
from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase

View File

@ -187,8 +187,7 @@ class DefaultSessionProcessor(SessionProcessorBase):
profiler.start(profile_id=session.session_id)
# reserve a GPU for this session - may block
with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device() as gpu:
with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device():
# Prepare invocations and take the first
with self._process_lock:
invocation = session.session.next()

View File

@ -3,6 +3,7 @@ from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union
import torch
from PIL.Image import Image
from torch import Tensor
@ -15,15 +16,24 @@ from invokeai.app.services.images.images_common import ImageDTO
from invokeai.app.services.invocation_services import InvocationServices
from invokeai.app.services.model_records.model_records_base import UnknownModelException
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType
from invokeai.backend.model_manager.config import (
AnyModel,
AnyModelConfig,
BaseModelType,
ModelFormat,
ModelType,
SubModelType,
)
from invokeai.backend.model_manager.load.load_base import LoadedModel
from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData
from invokeai.backend.util.devices import TorchDevice
if TYPE_CHECKING:
from invokeai.app.invocations.baseinvocation import BaseInvocation
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.session_queue.session_queue_common import SessionQueueItem
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
"""
The InvocationContext provides access to various services and data about the current invocation.
@ -473,6 +483,28 @@ class UtilInterface(InvocationContextInterface):
is_canceled=self.is_canceled,
)
def torch_device(self) -> torch.device:
"""
Return a torch device to use in the current invocation.
Returns:
A torch.device not currently in use by the system.
"""
ram_cache: "ModelCacheBase[AnyModel]" = self._services.model_manager.load.ram_cache
return ram_cache.get_execution_device()
def torch_dtype(self, device: Optional[torch.device] = None) -> torch.dtype:
"""
Return a precision type to use with the current invocation and torch device.
Args:
device: Optional device.
Returns:
A torch.dtype suited for the current device.
"""
return TorchDevice.choose_torch_dtype(device)
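(A usage sketch for the two new helpers, assuming they are reached as `context.util.torch_device()` / `context.util.torch_dtype()` like the other UtilInterface methods; the helper function itself is hypothetical.)

import torch
from invokeai.app.services.shared.invocation_context import InvocationContext

def make_empty_latents(context: InvocationContext) -> torch.Tensor:
    # Hypothetical helper: the device comes from the per-thread GPU reservation,
    # the dtype from TorchDevice.choose_torch_dtype() via the new wrapper.
    device = context.util.torch_device()
    dtype = context.util.torch_dtype(device)
    return torch.zeros((1, 4, 64, 64), device=device, dtype=dtype)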
class InvocationContext:
"""Provides access to various services and data for the current invocation.

View File

@ -106,7 +106,7 @@ class ModelCacheBase(ABC, Generic[T]):
Return an execution device that has been reserved for current thread.
Note that reservations are done using the current thread's TID.
It would be better to do this using the session ID, but that involves
It might be better to do this using the session ID, but that involves
too many detailed changes to model manager calls.
May generate a ValueError if no GPU has been reserved.
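(A minimal sketch of the reserve/lookup contract described above; `ram_cache` stands for any ModelCacheBase implementation and the surrounding code is hypothetical.)

with ram_cache.reserve_execution_device() as device:
    # While the reservation is held, code running on this thread can recover the
    # device by TID instead of passing it through every call.
    assert ram_cache.get_execution_device() == device
# After the block exits the device is returned to the pool; calling
# get_execution_device() from a thread with no reservation raises ValueError.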

View File

@ -127,7 +127,6 @@ class ModelCache(ModelCacheBase[AnyModel]):
assigned = [x for x, tid in self._execution_devices.items() if current_thread == tid]
if not assigned:
raise ValueError(f"No GPU has been reserved for the use of thread {current_thread}")
print(f'DEBUG: TID={current_thread}; owns {assigned[0]}')
return assigned[0]
@contextmanager
@ -157,12 +156,15 @@ class ModelCache(ModelCacheBase[AnyModel]):
device = free_device[0]
# we are outside the lock region now
print(f'DEBUG: RESERVED {device} for TID {current_thread}')
self.logger.info(f"Reserved torch device {device} for execution thread {current_thread}")
# Tell TorchDevice to use this object to get the torch device.
TorchDevice.set_model_cache(self)
try:
yield device
finally:
with self._device_lock:
print(f'DEBUG: RELEASED {device} for TID {current_thread}')
self.logger.info(f"Released torch device {device}")
self._execution_devices[device] = 0
self._free_execution_device.release()
torch.cuda.empty_cache()
@ -407,12 +409,11 @@ class ModelCache(ModelCacheBase[AnyModel]):
if torch.cuda.is_available():
devices = {torch.device(f"cuda:{x}") for x in range(0, torch.cuda.device_count())}
elif torch.backends.mps.is_available():
devices = {torch.device('mps')}
devices = {torch.device("mps")}
else:
devices = {torch.device('cpu')}
devices = {torch.device("cpu")}
return devices
@staticmethod
def _device_name(device: torch.device) -> str:
return f"{device.type}:{device.index}"
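(Sketch of the net effect of the TorchDevice.set_model_cache() call made inside reserve_execution_device() above: once a thread holds a reservation, TorchDevice.choose_torch_device() answers with the reserved GPU instead of the static config value. `cache` is assumed to be a populated ModelCache instance.)

from invokeai.backend.util.devices import TorchDevice

default_device = TorchDevice.choose_torch_device()  # no reservation yet: config / auto-detect path

with cache.reserve_execution_device() as device:
    # reserve_execution_device() has called TorchDevice.set_model_cache(cache),
    # so the lookup is now dynamic and returns this thread's reserved device.
    assert TorchDevice.choose_torch_device() == device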

View File

@ -399,11 +399,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
unet_attention_patcher = UNetAttentionPatcher(ip_adapters)
attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model)
# NOTE error is not here!
if conditioning_data.unconditioned_embeddings.embeds.device != \
conditioning_data.text_embeddings.embeds.device:
print('DEBUG; HERE IS THE ERROR 1')
with attn_ctx:
if callback is not None:
callback(
@ -418,10 +413,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# print("timesteps:", timesteps)
for i, t in enumerate(self.progress_bar(timesteps)):
if conditioning_data.unconditioned_embeddings.embeds.device != \
conditioning_data.text_embeddings.embeds.device:
print('DEBUG; HERE IS THE ERROR 2')
batched_t = t.expand(batch_size)
step_output = self.step(
batched_t,
@ -466,7 +457,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
ip_adapter_data: Optional[list[IPAdapterData]] = None,
t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0]
if additional_guidance is None:

View File

@ -4,7 +4,6 @@ import math
from typing import Any, Callable, Optional, Union
import torch
import threading
from typing_extensions import TypeAlias
from invokeai.app.services.config.config_default import get_config
@ -256,8 +255,6 @@ class InvokeAIDiffuserComponent:
unconditioning, encoder_attention_mask = _pad_conditioning(unconditioning, max_len, encoder_attention_mask)
conditioning, encoder_attention_mask = _pad_conditioning(conditioning, max_len, encoder_attention_mask)
if unconditioning.device != conditioning.device:
print(f'DEBUG: TID={threading.current_thread().ident}: Unconditioning device = {unconditioning.device}, conditioning device={conditioning.device}')
return torch.cat([unconditioning, conditioning]), encoder_attention_mask
# methods below are called from do_diffusion_step and should be considered private to this class.

View File

@ -1,16 +1,21 @@
from typing import Dict, Literal, Optional, Union
from typing import TYPE_CHECKING, Dict, Literal, Optional, Union
import torch
from deprecated import deprecated
from invokeai.app.services.config.config_default import get_config
if TYPE_CHECKING:
from invokeai.backend.model_manager.config import AnyModel
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase
# legacy APIs
TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]
CPU_DEVICE = torch.device("cpu")
CUDA_DEVICE = torch.device("cuda")
MPS_DEVICE = torch.device("mps")
@deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore
def choose_precision(device: torch.device) -> TorchPrecisionNames:
"""Return the string representation of the recommended torch device."""
@ -41,9 +46,18 @@ PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NA
class TorchDevice:
"""Abstraction layer for torch devices."""
_model_cache: Optional["ModelCacheBase[AnyModel]"] = None
@classmethod
def set_model_cache(cls, cache: "ModelCacheBase[AnyModel]"):
"""Set the current model cache."""
cls._model_cache = cache
@classmethod
def choose_torch_device(cls) -> torch.device:
"""Return the torch.device to use for accelerated inference."""
if cls._model_cache:
return cls._model_cache.get_execution_device()
app_config = get_config()
if app_config.device != "auto":
device = torch.device(app_config.device)