diff --git a/invokeai/app/api/dependencies.py b/invokeai/app/api/dependencies.py index 995d08106a..f492da90f3 100644 --- a/invokeai/app/api/dependencies.py +++ b/invokeai/app/api/dependencies.py @@ -5,7 +5,6 @@ from logging import Logger import torch import invokeai.backend.util.devices # horrible hack - from invokeai.app.services.object_serializer.object_serializer_disk import ObjectSerializerDisk from invokeai.app.services.object_serializer.object_serializer_forward_cache import ObjectSerializerForwardCache from invokeai.app.services.shared.sqlite.sqlite_util import init_db @@ -104,7 +103,7 @@ class ApiDependencies: ) # horrible hack - remove invokeai.backend.util.devices.RAM_CACHE = model_manager.load.ram_cache - + names = SimpleNameService() session_processor = DefaultSessionProcessor() session_queue = SqliteSessionQueue(db=db) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 0d5024a9c5..158f11a58e 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -68,16 +68,19 @@ class CompelInvocation(BaseInvocation): tokenizer_model = tokenizer_info.model assert isinstance(tokenizer_model, CLIPTokenizer) text_encoder_info = context.models.load(self.clip.text_encoder) + text_encoder_model = text_encoder_info.model + assert isinstance(text_encoder_model, CLIPTextModel) def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]: for lora in self.clip.loras: lora_info = context.models.load(lora.lora) assert isinstance(lora_info.model, LoRAModelRaw) - with lora_info as model: - yield (model, lora.weight) + yield (lora_info.model, lora.weight) del lora_info return + # loras = [(context.models.get(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras] + ti_list = generate_ti_list(self.prompt, text_encoder_info.config.base, context) with ( @@ -136,7 +139,8 @@ class SDXLPromptInvocationBase: tokenizer_model = tokenizer_info.model assert isinstance(tokenizer_model, 
CLIPTokenizer) text_encoder_info = context.models.load(clip_field.text_encoder) - assert isinstance(text_encoder_info.model, (CLIPTextModel, CLIPTextModelWithProjection)) + text_encoder_model = text_encoder_info.model + assert isinstance(text_encoder_model, (CLIPTextModel, CLIPTextModelWithProjection)) # return zero on empty if prompt == "" and zero_on_empty: @@ -195,11 +199,11 @@ class SDXLPromptInvocationBase: requires_pooled=get_pooled, ) - conjunction = Compel.parse_prompt_string(prompt) + conjunction = Compel.parse_prompt_string(prompt) - if context.config.get().log_tokenization: - # TODO: better logging for and syntax - log_tokenization_for_conjunction(conjunction, tokenizer) + if context.config.get().log_tokenization: + # TODO: better logging for and syntax + log_tokenization_for_conjunction(conjunction, tokenizer) # TODO: ask for optimizations? to not run text_encoder twice c, _options = compel.build_conditioning_tensor_for_conjunction(conjunction) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 1bdceff9da..a8ead96f3a 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -4,7 +4,7 @@ import math from contextlib import ExitStack from functools import singledispatchmethod from typing import Any, Iterator, List, Literal, Optional, Tuple, Union -import threading + import einops import numpy as np import numpy.typing as npt @@ -525,11 +525,6 @@ class DenoiseLatentsInvocation(BaseInvocation): guidance_scale=self.cfg_scale, guidance_rescale_multiplier=self.cfg_rescale_multiplier, ) - - if conditioning_data.unconditioned_embeddings.embeds.device != conditioning_data.text_embeddings.embeds.device: - print(f'DEBUG; ERROR uc={conditioning_data.unconditioned_embeddings.embeds.device} c={conditioning_data.text_embeddings.embeds.device} unet={unet.device}, tid={threading.current_thread().ident}') - - return conditioning_data def create_pipeline( @@ -899,6 +894,7 @@ class 
DenoiseLatentsInvocation(BaseInvocation): mask = mask.to(device=unet.device, dtype=unet.dtype) if masked_latents is not None: masked_latents = masked_latents.to(device=unet.device, dtype=unet.dtype) + scheduler = get_scheduler( context=context, scheduler_info=self.unet.scheduler, diff --git a/invokeai/app/services/config/config_default.py b/invokeai/app/services/config/config_default.py index 908a0de1d8..39dc1fe83b 100644 --- a/invokeai/app/services/config/config_default.py +++ b/invokeai/app/services/config/config_default.py @@ -31,7 +31,7 @@ ATTENTION_TYPE = Literal["auto", "normal", "xformers", "sliced", "torch-sdp"] ATTENTION_SLICE_SIZE = Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8] LOG_FORMAT = Literal["plain", "color", "syslog", "legacy"] LOG_LEVEL = Literal["debug", "info", "warning", "error", "critical"] -CONFIG_SCHEMA_VERSION = "4.0.1" +CONFIG_SCHEMA_VERSION = "4.0.2" def get_default_ram_cache_size() -> float: @@ -101,9 +101,9 @@ class InvokeAIAppConfig(BaseSettings): ram: Maximum memory amount used by memory model cache for rapid switching (GB). convert_cache: Maximum size of on-disk converted models cache (GB). log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour. - device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.
Valid values: `auto`, `cpu`, `cuda:0`, `cuda:1`, `cuda:2`, `cuda:3`, `cuda:4`, `cuda:5`, `cuda:6`, `cuda:7`, `cuda:8`, `mps` - devices: List of execution devices to use in a multi-GPU environment; will override default device selected. - precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.
Valid values: `auto`, `float16`, `bfloat16`, `float32` + device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.
Valid values: `auto`, `cpu`, `cuda:0`, `cuda:1`, `cuda:2`, `cuda:3`, `cuda:4`, `cuda:5`, `cuda:6`, `cuda:7`, `mps` + devices: List of execution devices; will override default device selected. + precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.
Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast` sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements. attention_type: Attention type.
Valid values: `auto`, `normal`, `xformers`, `sliced`, `torch-sdp` attention_slice_size: Slice size, valid when attention_type=="sliced".
Valid values: `auto`, `balanced`, `max`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8` @@ -366,9 +366,9 @@ def migrate_v3_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig: # `max_cache_size` was renamed to `ram` some time in v3, but both names were used if k == "max_cache_size" and "ram" not in category_dict: parsed_config_dict["ram"] = v - # `max_vram_cache_size` was renamed to `vram` some time in v3, but both names were used - if k == "max_vram_cache_size" and "vram" not in category_dict: - parsed_config_dict["vram"] = v + # vram was removed in v4.0.2 + if k in ["vram", "max_vram_cache_size", "lazy_offload"]: + continue # autocast was removed in v4.0.1 if k == "precision" and v == "autocast": parsed_config_dict["precision"] = "auto" @@ -416,6 +416,25 @@ def migrate_v4_0_0_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig return config +def migrate_v4_0_1_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig: + """Migrate v4.0.1 config dictionary to a current config object. + + Args: + config_dict: A dictionary of settings from a v4.0.1 config file. + + Returns: + An instance of `InvokeAIAppConfig` with the migrated settings. + """ + parsed_config_dict: dict[str, Any] = {} + for k, v in config_dict.items(): + if k not in ["vram", "lazy_offload"]: + parsed_config_dict[k] = v + if k == "schema_version": + parsed_config_dict[k] = CONFIG_SCHEMA_VERSION + config = DefaultInvokeAIAppConfig.model_validate(parsed_config_dict) + return config + + def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig: """Load and migrate a config file to the latest version. 
@@ -447,6 +466,10 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig: loaded_config_dict = migrate_v4_0_0_config_dict(loaded_config_dict) loaded_config_dict.write_file(config_path) + elif loaded_config_dict["schema_version"] == "4.0.1": + loaded_config_dict = migrate_v4_0_1_config_dict(loaded_config_dict) + loaded_config_dict.write_file(config_path) + # Attempt to load as a v4 config file try: # Meta is not included in the model fields, so we need to validate it separately diff --git a/invokeai/app/services/model_manager/model_manager_default.py b/invokeai/app/services/model_manager/model_manager_default.py index 4d595835d1..241259c803 100644 --- a/invokeai/app/services/model_manager/model_manager_default.py +++ b/invokeai/app/services/model_manager/model_manager_default.py @@ -1,14 +1,11 @@ # Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team """Implementation of ModelManagerServiceBase.""" -from typing import Optional - import torch from typing_extensions import Self from invokeai.app.services.invoker import Invoker from invokeai.backend.model_manager.load import ModelCache, ModelConvertCache, ModelLoaderRegistry -from invokeai.backend.util.devices import TorchDevice from invokeai.backend.util.logging import InvokeAILogger from ..config import InvokeAIAppConfig @@ -89,8 +86,6 @@ class ModelManagerService(ModelManagerServiceBase): max_cache_size=app_config.ram, logger=logger, execution_devices=execution_devices, - max_vram_cache_size=app_config.vram, - lazy_offloading=app_config.lazy_offload, ) convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache) loader = ModelLoadService( diff --git a/invokeai/app/services/object_serializer/object_serializer_forward_cache.py b/invokeai/app/services/object_serializer/object_serializer_forward_cache.py index 7d04d47d5c..bf16bfe242 100644 --- a/invokeai/app/services/object_serializer/object_serializer_forward_cache.py +++ 
b/invokeai/app/services/object_serializer/object_serializer_forward_cache.py @@ -1,6 +1,6 @@ +import threading from queue import Queue from typing import TYPE_CHECKING, Optional, TypeVar -import threading from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase diff --git a/invokeai/app/services/session_processor/session_processor_default.py b/invokeai/app/services/session_processor/session_processor_default.py index d65f5ba86c..eb00caba5b 100644 --- a/invokeai/app/services/session_processor/session_processor_default.py +++ b/invokeai/app/services/session_processor/session_processor_default.py @@ -187,8 +187,7 @@ class DefaultSessionProcessor(SessionProcessorBase): profiler.start(profile_id=session.session_id) # reserve a GPU for this session - may block - with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device() as gpu: - + with self._invoker.services.model_manager.load.ram_cache.reserve_execution_device(): # Prepare invocations and take the first with self._process_lock: invocation = session.session.next() diff --git a/invokeai/app/services/shared/invocation_context.py b/invokeai/app/services/shared/invocation_context.py index 9994d663e5..c2c37de78d 100644 --- a/invokeai/app/services/shared/invocation_context.py +++ b/invokeai/app/services/shared/invocation_context.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from pathlib import Path from typing import TYPE_CHECKING, Optional, Union +import torch from PIL.Image import Image from torch import Tensor @@ -15,15 +16,24 @@ from invokeai.app.services.images.images_common import ImageDTO from invokeai.app.services.invocation_services import InvocationServices from invokeai.app.services.model_records.model_records_base import UnknownModelException from invokeai.app.util.step_callback import stable_diffusion_step_callback -from invokeai.backend.model_manager.config import AnyModelConfig, BaseModelType, ModelFormat, ModelType, SubModelType +from 
invokeai.backend.model_manager.config import ( + AnyModel, + AnyModelConfig, + BaseModelType, + ModelFormat, + ModelType, + SubModelType, +) from invokeai.backend.model_manager.load.load_base import LoadedModel from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData +from invokeai.backend.util.devices import TorchDevice if TYPE_CHECKING: from invokeai.app.invocations.baseinvocation import BaseInvocation from invokeai.app.invocations.model import ModelIdentifierField from invokeai.app.services.session_queue.session_queue_common import SessionQueueItem + from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase """ The InvocationContext provides access to various services and data about the current invocation. @@ -473,6 +483,28 @@ class UtilInterface(InvocationContextInterface): is_canceled=self.is_canceled, ) + def torch_device(self) -> torch.device: + """ + Return a torch device to use in the current invocation. + + Returns: + A torch.device not currently in use by the system. + """ + ram_cache: "ModelCacheBase[AnyModel]" = self._services.model_manager.load.ram_cache + return ram_cache.get_execution_device() + + def torch_dtype(self, device: Optional[torch.device] = None) -> torch.dtype: + """ + Return a precision type to use with the current invocation and torch device. + + Args: + device: Optional device. + + Returns: + A torch.dtype suited for the current device. + """ + return TorchDevice.choose_torch_dtype(device) + class InvocationContext: """Provides access to various services and data for the current invocation. 
diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py index 469d51e4e5..c86ec5ddda 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_base.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_base.py @@ -106,7 +106,7 @@ class ModelCacheBase(ABC, Generic[T]): Return an execution device that has been reserved for current thread. Note that reservations are done using the current thread's TID. - It would be better to do this using the session ID, but that involves + It might be better to do this using the session ID, but that involves too many detailed changes to model manager calls. May generate a ValueError if no GPU has been reserved. diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py index fa13b8c627..f7f466f2b0 100644 --- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py +++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py @@ -127,7 +127,6 @@ class ModelCache(ModelCacheBase[AnyModel]): assigned = [x for x, tid in self._execution_devices.items() if current_thread == tid] if not assigned: raise ValueError("No GPU has been reserved for the use of thread {current_thread}") - print(f'DEBUG: TID={current_thread}; owns {assigned[0]}') return assigned[0] @contextmanager @@ -157,12 +156,15 @@ class ModelCache(ModelCacheBase[AnyModel]): device = free_device[0] # we are outside the lock region now - print(f'DEBUG: RESERVED {device} for TID {current_thread}') + self.logger.info(f"Reserved torch device {device} for execution thread {current_thread}") + + # Tell TorchDevice to use this object to get the torch device. 
+ TorchDevice.set_model_cache(self) try: yield device finally: with self._device_lock: - print(f'DEBUG: RELEASED {device} for TID {current_thread}') + self.logger.info(f"Released torch device {device}") self._execution_devices[device] = 0 self._free_execution_device.release() torch.cuda.empty_cache() @@ -407,12 +409,11 @@ class ModelCache(ModelCacheBase[AnyModel]): if torch.cuda.is_available(): devices = {torch.device(f"cuda:{x}") for x in range(0, torch.cuda.device_count())} elif torch.backends.mps.is_available(): - devices = {torch.device('mps')} + devices = {torch.device("mps")} else: - devices = {torch.device('cpu')} + devices = {torch.device("cpu")} return devices @staticmethod def _device_name(device: torch.device) -> str: return f"{device.type}:{device.index}" - diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 548bbea3ce..bd60b0b8c7 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -399,11 +399,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): unet_attention_patcher = UNetAttentionPatcher(ip_adapters) attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model) - # NOTE error is not here! 
- if conditioning_data.unconditioned_embeddings.embeds.device != \ - conditioning_data.text_embeddings.embeds.device: - print('DEBUG; HERE IS THE ERROR 1') - with attn_ctx: if callback is not None: callback( @@ -418,10 +413,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): # print("timesteps:", timesteps) for i, t in enumerate(self.progress_bar(timesteps)): - if conditioning_data.unconditioned_embeddings.embeds.device != \ - conditioning_data.text_embeddings.embeds.device: - print('DEBUG; HERE IS THE ERROR 2') - batched_t = t.expand(batch_size) step_output = self.step( batched_t, @@ -466,7 +457,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): ip_adapter_data: Optional[list[IPAdapterData]] = None, t2i_adapter_data: Optional[list[T2IAdapterData]] = None, ): - # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value timestep = t[0] if additional_guidance is None: diff --git a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py index 137c84c14c..f418133e49 100644 --- a/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py +++ b/invokeai/backend/stable_diffusion/diffusion/shared_invokeai_diffusion.py @@ -4,7 +4,6 @@ import math from typing import Any, Callable, Optional, Union import torch -import threading from typing_extensions import TypeAlias from invokeai.app.services.config.config_default import get_config @@ -256,8 +255,6 @@ class InvokeAIDiffuserComponent: unconditioning, encoder_attention_mask = _pad_conditioning(unconditioning, max_len, encoder_attention_mask) conditioning, encoder_attention_mask = _pad_conditioning(conditioning, max_len, encoder_attention_mask) - if unconditioning.device != conditioning.device: - print(f'DEBUG: TID={threading.current_thread().ident}: Unconditioning device = {unconditioning.device}, conditioning device={conditioning.device}') return 
torch.cat([unconditioning, conditioning]), encoder_attention_mask # methods below are called from do_diffusion_step and should be considered private to this class. diff --git a/invokeai/backend/util/devices.py b/invokeai/backend/util/devices.py index c5a4def281..b8cdec2ac3 100644 --- a/invokeai/backend/util/devices.py +++ b/invokeai/backend/util/devices.py @@ -1,16 +1,21 @@ -from typing import Dict, Literal, Optional, Union +from typing import TYPE_CHECKING, Dict, Literal, Optional, Union import torch from deprecated import deprecated from invokeai.app.services.config.config_default import get_config +if TYPE_CHECKING: + from invokeai.backend.model_manager.config import AnyModel + from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase + # legacy APIs TorchPrecisionNames = Literal["float32", "float16", "bfloat16"] CPU_DEVICE = torch.device("cpu") CUDA_DEVICE = torch.device("cuda") MPS_DEVICE = torch.device("mps") + @deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore def choose_precision(device: torch.device) -> TorchPrecisionNames: """Return the string representation of the recommended torch device.""" @@ -41,9 +46,18 @@ PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NA class TorchDevice: """Abstraction layer for torch devices.""" + _model_cache: Optional["ModelCacheBase[AnyModel]"] = None + + @classmethod + def set_model_cache(cls, cache: "ModelCacheBase[AnyModel]"): + """Set the current model cache.""" + cls._model_cache = cache + @classmethod def choose_torch_device(cls) -> torch.device: """Return the torch.device to use for accelerated inference.""" + if cls._model_cache: + return cls._model_cache.get_execution_device() app_config = get_config() if app_config.device != "auto": device = torch.device(app_config.device)