InvokeAI/invokeai/backend/textual_inversion.py

"""Textual Inversion wrapper class."""

from pathlib import Path
from typing import Optional, Union

import torch
from compel.embeddings_provider import BaseTextualInversionManager
from safetensors.torch import load_file
from transformers import CLIPTokenizer
from typing_extensions import Self

from invokeai.backend.raw_model import RawModel


class TextualInversionModelRaw(RawModel):
    embedding: torch.Tensor  # [n, 768]|[n, 1280]
    embedding_2: Optional[torch.Tensor] = None  # [n, 768]|[n, 1280]   - for SDXL models

    @classmethod
    def from_checkpoint(
        cls,
        file_path: Union[str, Path],
        device: Optional[torch.device] = None,
        dtype: Optional[torch.dtype] = None,
    ) -> Self:
        if not isinstance(file_path, Path):
            file_path = Path(file_path)

        result = cls()  # TODO:

        if file_path.suffix == ".safetensors":
            state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
        else:
            state_dict = torch.load(file_path, map_location="cpu")

        # both v1 and v2 format embeddings
        # difference mostly in metadata
        if "string_to_param" in state_dict:
            if len(state_dict["string_to_param"]) > 1:
                print(
                    f'Warn: Embedding "{file_path.name}" contains multiple tokens, which is not supported. The first',
                    " token will be used.",
                )

            result.embedding = next(iter(state_dict["string_to_param"].values()))

        # v3 (easynegative)
        elif "emb_params" in state_dict:
            result.embedding = state_dict["emb_params"]

        # v5(sdxl safetensors file)
        elif "clip_g" in state_dict and "clip_l" in state_dict:
            result.embedding = state_dict["clip_g"]
            result.embedding_2 = state_dict["clip_l"]

        # v4(diffusers bin files)
        else:
            result.embedding = next(iter(state_dict.values()))

            if len(result.embedding.shape) == 1:
                result.embedding = result.embedding.unsqueeze(0)

            if not isinstance(result.embedding, torch.Tensor):
                raise ValueError(f"Invalid embeddings file: {file_path.name}")

        return result

    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
        if not torch.cuda.is_available():
            return
        for emb in [self.embedding, self.embedding_2]:
            if emb is not None:
                emb.to(device=device, dtype=dtype)

    def calc_size(self) -> int:
        """Get the size of this model in bytes."""
        embedding_size = self.embedding.element_size() * self.embedding.nelement()
        embedding_2_size = 0
        if self.embedding_2 is not None:
            embedding_2_size = self.embedding_2.element_size() * self.embedding_2.nelement()
        return embedding_size + embedding_2_size


class TextualInversionManager(BaseTextualInversionManager):
    """TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""

    def __init__(self, tokenizer: CLIPTokenizer):
        self.pad_tokens: dict[int, list[int]] = {}
        self.tokenizer = tokenizer

    def expand_textual_inversion_token_ids_if_necessary(self, token_ids: list[int]) -> list[int]:
        """Given a list of tokens ids, expand any TI tokens to their corresponding pad tokens.

        For example, suppose we have a `<ti_dog>` TI with 4 vectors that was added to the tokenizer with the following
        mapping of tokens to token_ids:
        ```
        <ti_dog>: 49408
        <ti_dog-!pad-1>: 49409
        <ti_dog-!pad-2>: 49410
        <ti_dog-!pad-3>: 49411
        ```
        `self.pad_tokens` would be set to `{49408: [49408, 49409, 49410, 49411]}`.
        This function is responsible for expanding `49408` in the token_ids list to `[49408, 49409, 49410, 49411]`.
        """
        # Short circuit if there are no pad tokens to save a little time.
        if len(self.pad_tokens) == 0:
            return token_ids

        # This function assumes that compel has not included the BOS and EOS tokens in the token_ids list. We verify
        # this assumption here.
        if token_ids[0] == self.tokenizer.bos_token_id:
            raise ValueError("token_ids must not start with bos_token_id")
        if token_ids[-1] == self.tokenizer.eos_token_id:
            raise ValueError("token_ids must not end with eos_token_id")

        # Expand any TI tokens to their corresponding pad tokens.
        new_token_ids: list[int] = []
        for token_id in token_ids:
            new_token_ids.append(token_id)
            if token_id in self.pad_tokens:
                new_token_ids.extend(self.pad_tokens[token_id])

        # Do not exceed the max model input size. The -2 here is compensating for
        # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
        max_length = self.tokenizer.model_max_length - 2
        if len(new_token_ids) > max_length:
            # HACK: If TI token expansion causes us to exceed the max text encoder input length, we silently discard
            # tokens. Token expansion should happen in a way that is compatible with compel's default handling of long
            # prompts.
            new_token_ids = new_token_ids[0:max_length]

        return new_token_ids
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`"""Textual Inversion wrapper class."""`

			`from pathlib import Path`
Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`from typing import Optional, Union`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00
			`import torch`
			`from compel.embeddings_provider import BaseTextualInversionManager`
			`from safetensors.torch import load_file`
			`from transformers import CLIPTokenizer`
			`from typing_extensions import Self`
final tidying before marking PR as ready for review - Replace AnyModelLoader with ModelLoaderRegistry - Fix type check errors in multiple files - Remove apparently unneeded `get_model_config_enum()` method from model manager - Remove last vestiges of old model manager - Updated tests and documentation resolve conflict with seamless.py 2024-02-18 06:27:42 +00:00
Apply ruff rule to disallow all relative imports. 2024-07-03 16:20:35 +00:00			`from invokeai.backend.raw_model import RawModel`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00
final tidying before marking PR as ready for review - Replace AnyModelLoader with ModelLoaderRegistry - Fix type check errors in multiple files - Remove apparently unneeded `get_model_config_enum()` method from model manager - Remove last vestiges of old model manager - Updated tests and documentation resolve conflict with seamless.py 2024-02-18 06:27:42 +00:00
Tidy names and locations of modules - Rename old "model_management" directory to "model_management_OLD" in order to catch dangling references to original model manager. - Caught and fixed most dangling references (still checking) - Rename lora, textual_inversion and model_patcher modules - Introduce a RawModel base class to simplfy the Union returned by the model loaders. - Tidy up the model manager 2-related tests. Add useful fixtures, and a finalizer to the queue and installer fixtures that will stop the services and release threads. 2024-02-17 16:45:32 +00:00			`class TextualInversionModelRaw(RawModel):`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`embedding: torch.Tensor # [n, 768]\|[n, 1280]`
			`embedding_2: Optional[torch.Tensor] = None # [n, 768]\|[n, 1280] - for SDXL models`

			`@classmethod`
			`def from_checkpoint(`
			`cls,`
			`file_path: Union[str, Path],`
			`device: Optional[torch.device] = None,`
			`dtype: Optional[torch.dtype] = None,`
			`) -> Self:`
			`if not isinstance(file_path, Path):`
			`file_path = Path(file_path)`

			`result = cls() # TODO:`

			`if file_path.suffix == ".safetensors":`
			`state_dict = load_file(file_path.absolute().as_posix(), device="cpu")`
			`else:`
			`state_dict = torch.load(file_path, map_location="cpu")`

			`# both v1 and v2 format embeddings`
			`# difference mostly in metadata`
			`if "string_to_param" in state_dict:`
			`if len(state_dict["string_to_param"]) > 1:`
			`print(`
			`f'Warn: Embedding "{file_path.name}" contains multiple tokens, which is not supported. The first',`
			`" token will be used.",`
			`)`

			`result.embedding = next(iter(state_dict["string_to_param"].values()))`

			`# v3 (easynegative)`
			`elif "emb_params" in state_dict:`
			`result.embedding = state_dict["emb_params"]`

			`# v5(sdxl safetensors file)`
			`elif "clip_g" in state_dict and "clip_l" in state_dict:`
			`result.embedding = state_dict["clip_g"]`
			`result.embedding_2 = state_dict["clip_l"]`

			`# v4(diffusers bin files)`
			`else:`
			`result.embedding = next(iter(state_dict.values()))`

			`if len(result.embedding.shape) == 1:`
			`result.embedding = result.embedding.unsqueeze(0)`

			`if not isinstance(result.embedding, torch.Tensor):`
			`raise ValueError(f"Invalid embeddings file: {file_path.name}")`

			`return result`

fix(backend): revert non-blocking device transfer In #6490 we enabled non-blocking torch device transfers throughout the model manager's memory management code. When using this torch feature, torch attempts to wait until the tensor transfer has completed before allowing any access to the tensor. Theoretically, that should make this a safe feature to use. This provides a small performance improvement but causes race conditions in some situations. Specific platforms/systems are affected, and complicated data dependencies can make this unsafe. - Intermittent black images on MPS devices - reported on discord and #6545, fixed with special handling in #6549. - Intermittent OOMs and black images on a P4000 GPU on Windows - reported in #6613, fixed in this commit. On my system, I haven't experience any issues with generation, but targeted testing of non-blocking ops did expose a race condition when moving tensors from CUDA to CPU. One workaround is to use torch streams with manual sync points. Our application logic is complicated enough that this would be a lot of work and feels ripe for edge cases and missed spots. Much safer is to fully revert non-locking - which is what this change does. 2024-07-15 21:05:29 +00:00			`def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:`
Improve RAM<->VRAM memory copy performance in LoRA patching and elsewhere (#6490) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes requested during penultimate review * add non-blocking=True parameters to several torch.nn.Module.to() calls, for slight performance increases * fix ruff errors * prevent crash on non-cuda-enabled systems --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-13 17:10:03 +00:00			`if not torch.cuda.is_available():`
			`return`
			`for emb in [self.embedding, self.embedding_2]:`
			`if emb is not None:`
fix(backend): revert non-blocking device transfer In #6490 we enabled non-blocking torch device transfers throughout the model manager's memory management code. When using this torch feature, torch attempts to wait until the tensor transfer has completed before allowing any access to the tensor. Theoretically, that should make this a safe feature to use. This provides a small performance improvement but causes race conditions in some situations. Specific platforms/systems are affected, and complicated data dependencies can make this unsafe. - Intermittent black images on MPS devices - reported on discord and #6545, fixed with special handling in #6549. - Intermittent OOMs and black images on a P4000 GPU on Windows - reported in #6613, fixed in this commit. On my system, I haven't experience any issues with generation, but targeted testing of non-blocking ops did expose a race condition when moving tensors from CUDA to CPU. One workaround is to use torch streams with manual sync points. Our application logic is complicated enough that this would be a lot of work and feels ripe for edge cases and missed spots. Much safer is to fully revert non-locking - which is what this change does. 2024-07-15 21:05:29 +00:00			`emb.to(device=device, dtype=dtype)`
Improve RAM<->VRAM memory copy performance in LoRA patching and elsewhere (#6490) * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * allow model patcher to optimize away the unpatching step when feasible * remove lazy_offloading functionality * do not save original weights if there is a CPU copy of state dict * Update invokeai/backend/model_manager/load/load_base.py Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> * documentation fixes requested during penultimate review * add non-blocking=True parameters to several torch.nn.Module.to() calls, for slight performance increases * fix ruff errors * prevent crash on non-cuda-enabled systems --------- Co-authored-by: Lincoln Stein <lstein@gmail.com> Co-authored-by: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com> Co-authored-by: Ryan Dick <ryanjdick3@gmail.com> 2024-06-13 17:10:03 +00:00
Update calc_model_size_by_data(...) to handle all expected model types, and to log an error if an unexpected model type is received. 2024-07-03 01:14:12 +00:00			`def calc_size(self) -> int:`
			`"""Get the size of this model in bytes."""`
			`embedding_size = self.embedding.element_size() * self.embedding.nelement()`
			`embedding_2_size = 0`
			`if self.embedding_2 is not None:`
			`embedding_2_size = self.embedding_2.element_size() * self.embedding_2.nelement()`
			`return embedding_size + embedding_2_size`

BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00
Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`class TextualInversionManager(BaseTextualInversionManager):`
			`"""TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00
			`def __init__(self, tokenizer: CLIPTokenizer):`
Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`self.pad_tokens: dict[int, list[int]] = {}`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`self.tokenizer = tokenizer`

			`def expand_textual_inversion_token_ids_if_necessary(self, token_ids: list[int]) -> list[int]:`
Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`"""Given a list of tokens ids, expand any TI tokens to their corresponding pad tokens.`

			For example, suppose we have a `<ti_dog>` TI with 4 vectors that was added to the tokenizer with the following
			`mapping of tokens to token_ids:`
			```
			`<ti_dog>: 49408`
			`<ti_dog-!pad-1>: 49409`
			`<ti_dog-!pad-2>: 49410`
			`<ti_dog-!pad-3>: 49411`
			```
			`self.pad_tokens` would be set to `{49408: [49408, 49409, 49410, 49411]}`.
			This function is responsible for expanding `49408` in the token_ids list to `[49408, 49409, 49410, 49411]`.
			`"""`
			`# Short circuit if there are no pad tokens to save a little time.`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`if len(self.pad_tokens) == 0:`
			`return token_ids`

Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`# This function assumes that compel has not included the BOS and EOS tokens in the token_ids list. We verify`
			`# this assumption here.`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`if token_ids[0] == self.tokenizer.bos_token_id:`
			`raise ValueError("token_ids must not start with bos_token_id")`
			`if token_ids[-1] == self.tokenizer.eos_token_id:`
			`raise ValueError("token_ids must not end with eos_token_id")`

Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`# Expand any TI tokens to their corresponding pad tokens.`
			`new_token_ids: list[int] = []`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`for token_id in token_ids:`
			`new_token_ids.append(token_id)`
			`if token_id in self.pad_tokens:`
			`new_token_ids.extend(self.pad_tokens[token_id])`

Add docs to TextualInversionManager and improve types. No changes to functionality. 2024-05-27 14:32:49 +00:00			`# Do not exceed the max model input size. The -2 here is compensating for`
			`# compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.`
Update TextualInversionManager for compatibility with the latest transformers release. See https://github.com/invoke-ai/InvokeAI/issues/6445. 2024-05-27 14:35:02 +00:00			`max_length = self.tokenizer.model_max_length - 2`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`if len(new_token_ids) > max_length:`
Add a callout about the hackiness of dropping tokens in the TextualInversionManager. 2024-05-27 14:53:12 +00:00			`# HACK: If TI token expansion causes us to exceed the max text encoder input length, we silently discard`
			`# tokens. Token expansion should happen in a way that is compatible with compel's default handling of long`
			`# prompts.`
BREAKING CHANGES: invocations now require model key, not base/type/name - Implement new model loader and modify invocations and embeddings - Finish implementation loaders for all models currently supported by InvokeAI. - Move lora, textual_inversion, and model patching support into backend/embeddings. - Restore support for model cache statistics collection (a little ugly, needs work). - Fixed up invocations that load and patch models. - Move seamless and silencewarnings utils into better location 2024-02-06 03:56:32 +00:00			`new_token_ids = new_token_ids[0:max_length]`

			`return new_token_ids`