From 994c61b67afb0ef515b89133ffa7a6d9f108e33d Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:32:49 -0400
Subject: [PATCH 1/3] Add docs to TextualInversionManager and improve types.

No changes to functionality.
---
 invokeai/backend/textual_inversion.py | 34 +++++++++++++++++++--------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index f7390979bb..368736617b 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -1,7 +1,7 @@
 """Textual Inversion wrapper class."""
 
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import torch
 from compel.embeddings_provider import BaseTextualInversionManager
@@ -66,33 +66,47 @@ class TextualInversionModelRaw(RawModel):
         return result
 
 
-# no type hints for BaseTextualInversionManager?
-class TextualInversionManager(BaseTextualInversionManager):  # type: ignore
-    pad_tokens: Dict[int, List[int]]
-    tokenizer: CLIPTokenizer
+class TextualInversionManager(BaseTextualInversionManager):
+    """TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""
 
     def __init__(self, tokenizer: CLIPTokenizer):
-        self.pad_tokens = {}
+        self.pad_tokens: dict[int, list[int]] = {}
         self.tokenizer = tokenizer
 
     def expand_textual_inversion_token_ids_if_necessary(self, token_ids: list[int]) -> list[int]:
+        """Given a list of tokens ids, expand any TI tokens to their corresponding pad tokens.
+
+        For example, suppose we have a `<ti_dog>` TI with 4 vectors that was added to the tokenizer with the following
+        mapping of tokens to token_ids:
+        ```
+        <ti_dog>: 49408
+        <ti_dog-!pad-1>: 49409
+        <ti_dog-!pad-2>: 49410
+        <ti_dog-!pad-3>: 49411
+        ```
+        `self.pad_tokens` would be set to `{49408: [49408, 49409, 49410, 49411]}`.
+        This function is responsible for expanding `49408` in the token_ids list to `[49408, 49409, 49410, 49411]`.
+        """
+        # Short circuit if there are no pad tokens to save a little time.
         if len(self.pad_tokens) == 0:
             return token_ids
 
+        # This function assumes that compel has not included the BOS and EOS tokens in the token_ids list. We verify
+        # this assumption here.
         if token_ids[0] == self.tokenizer.bos_token_id:
             raise ValueError("token_ids must not start with bos_token_id")
         if token_ids[-1] == self.tokenizer.eos_token_id:
             raise ValueError("token_ids must not end with eos_token_id")
 
-        new_token_ids = []
+        # Expand any TI tokens to their corresponding pad tokens.
+        new_token_ids: list[int] = []
         for token_id in token_ids:
             new_token_ids.append(token_id)
             if token_id in self.pad_tokens:
                 new_token_ids.extend(self.pad_tokens[token_id])
 
-        # Do not exceed the max model input size
-        # The -2 here is compensating for compensate compel.embeddings_provider.get_token_ids(),
-        # which first removes and then adds back the start and end tokens.
+        # Do not exceed the max model input size. The -2 here is compensating for
+        # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
         max_length = list(self.tokenizer.max_model_input_sizes.values())[0] - 2
         if len(new_token_ids) > max_length:
             new_token_ids = new_token_ids[0:max_length]
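To make the behavior documented in PATCH 1/3 concrete, here is a minimal standalone sketch of the expansion step (not InvokeAI code). The token ids are hypothetical, and it assumes `pad_tokens` maps a TI trigger id to just its extra pad-token ids; in InvokeAI the mapping is populated elsewhere when the TI is loaded, and the real method additionally validates BOS/EOS and truncates to the tokenizer limit.

```python
# Minimal sketch of TI pad-token expansion, with hypothetical token ids.
# Assumption: 49408 is a 4-vector TI trigger and 49409-49411 are its pad tokens.
pad_tokens: dict[int, list[int]] = {49408: [49409, 49410, 49411]}


def expand(token_ids: list[int]) -> list[int]:
    expanded: list[int] = []
    for token_id in token_ids:
        expanded.append(token_id)
        if token_id in pad_tokens:
            # Follow the trigger with its pad tokens so each of the TI's vectors gets a position.
            expanded.extend(pad_tokens[token_id])
    return expanded


# A 3-token prompt fragment containing the trigger expands to 6 tokens.
print(expand([320, 49408, 525]))  # -> [320, 49408, 49409, 49410, 49411, 525]
```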
From 3aa1c8d3a88205b133b31688b39648e431178016 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:35:02 -0400
Subject: [PATCH 2/3] Update TextualInversionManager for compatibility with the
 latest transformers release.

See https://github.com/invoke-ai/InvokeAI/issues/6445.
---
 invokeai/backend/textual_inversion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index 368736617b..005031c95b 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -107,7 +107,7 @@ class TextualInversionManager(BaseTextualInversionManager):
 
         # Do not exceed the max model input size. The -2 here is compensating for
         # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
-        max_length = list(self.tokenizer.max_model_input_sizes.values())[0] - 2
+        max_length = self.tokenizer.model_max_length - 2
         if len(new_token_ids) > max_length:
             new_token_ids = new_token_ids[0:max_length]
 

From 829b9ad66bb8f95c19b9aa2744d15112760009e4 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:53:12 -0400
Subject: [PATCH 3/3] Add a callout about the hackiness of dropping tokens in
 the TextualInversionManager.

---
 invokeai/backend/textual_inversion.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index 005031c95b..98104f769e 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -109,6 +109,9 @@ class TextualInversionManager(BaseTextualInversionManager):
         # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
         max_length = self.tokenizer.model_max_length - 2
         if len(new_token_ids) > max_length:
+            # HACK: If TI token expansion causes us to exceed the max text encoder input length, we silently discard
+            # tokens. Token expansion should happen in a way that is compatible with compel's default handling of long
+            # prompts.
             new_token_ids = new_token_ids[0:max_length]
 
         return new_token_ids
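The line changed in PATCH 2/3 and annotated in PATCH 3/3 is the truncation guard. Below is a hedged sketch of what that guard does, assuming the typical CLIP `model_max_length` of 77 and a hypothetical over-long expansion; the `-2` leaves room for the BOS/EOS tokens that compel strips before calling the manager and re-adds afterwards.

```python
# Sketch of the truncation guard (not InvokeAI code). Assumes a CLIP-style limit of 77;
# the patched code reads self.tokenizer.model_max_length instead of max_model_input_sizes.
model_max_length = 77
max_length = model_max_length - 2  # reserve space for the BOS/EOS tokens compel re-adds

new_token_ids = list(range(80))  # hypothetical: TI expansion produced 80 token ids
if len(new_token_ids) > max_length:
    # HACK (per PATCH 3/3): token ids past the limit are silently dropped instead of being
    # handled by compel's long-prompt chunking.
    new_token_ids = new_token_ids[0:max_length]

assert len(new_token_ids) == 75
```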