From 994c61b67afb0ef515b89133ffa7a6d9f108e33d Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:32:49 -0400
Subject: [PATCH 1/3] Add docs to TextualInversionManager and improve types.

No changes to functionality.
---
 invokeai/backend/textual_inversion.py | 34 +++++++++++++++++++--------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index f7390979bb..368736617b 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -1,7 +1,7 @@
 """Textual Inversion wrapper class."""
 
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 import torch
 from compel.embeddings_provider import BaseTextualInversionManager
@@ -66,33 +66,47 @@ class TextualInversionModelRaw(RawModel):
         return result
 
 
-# no type hints for BaseTextualInversionManager?
-class TextualInversionManager(BaseTextualInversionManager):  # type: ignore
-    pad_tokens: Dict[int, List[int]]
-    tokenizer: CLIPTokenizer
+class TextualInversionManager(BaseTextualInversionManager):
+    """TextualInversionManager implements the BaseTextualInversionManager ABC from the compel library."""
 
     def __init__(self, tokenizer: CLIPTokenizer):
-        self.pad_tokens = {}
+        self.pad_tokens: dict[int, list[int]] = {}
         self.tokenizer = tokenizer
 
     def expand_textual_inversion_token_ids_if_necessary(self, token_ids: list[int]) -> list[int]:
+        """Given a list of tokens ids, expand any TI tokens to their corresponding pad tokens.
+
+        For example, suppose we have a `<ti_dog>` TI with 4 vectors that was added to the tokenizer with the following
+        mapping of tokens to token_ids:
+        ```
+        <ti_dog>: 49408
+        <ti_dog-!pad-1>: 49409
+        <ti_dog-!pad-2>: 49410
+        <ti_dog-!pad-3>: 49411
+        ```
+        `self.pad_tokens` would be set to `{49408: [49408, 49409, 49410, 49411]}`.
+        This function is responsible for expanding `49408` in the token_ids list to `[49408, 49409, 49410, 49411]`.
+        """
+        # Short circuit if there are no pad tokens to save a little time.
         if len(self.pad_tokens) == 0:
             return token_ids
 
+        # This function assumes that compel has not included the BOS and EOS tokens in the token_ids list. We verify
+        # this assumption here.
         if token_ids[0] == self.tokenizer.bos_token_id:
             raise ValueError("token_ids must not start with bos_token_id")
         if token_ids[-1] == self.tokenizer.eos_token_id:
             raise ValueError("token_ids must not end with eos_token_id")
 
-        new_token_ids = []
+        # Expand any TI tokens to their corresponding pad tokens.
+        new_token_ids: list[int] = []
         for token_id in token_ids:
             new_token_ids.append(token_id)
             if token_id in self.pad_tokens:
                 new_token_ids.extend(self.pad_tokens[token_id])
 
-        # Do not exceed the max model input size
-        # The -2 here is compensating for compensate compel.embeddings_provider.get_token_ids(),
-        # which first removes and then adds back the start and end tokens.
+        # Do not exceed the max model input size. The -2 here is compensating for
+        # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
         max_length = list(self.tokenizer.max_model_input_sizes.values())[0] - 2
         if len(new_token_ids) > max_length:
             new_token_ids = new_token_ids[0:max_length]
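To make the behavior documented in PATCH 1/3 concrete, here is a minimal standalone sketch of the expansion step (not InvokeAI code). The token ids are hypothetical, and it assumes `pad_tokens` maps a TI trigger id to just its extra pad-token ids; in InvokeAI the mapping is populated elsewhere when the TI is loaded, and the real method additionally validates BOS/EOS and truncates to the tokenizer limit.

```python
# Minimal sketch of TI pad-token expansion, with hypothetical token ids.
# Assumption: 49408 is a 4-vector TI trigger and 49409-49411 are its pad tokens.
pad_tokens: dict[int, list[int]] = {49408: [49409, 49410, 49411]}


def expand(token_ids: list[int]) -> list[int]:
    expanded: list[int] = []
    for token_id in token_ids:
        expanded.append(token_id)
        if token_id in pad_tokens:
            # Follow the trigger with its pad tokens so each of the TI's vectors gets a position.
            expanded.extend(pad_tokens[token_id])
    return expanded


# A 3-token prompt fragment containing the trigger expands to 6 tokens.
print(expand([320, 49408, 525]))  # -> [320, 49408, 49409, 49410, 49411, 525]
```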
From 3aa1c8d3a88205b133b31688b39648e431178016 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:35:02 -0400
Subject: [PATCH 2/3] Update TextualInversionManager for compatibility with the
 latest transformers release.

See https://github.com/invoke-ai/InvokeAI/issues/6445.
---
 invokeai/backend/textual_inversion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index 368736617b..005031c95b 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -107,7 +107,7 @@ class TextualInversionManager(BaseTextualInversionManager):
 
         # Do not exceed the max model input size. The -2 here is compensating for
         # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
-        max_length = list(self.tokenizer.max_model_input_sizes.values())[0] - 2
+        max_length = self.tokenizer.model_max_length - 2
         if len(new_token_ids) > max_length:
             new_token_ids = new_token_ids[0:max_length]
 

From 829b9ad66bb8f95c19b9aa2744d15112760009e4 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 27 May 2024 10:53:12 -0400
Subject: [PATCH 3/3] Add a callout about the hackiness of dropping tokens in
 the TextualInversionManager.

---
 invokeai/backend/textual_inversion.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index 005031c95b..98104f769e 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -109,6 +109,9 @@ class TextualInversionManager(BaseTextualInversionManager):
         # compel.embeddings_provider.get_token_ids(), which first removes and then adds back the start and end tokens.
         max_length = self.tokenizer.model_max_length - 2
         if len(new_token_ids) > max_length:
+            # HACK: If TI token expansion causes us to exceed the max text encoder input length, we silently discard
+            # tokens. Token expansion should happen in a way that is compatible with compel's default handling of long
+            # prompts.
             new_token_ids = new_token_ids[0:max_length]
 
         return new_token_ids
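The line changed in PATCH 2/3 and annotated in PATCH 3/3 is the truncation guard. Below is a hedged sketch of what that guard does, assuming the typical CLIP `model_max_length` of 77 and a hypothetical over-long expansion; the `-2` leaves room for the BOS/EOS tokens that compel strips before calling the manager and re-adds afterwards.

```python
# Sketch of the truncation guard (not InvokeAI code). Assumes a CLIP-style limit of 77;
# the patched code reads self.tokenizer.model_max_length instead of max_model_input_sizes.
model_max_length = 77
max_length = model_max_length - 2  # reserve space for the BOS/EOS tokens compel re-adds

new_token_ids = list(range(80))  # hypothetical: TI expansion produced 80 token ids
if len(new_token_ids) > max_length:
    # HACK (per PATCH 3/3): token ids past the limit are silently dropped instead of being
    # handled by compel's long-prompt chunking.
    new_token_ids = new_token_ids[0:max_length]

assert len(new_token_ids) == 75
```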