From d2c55dc0110dc561a16b8b3aad436a7ad2a4a112 Mon Sep 17 00:00:00 2001
From: Damian Stewart
Date: Sun, 30 Jul 2023 14:20:59 +0200
Subject: [PATCH 1/7] enable .and() syntax and long prompts

---
 invokeai/app/invocations/compel.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py
index fb29e01628..3795fea8fd 100644
--- a/invokeai/app/invocations/compel.py
+++ b/invokeai/app/invocations/compel.py
@@ -127,16 +127,15 @@ class CompelInvocation(BaseInvocation):
             text_encoder=text_encoder,
             textual_inversion_manager=ti_manager,
             dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=True,
+            truncate_long_prompts=False,
         )
 
         conjunction = Compel.parse_prompt_string(self.prompt)
-        prompt: Union[FlattenedPrompt, Blend] = conjunction.prompts[0]
 
         if context.services.configuration.log_tokenization:
-            log_tokenization_for_prompt_object(prompt, tokenizer)
+            log_tokenization_for_prompt_object(conjunction, tokenizer)
 
-        c, options = compel.build_conditioning_tensor_for_prompt_object(prompt)
+        c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
 
         ec = InvokeAIDiffuserComponent.ExtraConditioningInfo(
             tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
@@ -289,7 +288,7 @@ class SDXLPromptInvocationBase:
             text_encoder=text_encoder,
             textual_inversion_manager=ti_manager,
             dtype_for_device_getter=torch_dtype,
-            truncate_long_prompts=True,  # TODO:
+            truncate_long_prompts=False,  # TODO:
             returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,  # TODO: clip skip
             requires_pooled=True,
         )
@@ -298,8 +297,7 @@ class SDXLPromptInvocationBase:
 
         if context.services.configuration.log_tokenization:
             # TODO: better logging for and syntax
-            for prompt_obj in conjunction.prompts:
-                log_tokenization_for_prompt_object(prompt_obj, tokenizer)
+            log_tokenization_for_conjunction(conjunction, tokenizer)
 
         # TODO: ask for optimizations? to not run text_encoder twice
         c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
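
Note: compel's `.and()` syntax splits a prompt into sub-prompts that are encoded
separately and then combined, and `Compel.parse_prompt_string` wraps the result in a
`Conjunction`. The sketch below shows the code path this patch switches to; the
checkpoint name and bare-bones wiring are illustrative assumptions, not InvokeAI's
actual setup.

    # Hypothetical, minimal usage of the conjunction API (not InvokeAI code).
    from compel import Compel
    from transformers import CLIPTextModel, CLIPTokenizer

    repo = "runwayml/stable-diffusion-v1-5"  # assumed example checkpoint
    tokenizer = CLIPTokenizer.from_pretrained(repo, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(repo, subfolder="text_encoder")

    # truncate_long_prompts=False is the behavior change made above: prompts
    # longer than the 77-token CLIP window are encoded in chunks instead of
    # being silently cut off.
    compel = Compel(tokenizer=tokenizer, text_encoder=text_encoder, truncate_long_prompts=False)

    # Each sub-prompt of the .and() is encoded on its own, then combined.
    conjunction = Compel.parse_prompt_string('("a sprawling castle", "dramatic sunset lighting").and()')
    c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
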
From b65c9ad61209f089dcfb80ead84c36e9256c1ccf Mon Sep 17 00:00:00 2001
From: Sergey Borisov
Date: Mon, 28 Aug 2023 04:50:58 +0300
Subject: [PATCH 2/7] Add monkeypatch for xformers to align unaligned attention_mask

---
 invokeai/backend/util/hotfixes.py | 44 +++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/invokeai/backend/util/hotfixes.py b/invokeai/backend/util/hotfixes.py
index 3d7f278f86..cf97d494d7 100644
--- a/invokeai/backend/util/hotfixes.py
+++ b/invokeai/backend/util/hotfixes.py
@@ -761,3 +761,47 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
 
 diffusers.ControlNetModel = ControlNetModel
 diffusers.models.controlnet.ControlNetModel = ControlNetModel
+
+try:
+    import xformers
+    xformers_available = True
+except:
+    xformers_available = False
+
+
+if xformers_available:
+    # TODO: remove when fixed in diffusers
+    _xformers_memory_efficient_attention = xformers.ops.memory_efficient_attention
+    def new_memory_efficient_attention(
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        attn_bias = None,
+        p: float = 0.0,
+        scale: Optional[float] = None,
+        *,
+        op = None,
+    ):
+        # diffusers not align shape to 8, which is required by xformers
+        if attn_bias is not None and type(attn_bias) is torch.Tensor:
+            orig_size = attn_bias.shape[-1]
+            new_size = ((orig_size + 7) // 8) * 8
+            aligned_attn_bias = torch.zeros(
+                (attn_bias.shape[0], attn_bias.shape[1], new_size),
+                device=attn_bias.device,
+                dtype=attn_bias.dtype,
+            )
+            aligned_attn_bias[:,:,:orig_size] = attn_bias
+            attn_bias = aligned_attn_bias[:,:,:orig_size]
+
+        return _xformers_memory_efficient_attention(
+            query=query,
+            key=key,
+            value=value,
+            attn_bias=attn_bias,
+            p=p,
+            scale=scale,
+            op=op,
+        )
+
+    xformers.ops.memory_efficient_attention = new_memory_efficient_attention

From 2bf747caf6a5ea5481984e2545c73af955c1b54c Mon Sep 17 00:00:00 2001
From: Sergey Borisov
Date: Mon, 28 Aug 2023 18:36:27 +0300
Subject: [PATCH 3/7] Blackify

---
 invokeai/backend/util/hotfixes.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/invokeai/backend/util/hotfixes.py b/invokeai/backend/util/hotfixes.py
index cf97d494d7..161a35eb52 100644
--- a/invokeai/backend/util/hotfixes.py
+++ b/invokeai/backend/util/hotfixes.py
@@ -764,6 +764,7 @@ diffusers.models.controlnet.ControlNetModel = ControlNetModel
 
 try:
     import xformers
+
     xformers_available = True
 except:
     xformers_available = False
@@ -772,27 +773,28 @@ except:
 
 
 if xformers_available:
     # TODO: remove when fixed in diffusers
     _xformers_memory_efficient_attention = xformers.ops.memory_efficient_attention
+
     def new_memory_efficient_attention(
         query: torch.Tensor,
         key: torch.Tensor,
         value: torch.Tensor,
-        attn_bias = None,
+        attn_bias=None,
         p: float = 0.0,
         scale: Optional[float] = None,
         *,
-        op = None,
+        op=None,
     ):
         # diffusers not align shape to 8, which is required by xformers
         if attn_bias is not None and type(attn_bias) is torch.Tensor:
             orig_size = attn_bias.shape[-1]
             new_size = ((orig_size + 7) // 8) * 8
-            aligned_attn_bias = torch.zeros(
+            aligned_attn_bias = torch.zeros(
                 (attn_bias.shape[0], attn_bias.shape[1], new_size),
                 device=attn_bias.device,
                 dtype=attn_bias.dtype,
             )
-            aligned_attn_bias[:,:,:orig_size] = attn_bias
-            attn_bias = aligned_attn_bias[:,:,:orig_size]
+            aligned_attn_bias[:, :, :orig_size] = attn_bias
+            attn_bias = aligned_attn_bias[:, :, :orig_size]
 
         return _xformers_memory_efficient_attention(
             query=query,
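
Note on the hotfix above: the final slice back to `orig_size` looks like a no-op, but
it is the point of the exercise. xformers (at the versions current here) rejects an
`attn_bias` whose last dimension is not backed by 8-aligned memory, and the suggested
workaround is exactly this: allocate a padded tensor and slice a view out of it. The
view has the original logical shape, while the underlying storage, and therefore the
strides, stay aligned. A standalone illustration with an arbitrary 77-token mask:

    import torch

    mask = torch.zeros(1, 77, 77)      # last dim 77 is not a multiple of 8
    aligned = torch.zeros(1, 77, 80)   # 80 = ((77 + 7) // 8) * 8
    aligned[:, :, :77] = mask
    view = aligned[:, :, :77]          # same logical shape as `mask`
    print(mask.shape == view.shape)    # True
    print(mask.stride())               # (5929, 77, 1)
    print(view.stride())               # (6160, 80, 1): rows sit on 8-aligned storage
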
             query=query,

From 4196c669a0fc9d4bf1d2160657af7a697b2db23c Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Tue, 29 Aug 2023 12:57:26 +1200
Subject: [PATCH 4/7] chore: black / flake lint errors

---
 invokeai/backend/util/hotfixes.py | 33 ++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/invokeai/backend/util/hotfixes.py b/invokeai/backend/util/hotfixes.py
index 34aefdd827..983d0b7601 100644
--- a/invokeai/backend/util/hotfixes.py
+++ b/invokeai/backend/util/hotfixes.py
@@ -38,7 +38,8 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
             Whether to flip the sin to cos in the time embedding.
         freq_shift (`int`, defaults to 0):
             The frequency shift to apply to the time embedding.
-        down_block_types (`tuple[str]`, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D")`):
+        down_block_types (`tuple[str]`, defaults to `("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", \
+            "CrossAttnDownBlock2D", "DownBlock2D")`):
             The tuple of downsample blocks to use.
         only_cross_attention (`Union[bool, Tuple[bool]]`, defaults to `False`):
         block_out_channels (`tuple[int]`, defaults to `(320, 640, 1280, 1280)`):
@@ -140,7 +141,9 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
         # If `num_attention_heads` is not defined (which is the case for most models)
         # it will default to `attention_head_dim`. This looks weird upon first reading it and it is.
         # The reason for this behavior is to correct for incorrectly named variables that were introduced
-        # when this library was created. The incorrect naming was only discovered much later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131
+        # when this library was created...
+        # The incorrect naming was only discovered much ...
+        # later in https://github.com/huggingface/diffusers/issues/2011#issuecomment-1547958131
         # Changing `attention_head_dim` to `num_attention_heads` for 40,000+ configurations is too backwards breaking
         # which is why we correct for the naming here.
         num_attention_heads = num_attention_heads or attention_head_dim
@@ -148,17 +151,20 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
         # Check inputs
         if len(block_out_channels) != len(down_block_types):
             raise ValueError(
-                f"Must provide the same number of `block_out_channels` as `down_block_types`. `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `block_out_channels` as `down_block_types`. \
+                `block_out_channels`: {block_out_channels}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(only_cross_attention, bool) and len(only_cross_attention) != len(down_block_types):
             raise ValueError(
-                f"Must provide the same number of `only_cross_attention` as `down_block_types`. `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `only_cross_attention` as `down_block_types`. \
+                `only_cross_attention`: {only_cross_attention}. `down_block_types`: {down_block_types}."
             )
 
         if not isinstance(num_attention_heads, int) and len(num_attention_heads) != len(down_block_types):
             raise ValueError(
-                f"Must provide the same number of `num_attention_heads` as `down_block_types`. `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}."
+                f"Must provide the same number of `num_attention_heads` as `down_block_types`. \
+                `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}."
             )
 
         if isinstance(transformer_layers_per_block, int):
@@ -195,7 +201,8 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
             self.encoder_hid_proj = nn.Linear(encoder_hid_dim, cross_attention_dim)
         elif encoder_hid_dim_type == "text_image_proj":
             # image_embed_dim DOESN'T have to be `cross_attention_dim`. To not clutter the __init__ too much
-            # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use
+            # they are set to `cross_attention_dim` here as this is exactly the required dimension ...
+            # for the currently only use
             # case when `addition_embed_type == "text_image_proj"` (Kadinsky 2.1)`
             self.encoder_hid_proj = TextImageProjection(
                 text_embed_dim=encoder_hid_dim,
@@ -243,8 +250,10 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
                 text_time_embedding_from_dim, time_embed_dim, num_heads=addition_embed_type_num_heads
             )
         elif addition_embed_type == "text_image":
-            # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`. To not clutter the __init__ too much
-            # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use
+            # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`.
+            # To not clutter the __init__ too much
+            # they are set to `cross_attention_dim` here as this is exactly the required dimension...
+            # for the currently only use
             # case when `addition_embed_type == "text_image"` (Kadinsky 2.1)`
             self.add_embedding = TextImageTimeEmbedding(
                 text_embed_dim=cross_attention_dim, image_embed_dim=cross_attention_dim, time_embed_dim=time_embed_dim
@@ -666,12 +675,14 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin):
         elif self.config.addition_embed_type == "text_time":
             if "text_embeds" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which \
+                        requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`"
                 )
             text_embeds = added_cond_kwargs.get("text_embeds")
             if "time_ids" not in added_cond_kwargs:
                 raise ValueError(
-                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
+                    f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which \
+                        requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`"
                 )
             time_ids = added_cond_kwargs.get("time_ids")
             time_embeds = self.add_time_proj(time_ids.flatten())
@@ -774,7 +785,7 @@
 
 try:
     import xformers
 
     xformers_available = True
-except:
+except Exception:
     xformers_available = False
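
One caveat about the wrapping style introduced above: a backslash continuation inside
an f-string keeps the next line's leading indentation as part of the string, so these
reformatted error messages now carry embedded runs of spaces. A small demonstration
with a shortened, hypothetical message; implicit concatenation of adjacent literals is
the usual way to wrap without this artifact:

    channels = [320]  # illustrative value

    # Backslash continuation inside the literal: the indent survives in the message.
    wrapped = f"Must provide the same number of `block_out_channels`. \
        `block_out_channels`: {channels}."
    assert "  " in wrapped  # embedded run of spaces

    # Implicit concatenation keeps the message clean.
    clean = "Must provide the same number of `block_out_channels`. " f"`block_out_channels`: {channels}."
    assert "  " not in clean
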
+ f"Must provide the same number of `num_attention_heads` as `down_block_types`. \ + `num_attention_heads`: {num_attention_heads}. `down_block_types`: {down_block_types}." ) if isinstance(transformer_layers_per_block, int): @@ -195,7 +201,8 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin): self.encoder_hid_proj = nn.Linear(encoder_hid_dim, cross_attention_dim) elif encoder_hid_dim_type == "text_image_proj": # image_embed_dim DOESN'T have to be `cross_attention_dim`. To not clutter the __init__ too much - # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use + # they are set to `cross_attention_dim` here as this is exactly the required dimension ... + # for the currently only use # case when `addition_embed_type == "text_image_proj"` (Kadinsky 2.1)` self.encoder_hid_proj = TextImageProjection( text_embed_dim=encoder_hid_dim, @@ -243,8 +250,10 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin): text_time_embedding_from_dim, time_embed_dim, num_heads=addition_embed_type_num_heads ) elif addition_embed_type == "text_image": - # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`. To not clutter the __init__ too much - # they are set to `cross_attention_dim` here as this is exactly the required dimension for the currently only use + # text_embed_dim and image_embed_dim DON'T have to be `cross_attention_dim`. + # To not clutter the __init__ too much + # they are set to `cross_attention_dim` here as this is exactly the required dimension... + # for the currently only use # case when `addition_embed_type == "text_image"` (Kadinsky 2.1)` self.add_embedding = TextImageTimeEmbedding( text_embed_dim=cross_attention_dim, image_embed_dim=cross_attention_dim, time_embed_dim=time_embed_dim @@ -666,12 +675,14 @@ class ControlNetModel(ModelMixin, ConfigMixin, FromOriginalControlnetMixin): elif self.config.addition_embed_type == "text_time": if "text_embeds" not in added_cond_kwargs: raise ValueError( - f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`" + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which \ + requires the keyword argument `text_embeds` to be passed in `added_cond_kwargs`" ) text_embeds = added_cond_kwargs.get("text_embeds") if "time_ids" not in added_cond_kwargs: raise ValueError( - f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`" + f"{self.__class__} has the config param `addition_embed_type` set to 'text_time' which \ + requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`" ) time_ids = added_cond_kwargs.get("time_ids") time_embeds = self.add_time_proj(time_ids.flatten()) @@ -774,7 +785,7 @@ try: import xformers xformers_available = True -except: +except Exception: xformers_available = False From 68dc3c6cb41a492dd1292451a59b0ec832324ca0 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 29 Aug 2023 12:58:59 +1200 Subject: [PATCH 5/7] feat: Upgrade compel to 2.0.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9aef66a35f..129538264d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ dependencies = [ "albumentations", 
"click", "clip_anytorch", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", - "compel~=2.0.0", + "compel~=2.0.2", "controlnet-aux>=0.0.6", "timm==0.6.13", # needed to override timm latest in controlnet_aux, see https://github.com/isl-org/ZoeDepth/issues/26 "datasets", From cfee8d9804fee37a350f55447e4204b5d9d8f576 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 29 Aug 2023 13:09:30 +1200 Subject: [PATCH 6/7] chore: seamless print statement cleanup --- invokeai/backend/model_management/seamless.py | 1 - 1 file changed, 1 deletion(-) diff --git a/invokeai/backend/model_management/seamless.py b/invokeai/backend/model_management/seamless.py index 54885769ad..7138f2e123 100644 --- a/invokeai/backend/model_management/seamless.py +++ b/invokeai/backend/model_management/seamless.py @@ -71,7 +71,6 @@ def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL], seamless_axe """ if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): - print(f"applied - {m_name}") m.asymmetric_padding_mode = {} m.asymmetric_padding = {} m.asymmetric_padding_mode["x"] = "circular" if ("x" in seamless_axes) else "constant" From 121396f8440517b913f4dce0a763a99c002c9f42 Mon Sep 17 00:00:00 2001 From: Sergey Borisov Date: Tue, 29 Aug 2023 17:07:33 +0300 Subject: [PATCH 7/7] Fix tokenization log for sd models --- invokeai/app/invocations/compel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/compel.py b/invokeai/app/invocations/compel.py index 8a4cadc139..e128792d70 100644 --- a/invokeai/app/invocations/compel.py +++ b/invokeai/app/invocations/compel.py @@ -122,7 +122,7 @@ class CompelInvocation(BaseInvocation): conjunction = Compel.parse_prompt_string(self.prompt) if context.services.configuration.log_tokenization: - log_tokenization_for_prompt_object(conjunction, tokenizer) + log_tokenization_for_conjunction(conjunction, tokenizer) c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)