InvokeAI/invokeai/backend/stable_diffusion/diffusion/custom_atttention.py

from itertools import cycle, islice
from typing import List, Optional, TypedDict, cast

import torch
import torch.nn.functional as F
from diffusers.models.attention_processor import Attention, AttnProcessor2_0

from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionProcessorWeights
from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import RegionalIPData
from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData


class IPAdapterAttentionWeights(TypedDict):
    """Layer-specific IP-Adapter configuration passed to CustomAttnProcessor2_0.__init__()."""

    # Attention weights for the IP-Adapters; entry i is applied to the i'th image prompt embed.
    ip_adapter_weights: List[IPAttentionProcessorWeights]
    # When True, the IP-Adapter attention contribution is not computed/added by this processor.
    skip: bool


class CustomAttnProcessor2_0(AttnProcessor2_0):
    """A custom implementation of AttnProcessor2_0 that supports additional Invoke features.

    This implementation is based on
    https://github.com/huggingface/diffusers/blame/fcfa270fbd1dc294e2f3a505bae6bcb791d721c3/src/diffusers/models/attention_processor.py#L1204

    Supported custom features:
    - IP-Adapter
    - Regional prompt attention
    """

    def __init__(
        self,
        ip_adapter_attention_weights: Optional[IPAdapterAttentionWeights] = None,
    ):
        """Initialize a CustomAttnProcessor2_0.

        Note: Arguments that are the same for all attention layers are passed to __call__(). Arguments that are
        layer-specific are passed to __init__().

        Args:
            ip_adapter_attention_weights: The IP-Adapter configuration for this layer, or None if IP-Adapter is not
                in use. ip_adapter_attention_weights["ip_adapter_weights"][i] contains the attention weights for the
                i'th IP-Adapter. If ip_adapter_attention_weights["skip"] is True, the IP-Adapter contribution is not
                applied by this processor.
        """
        super().__init__()
        self._ip_adapter_attention_weights = ip_adapter_attention_weights

    def __call__(
        self,
        attn: Attention,
        hidden_states: torch.Tensor,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        temb: Optional[torch.Tensor] = None,
        # For Regional Prompting:
        regional_prompt_data: Optional[RegionalPromptData] = None,
        percent_through: Optional[torch.Tensor] = None,
        # For IP-Adapter:
        regional_ip_data: Optional[RegionalIPData] = None,
        *args,
        **kwargs,
    ) -> torch.FloatTensor:
        """Apply attention.

        Args:
            attn: The attention module whose projections and norms are applied.
            hidden_states: The query-side input. May be 3-D (batch, seq_len, dim) or 4-D
                (batch, channel, height, width); 4-D inputs are flattened and restored on output.
            encoder_hidden_states: The key/value-side input. If None, self-attention is performed on hidden_states.
            attention_mask: Optional attention mask. Combined additively with the regional prompt mask when both are
                present.
            temb: Passed to attn.spatial_norm when the module has one.
            regional_prompt_data: The regional prompt data for the current batch. If not None, this will be used to
                apply regional prompt masking (cross-attention only).
            percent_through: Must not be None when regional prompt masking is applied.
            regional_ip_data: The IP-Adapter data for the current batch. Must be provided iff IP-Adapter weights
                were passed to __init__() (checked for cross-attention only).

        Returns:
            The attention output, with the same dimensionality as the input hidden_states.
        """
        # If true, we are doing cross-attention, if false we are doing self-attention.
        is_cross_attention = encoder_hidden_states is not None

        # Start unmodified block from AttnProcessor2_0.
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        residual = hidden_states
        if attn.spatial_norm is not None:
            hidden_states = attn.spatial_norm(hidden_states, temb)

        input_ndim = hidden_states.ndim

        if input_ndim == 4:
            batch_size, channel, height, width = hidden_states.shape
            hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)

        batch_size, sequence_length, _ = (
            hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
        )
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        # End unmodified block from AttnProcessor2_0.

        _, query_seq_len, _ = hidden_states.shape
        # Handle regional prompt attention masks.
        if regional_prompt_data is not None and is_cross_attention:
            assert percent_through is not None
            prompt_region_attention_mask = regional_prompt_data.get_cross_attn_mask(
                query_seq_len=query_seq_len, key_seq_len=sequence_length
            )

            if attention_mask is None:
                attention_mask = prompt_region_attention_mask
            else:
                attention_mask = prompt_region_attention_mask + attention_mask

        # Start unmodified block from AttnProcessor2_0.
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        if attention_mask is not None:
            attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
            # scaled_dot_product_attention expects attention_mask shape to be
            # (batch, heads, source_length, target_length)
            attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])

        if attn.group_norm is not None:
            hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

        query = attn.to_q(hidden_states)

        if encoder_hidden_states is None:
            encoder_hidden_states = hidden_states
        elif attn.norm_cross:
            encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

        key = attn.to_k(encoder_hidden_states)
        value = attn.to_v(encoder_hidden_states)

        inner_dim = key.shape[-1]
        head_dim = inner_dim // attn.heads

        query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

        key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
        value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

        # the output of sdp = (batch, num_heads, seq_len, head_dim)
        # TODO: add support for attn.scale when we move to Torch 2.1
        hidden_states = F.scaled_dot_product_attention(
            query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
        )

        hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
        hidden_states = hidden_states.to(query.dtype)
        # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        # End unmodified block from AttnProcessor2_0.

        # Apply IP-Adapter conditioning.
        if is_cross_attention:
            hidden_states = self._apply_ip_adapters(
                attn=attn,
                query=query,
                hidden_states=hidden_states,
                encoder_hidden_states=encoder_hidden_states,
                regional_ip_data=regional_ip_data,
                query_seq_len=query_seq_len,
            )

        # Start block from AttnProcessor2_0 (the 4-D reshape differs from upstream, see note below).
        # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
        # linear proj
        hidden_states = attn.to_out[0](hidden_states)
        # dropout
        hidden_states = attn.to_out[1](hidden_states)

        if input_ndim == 4:
            # hidden_states is 3-D (batch, seq_len, dim) at this point, so its shape cannot be unpacked into four
            # values. residual still holds the original 4-D input, so recover the target shape from it.
            batch_size, channel, height, width = residual.shape
            hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)

        if attn.residual_connection:
            hidden_states = hidden_states + residual

        hidden_states = hidden_states / attn.rescale_output_factor

        return cast(torch.FloatTensor, hidden_states)

    def _apply_ip_adapters(
        self,
        attn: Attention,
        query: torch.Tensor,
        hidden_states: torch.Tensor,
        encoder_hidden_states: torch.Tensor,
        regional_ip_data: Optional[RegionalIPData],
        query_seq_len: int,
    ) -> torch.Tensor:
        """Add each IP-Adapter's masked, scaled attention output to the cross-attention output.

        Args:
            attn: The attention module (used for the number of heads).
            query: The per-head query tensor, shape (batch_size, num_heads, query_seq_len, head_dim).
            hidden_states: The cross-attention output, shape (batch_size, query_seq_len, num_heads * head_dim).
            encoder_hidden_states: The key/value-side input, used only for shape sanity checks.
            regional_ip_data: The IP-Adapter data for the current batch.
            query_seq_len: The query sequence length, used to select the IP-Adapter masks.

        Returns:
            hidden_states with the IP-Adapter contributions added (unchanged if IP-Adapter is disabled or skipped).
        """
        if not self._ip_adapter_attention_weights:
            # If IP-Adapter is not enabled, then regional_ip_data should not be passed in.
            assert regional_ip_data is None
            return hidden_states

        assert regional_ip_data is not None
        ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len)

        # Pad (by cycling) or truncate the weight list to match the number of regional embeds. This is done on a
        # local copy: writing the result back to self would permanently alter the stored weights, so a later call
        # with a different number of embeds would cycle over the wrong list.
        num_ip_adapters = len(regional_ip_data.image_prompt_embeds)
        ip_adapter_weights = list(
            islice(cycle(self._ip_adapter_attention_weights["ip_adapter_weights"]), num_ip_adapters)
        )

        assert num_ip_adapters == len(ip_adapter_weights) == len(regional_ip_data.scales) == ip_masks.shape[1]

        skip = self._ip_adapter_attention_weights["skip"]

        # query was produced by view(batch_size, -1, heads, head_dim).transpose(1, 2) in __call__().
        batch_size = query.shape[0]
        head_dim = query.shape[-1]

        for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds):
            ipa_weights = ip_adapter_weights[ipa_index]
            ipa_scale = regional_ip_data.scales[ipa_index]
            ip_mask = ip_masks[0, ipa_index, ...]

            # The batch dimensions should match.
            assert ipa_embed.shape[0] == encoder_hidden_states.shape[0]
            # The trailing (embedding) dimensions should match.
            assert ipa_embed.shape[-1] == encoder_hidden_states.shape[-1]

            if skip:
                # This layer is configured to not apply IP-Adapter conditioning; only the sanity checks run.
                continue

            # Expected ipa_embed shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding)
            ip_key = ipa_weights.to_k_ip(ipa_embed)
            ip_value = ipa_weights.to_v_ip(ipa_embed)

            # Expected ip_key and ip_value shape:
            # (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads)

            ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
            ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)

            # Expected ip_key and ip_value shape:
            # (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim)

            # TODO: add support for attn.scale when we move to Torch 2.1
            ip_hidden_states = F.scaled_dot_product_attention(
                query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False
            )

            # Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim)

            ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
            ip_hidden_states = ip_hidden_states.to(query.dtype)

            # Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim)

            hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask

        return hidden_states
fix(experimental): Possible fix for conflict with regional embed length mismatch Pushing this so people can test it out and see if this needs to be handled in a different way. 2024-04-14 06:49:19 +00:00			`from itertools import cycle, islice`
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`from typing import List, Optional, TypedDict, cast`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
			`import torch`
			`import torch.nn.functional as F`
			`from diffusers.models.attention_processor import Attention, AttnProcessor2_0`

Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionProcessorWeights`
Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-03-14 17:56:03 +00:00			`from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import RegionalIPData`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData`

Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`class IPAdapterAttentionWeights(TypedDict):`
			`ip_adapter_weights: List[IPAttentionProcessorWeights]`
			`skip: bool`


Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`class CustomAttnProcessor2_0(AttnProcessor2_0):`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`"""A custom implementation of AttnProcessor2_0 that supports additional Invoke features.`
			`This implementation is based on`
			`https://github.com/huggingface/diffusers/blame/fcfa270fbd1dc294e2f3a505bae6bcb791d721c3/src/diffusers/models/attention_processor.py#L1204`
			`Supported custom features:`
			`- IP-Adapter`
			`- Regional prompt attention`
			`"""`

			`def __init__(`
			`self,`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`ip_adapter_attention_weights: Optional[IPAdapterAttentionWeights] = None,`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`):`
			`"""Initialize a CustomAttnProcessor2_0.`
			`Note: Arguments that are the same for all attention layers are passed to __call__(). Arguments that are`
			`layer-specific are passed to __init__().`
			`Args:`
			`ip_adapter_weights: The IP-Adapter attention weights. ip_adapter_weights[i] contains the attention weights`
			`for the i'th IP-Adapter.`
			`"""`
			`super().__init__()`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`self._ip_adapter_attention_weights = ip_adapter_attention_weights`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
			`def __call__(`
			`self,`
			`attn: Attention,`
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`hidden_states: torch.Tensor,`
			`encoder_hidden_states: Optional[torch.Tensor] = None,`
			`attention_mask: Optional[torch.Tensor] = None,`
			`temb: Optional[torch.Tensor] = None,`
			`# For Regional Prompting:`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`regional_prompt_data: Optional[RegionalPromptData] = None,`
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`percent_through: Optional[torch.Tensor] = None,`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# For IP-Adapter:`
Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-03-14 17:56:03 +00:00			`regional_ip_data: Optional[RegionalIPData] = None,`
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`*args,`
			`**kwargs,`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`) -> torch.FloatTensor:`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`"""Apply attention.`
			`Args:`
			`regional_prompt_data: The regional prompt data for the current batch. If not None, this will be used to`
			`apply regional prompt masking.`
Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-03-14 17:56:03 +00:00			`regional_ip_data: The IP-Adapter data for the current batch.`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`"""`
			`# If true, we are doing cross-attention, if false we are doing self-attention.`
			`is_cross_attention = encoder_hidden_states is not None`

			`# Start unmodified block from AttnProcessor2_0.`
			`# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`residual = hidden_states`
			`if attn.spatial_norm is not None:`
			`hidden_states = attn.spatial_norm(hidden_states, temb)`

			`input_ndim = hidden_states.ndim`

			`if input_ndim == 4:`
			`batch_size, channel, height, width = hidden_states.shape`
			`hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)`

			`batch_size, sequence_length, _ = (`
			`hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape`
			`)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^`
			`# End unmodified block from AttnProcessor2_0.`

Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`_, query_seq_len, _ = hidden_states.shape`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# Handle regional prompt attention masks.`
Update the diffusion logic to use the new regional prompting feature. 2024-03-08 19:34:49 +00:00			`if regional_prompt_data is not None and is_cross_attention:`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`assert percent_through is not None`
Update the diffusion logic to use the new regional prompting feature. 2024-03-08 19:34:49 +00:00			`prompt_region_attention_mask = regional_prompt_data.get_cross_attn_mask(`
			`query_seq_len=query_seq_len, key_seq_len=sequence_length`
			`)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
			`if attention_mask is None:`
			`attention_mask = prompt_region_attention_mask`
			`else:`
			`attention_mask = prompt_region_attention_mask + attention_mask`

			`# Start unmodified block from AttnProcessor2_0.`
			`# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`if attention_mask is not None:`
			`attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)`
			`# scaled_dot_product_attention expects attention_mask shape to be`
			`# (batch, heads, source_length, target_length)`
			`attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])`

			`if attn.group_norm is not None:`
			`hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)`

Pull the upstream changes from diffusers' AttnProcessor2_0 into CustomAttnProcessor2_0. This fixes a bug in CustomAttnProcessor2_0 that was being triggered when peft was not installed. The bug was present in a block of code that was previously copied from diffusers. The bug seems to have been introduced during diffusers' migration to PEFT for their LoRA handling. The upstream bug was fixed in https://github.com/huggingface/diffusers/commit/531e719163d2d7cf0d725bb685c1e8fe3393b9da. 2024-04-08 14:55:54 +00:00			`query = attn.to_q(hidden_states)`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
			`if encoder_hidden_states is None:`
			`encoder_hidden_states = hidden_states`
			`elif attn.norm_cross:`
			`encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)`

Pull the upstream changes from diffusers' AttnProcessor2_0 into CustomAttnProcessor2_0. This fixes a bug in CustomAttnProcessor2_0 that was being triggered when peft was not installed. The bug was present in a block of code that was previously copied from diffusers. The bug seems to have been introduced during diffusers' migration to PEFT for their LoRA handling. The upstream bug was fixed in https://github.com/huggingface/diffusers/commit/531e719163d2d7cf0d725bb685c1e8fe3393b9da. 2024-04-08 14:55:54 +00:00			`key = attn.to_k(encoder_hidden_states)`
			`value = attn.to_v(encoder_hidden_states)`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
			`inner_dim = key.shape[-1]`
			`head_dim = inner_dim // attn.heads`

			`query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)`

			`key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)`
			`value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)`

			`# the output of sdp = (batch, num_heads, seq_len, head_dim)`
			`# TODO: add support for attn.scale when we move to Torch 2.1`
			`hidden_states = F.scaled_dot_product_attention(`
			`query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False`
			`)`

			`hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)`
			`hidden_states = hidden_states.to(query.dtype)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^`
			`# End unmodified block from AttnProcessor2_0.`

			`# Apply IP-Adapter conditioning.`
(minor) Fix IP-Adapter conditional logic in CustomAttnProcessor2_0. 2024-04-09 16:27:24 +00:00			`if is_cross_attention:`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`if self._ip_adapter_attention_weights:`
Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-03-14 17:56:03 +00:00			`assert regional_ip_data is not None`
Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len)`
fix(experimental): Possible fix for conflict with regional embed length mismatch Pushing this so people can test it out and see if this needs to be handled in a different way. 2024-04-14 06:49:19 +00:00
			`# Pad weight tensor list to match size of regional embeds`
			`self._ip_adapter_attention_weights["ip_adapter_weights"] = list(`
			`islice(`
			`cycle(self._ip_adapter_attention_weights["ip_adapter_weights"]),`
			`len(regional_ip_data.image_prompt_embeds),`
			`)`
			`)`

Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`assert (`
			`len(regional_ip_data.image_prompt_embeds)`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`== len(self._ip_adapter_attention_weights["ip_adapter_weights"])`
Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`== len(regional_ip_data.scales)`
			`== ip_masks.shape[1]`
			`)`
fix(experimental): Possible fix for conflict with regional embed length mismatch Pushing this so people can test it out and see if this needs to be handled in a different way. 2024-04-14 06:49:19 +00:00
Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds):`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`ipa_weights = self._ip_adapter_attention_weights["ip_adapter_weights"][ipa_index]`
Add support for IP-Adapter masks. 2024-03-14 20:58:11 +00:00			`ipa_scale = regional_ip_data.scales[ipa_index]`
			`ip_mask = ip_masks[0, ipa_index, ...]`

Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# The batch dimensions should match.`
			`assert ipa_embed.shape[0] == encoder_hidden_states.shape[0]`
			`# The token_len dimensions should match.`
			`assert ipa_embed.shape[-1] == encoder_hidden_states.shape[-1]`

			`ip_hidden_states = ipa_embed`

			`# Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding)`

refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`if not self._ip_adapter_attention_weights["skip"]:`
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`ip_key = ipa_weights.to_k_ip(ip_hidden_states)`
			`ip_value = ipa_weights.to_v_ip(ip_hidden_states)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`# Expected ip_key and ip_value shape:`
			`# (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)`
			`ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`# Expected ip_key and ip_value shape:`
			`# (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`# TODO: add support for attn.scale when we move to Torch 2.1`
			`ip_hidden_states = F.scaled_dot_product_attention(`
			`query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False`
			`)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`# Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(`
			`batch_size, -1, attn.heads * head_dim`
			`)`
			`ip_hidden_states = ip_hidden_states.to(query.dtype)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`# Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim)`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00
wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 05:39:45 +00:00			`hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask`
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`else:`
Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-03-14 17:56:03 +00:00			`# If IP-Adapter is not enabled, then regional_ip_data should not be passed in.`
			`assert regional_ip_data is None`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00
Update CustomAttention to support both IP-Adapters and regional prompting. 2024-03-08 19:03:33 +00:00			`# Start unmodified block from AttnProcessor2_0.`
			`# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`# linear proj`
Pull the upstream changes from diffusers' AttnProcessor2_0 into CustomAttnProcessor2_0. This fixes a bug in CustomAttnProcessor2_0 that was being triggered when peft was not installed. The bug was present in a block of code that was previously copied from diffusers. The bug seems to have been introduced during diffusers' migration to PEFT for their LoRA handling. The upstream bug was fixed in https://github.com/huggingface/diffusers/commit/531e719163d2d7cf0d725bb685c1e8fe3393b9da. 2024-04-08 14:55:54 +00:00			`hidden_states = attn.to_out[0](hidden_states)`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`# dropout`
			`hidden_states = attn.to_out[1](hidden_states)`

			`if input_ndim == 4:`
refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`batch_size, channel, height, width = hidden_states.shape`
Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-02-15 22:52:44 +00:00			`hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)`

			`if attn.residual_connection:`
			`hidden_states = hidden_states + residual`

			`hidden_states = hidden_states / attn.rescale_output_factor`

refactor: fix a bunch of type issues in custom_attention 2024-04-13 08:47:25 +00:00			`return cast(torch.FloatTensor, hidden_states)`