Pass IP-Adapter conditioning via cross_attention_kwargs instead of concatenating to the text embedding. This avoids interference with other features that manipulate the text embedding (e.g. long prompts).

2024-08-30 20:32:17 +00:00 · 2023-09-08 11:47:36 -04:00
parent ddc148b70b
commit b2d5b53b5f
5 changed files with 135 additions and 68 deletions
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -92,7 +92,6 @@ class IPAdapter:
                print("swapping in IPAttnProcessor for", name)
                attn_procs[name] = IPAttnProcessor(
                    hidden_size=hidden_size,
-                    image_embedding_len=self.num_tokens,
                    cross_attention_dim=cross_attention_dim,
                    scale=1.0,
                ).to(self.device, dtype=torch.float16)