From cd078b1865681e0a45c4cd1768963ed94dd58652 Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Fri, 29 Mar 2024 11:58:10 +0530
Subject: [PATCH] fix: Raise a better error when incorrect CLIP Vision model is
 used

---
 invokeai/backend/ip_adapter/ip_adapter.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 5444c76c8c..1155e571ae 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -146,9 +146,12 @@ class IPAdapter(RawModel):
     def get_image_embeds(self, pil_image: List[Image.Image], image_encoder: CLIPVisionModelWithProjection):
         clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
         clip_image_embeds = image_encoder(clip_image.to(self.device, dtype=self.dtype)).image_embeds
-        image_prompt_embeds = self._image_proj_model(clip_image_embeds)
-        uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds))
-        return image_prompt_embeds, uncond_image_prompt_embeds
+        try:
+            image_prompt_embeds = self._image_proj_model(clip_image_embeds)
+            uncond_image_prompt_embeds = self._image_proj_model(torch.zeros_like(clip_image_embeds))
+            return image_prompt_embeds, uncond_image_prompt_embeds
+        except RuntimeError:
+            raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter")
 
 
 class IPAdapterPlus(IPAdapter):
@@ -169,12 +172,15 @@ class IPAdapterPlus(IPAdapter):
         clip_image = self._clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
         clip_image = clip_image.to(self.device, dtype=self.dtype)
         clip_image_embeds = image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
-        image_prompt_embeds = self._image_proj_model(clip_image_embeds)
         uncond_clip_image_embeds = image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[
             -2
         ]
-        uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds)
-        return image_prompt_embeds, uncond_image_prompt_embeds
+        try:
+            image_prompt_embeds = self._image_proj_model(clip_image_embeds)
+            uncond_image_prompt_embeds = self._image_proj_model(uncond_clip_image_embeds)
+            return image_prompt_embeds, uncond_image_prompt_embeds
+        except RuntimeError:
+            raise RuntimeError("Selected CLIP Vision Model is incompatible with the current IP Adapter")
 
 
 class IPAdapterFull(IPAdapterPlus):