diff --git a/invokeai/app/invocations/grounded_sam.py b/invokeai/app/invocations/grounded_sam.py index 8eb8770e47..411ec4a91f 100644 --- a/invokeai/app/invocations/grounded_sam.py +++ b/invokeai/app/invocations/grounded_sam.py @@ -60,7 +60,8 @@ class GroundedSAMInvocation(BaseInvocation): @torch.no_grad() def invoke(self, context: InvocationContext) -> ImageOutput: - image_pil = context.images.get_pil(self.image.image_name) + # The models expect a 3-channel RGB image. + image_pil = context.images.get_pil(self.image.image_name, mode="RGB") detections = self._detect( context=context, image=image_pil, labels=[self.prompt], threshold=self.detection_threshold