Fix dimensions of mask produced by ExtractMasksAndPromptsInvocation. Also, added a clearer error message in case the same error is introduced in the future.

2024-08-30 20:32:17 +00:00 · 2024-04-08 12:27:57 -04:00
parent 75ef473748
commit 5b5a4204a1
2 changed files with 8 additions and 1 deletions
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@ -401,6 +401,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
        tf = torchvision.transforms.Resize(
            (target_height, target_width), interpolation=torchvision.transforms.InterpolationMode.NEAREST
        )
+
+        if len(mask.shape) != 3 or mask.shape[0] != 1:
+            raise ValueError(f"Invalid regional prompt mask shape: {mask.shape}. Expected shape (1, h, w).")
+
+        # Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
        mask = mask.unsqueeze(0)  # Shape: (1, h, w) -> (1, 1, h, w)
        resized_mask = tf(mask)
        return resized_mask
--- a/invokeai/app/invocations/mask.py
+++ b/invokeai/app/invocations/mask.py
@ -88,10 +88,12 @@ class ExtractMasksAndPromptsInvocation(BaseInvocation):
        image_as_tensor = torch.from_numpy(np.array(image, dtype=np.uint8))

        for pair in self.prompt_color_pairs:
+            # TODO(ryand): Make this work for both RGB and RGBA images.
            mask = torch.all(image_as_tensor == torch.tensor(pair.color.tuple()), dim=-1)
+            # Add explicit channel dimension.
+            mask = mask.unsqueeze(0)
            mask_name = context.tensors.save(mask)
            prompt_mask_pairs.append(PromptMaskPair(prompt=pair.prompt, mask=MaskField(mask_name=mask_name)))
-
        return ExtractMasksAndPromptsOutput(prompt_mask_pairs=prompt_mask_pairs)