diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 764e744a2e..db7cd20172 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -401,6 +401,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
         tf = torchvision.transforms.Resize(
             (target_height, target_width), interpolation=torchvision.transforms.InterpolationMode.NEAREST
         )
+
+        if len(mask.shape) != 3 or mask.shape[0] != 1:
+            raise ValueError(f"Invalid regional prompt mask shape: {mask.shape}. Expected shape (1, h, w).")
+
+        # Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
         mask = mask.unsqueeze(0)  # Shape: (1, h, w) -> (1, 1, h, w)
         resized_mask = tf(mask)
         return resized_mask
diff --git a/invokeai/app/invocations/mask.py b/invokeai/app/invocations/mask.py
index 572fd7c15d..31eb70e056 100644
--- a/invokeai/app/invocations/mask.py
+++ b/invokeai/app/invocations/mask.py
@@ -88,10 +88,12 @@ class ExtractMasksAndPromptsInvocation(BaseInvocation):
         image_as_tensor = torch.from_numpy(np.array(image, dtype=np.uint8))

         for pair in self.prompt_color_pairs:
+            # TODO(ryand): Make this work for both RGB and RGBA images.
             mask = torch.all(image_as_tensor == torch.tensor(pair.color.tuple()), dim=-1)
+            # Add explicit channel dimension.
+            mask = mask.unsqueeze(0)
             mask_name = context.tensors.save(mask)
             prompt_mask_pairs.append(PromptMaskPair(prompt=pair.prompt, mask=MaskField(mask_name=mask_name)))
-
         return ExtractMasksAndPromptsOutput(prompt_mask_pairs=prompt_mask_pairs)
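For reviewers: a minimal standalone sketch of the resize path touched in latent.py. The mask contents, sizes, and the float dtype are illustrative assumptions (the invocation's real mask comes from a MaskField); the shape check and the unsqueeze mirror the lines added above.

```python
import torch
import torchvision.transforms

# Hypothetical target size; the real values come from the invocation's inputs.
target_height, target_width = 64, 64
tf = torchvision.transforms.Resize(
    (target_height, target_width),
    interpolation=torchvision.transforms.InterpolationMode.NEAREST,
)

# Illustrative float mask of shape (1, h, w), matching the new shape check.
mask = torch.zeros(1, 128, 128)
mask[:, 32:96, 32:96] = 1.0

if len(mask.shape) != 3 or mask.shape[0] != 1:
    raise ValueError(f"Invalid regional prompt mask shape: {mask.shape}. Expected shape (1, h, w).")

# torchvision operates on (batch, channels, h, w), hence the added batch dimension.
resized_mask = tf(mask.unsqueeze(0))
assert resized_mask.shape == (1, 1, target_height, target_width)
```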
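And a sketch of the color-to-mask comparison in mask.py, using a made-up 8x8 RGB image and color as stand-ins for the invocation's inputs. It also illustrates why the TODO matters: against an RGBA image, the 3-element color tensor would fail to broadcast over the 4-channel axis rather than match.

```python
import numpy as np
import torch

# Made-up RGB image: top-left quadrant is pure red.
image = np.zeros((8, 8, 3), dtype=np.uint8)
image[:4, :4] = (255, 0, 0)
image_as_tensor = torch.from_numpy(image)

color = (255, 0, 0)  # stand-in for pair.color.tuple()

# True only where all three channels equal the target color.
mask = torch.all(image_as_tensor == torch.tensor(color, dtype=torch.uint8), dim=-1)
mask = mask.unsqueeze(0)  # add explicit channel dimension: (h, w) -> (1, h, w)

assert mask.shape == (1, 8, 8)
assert mask[0, 0, 0] and not mask[0, 7, 7]
```

The added unsqueeze gives the saved mask the (1, h, w) shape that the new validation in latent.py expects downstream.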