From fba40eb1bdfcd91c282b5ead0346cb583d47ead5 Mon Sep 17 00:00:00 2001
From: Ryan Dick <ryanjdick3@gmail.com>
Date: Tue, 9 Apr 2024 15:15:12 -0400
Subject: [PATCH] Fix the padding behavior when max-pooling regional prompt
 masks to mirror the downscaling behavior of SD and SDXL. Prior to this
 change, denoising with input latent dimensions that were not evenly divisible
 by 8 would raise an exception.

---
 .../stable_diffusion/diffusion/regional_prompt_data.py       | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py b/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py
index 85331013d5..f09cc0a0d2 100644
--- a/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py
+++ b/invokeai/backend/stable_diffusion/diffusion/regional_prompt_data.py
@@ -61,9 +61,12 @@ class RegionalPromptData:
                 if downscale_factor <= max_downscale_factor:
                     # We use max pooling because we downscale to a pretty low resolution, so we don't want small prompt
                     # regions to be lost entirely.
+                    #
+                    # ceil_mode=True rounds odd dims up, mirroring the downsampling behavior of SD and SDXL.
+                    #
                     # TODO(ryand): In the future, we may want to experiment with other downsampling methods (e.g.
                     # nearest interpolation), and could potentially use a weighted mask rather than a binary mask.
-                    batch_sample_masks = F.max_pool2d(batch_sample_masks, kernel_size=2, stride=2)
+                    batch_sample_masks = F.max_pool2d(batch_sample_masks, kernel_size=2, stride=2, ceil_mode=True)
 
         return batch_sample_masks_by_seq_len