mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
Fix the padding behavior when max-pooling regional IP-Adapter masks to mirror the downscaling behavior of SD and SDXL. Prior to this change, denoising with input latent dimensions that were not evenly divisible by 8 would raise an exception.
This commit is contained in:
parent
fba40eb1bd
commit
f9af32a6d1
@@ -59,8 +59,11 @@ class RegionalIPData:
|
||||
if downscale_factor <= max_downscale_factor:
|
||||
# We use max pooling because we downscale to a pretty low resolution, so we don't want small mask
|
||||
# regions to be lost entirely.
|
||||
#
|
||||
# ceil_mode=True is set to mirror the downsampling behavior of SD and SDXL.
|
||||
#
|
||||
# TODO(ryand): In the future, we may want to experiment with other downsampling methods.
|
||||
- mask_tensor = torch.nn.functional.max_pool2d(mask_tensor, kernel_size=2, stride=2)
|
||||
+ mask_tensor = torch.nn.functional.max_pool2d(mask_tensor, kernel_size=2, stride=2, ceil_mode=True)
|
||||
|
||||
return masks_by_seq_len
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user