diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 934861f008..d1b51a43f0 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -127,8 +127,6 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # Inputs - image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") - image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") blend_amount: int = InputField( ge=0, @@ -139,6 +137,13 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): images = [twi.image for twi in self.tiles_with_images] tiles = [twi.tile for twi in self.tiles_with_images] + # Infer the output image dimensions from the maximum bottom/right tile coordinates. + height = 0 + width = 0 + for tile in tiles: + height = max(height, tile.coords.bottom) + width = max(width, tile.coords.right) + # Get all tile images for processing. # TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly # existed in memory at an earlier point in the graph. @@ -152,7 +157,7 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): # Check the first tile to determine how many image channels are expected in the output. channels = tile_np_images[0].shape[-1] dtype = tile_np_images[0].dtype - np_image = np.zeros(shape=(self.image_height, self.image_width, channels), dtype=dtype) + np_image = np.zeros(shape=(height, width, channels), dtype=dtype) merge_tiles_with_linear_blending( dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount