From 892fe622648e720d769a30ddd4f618c010637715 Mon Sep 17 00:00:00 2001
From: Jonathan <34005131+JPPhoto@users.noreply.github.com>
Date: Tue, 16 Jan 2024 07:50:36 -0600
Subject: [PATCH] Add Ideal Size node to core nodes

The Ideal Size node is useful for High-Res Optimization as it gives the
optimum size for creating an initial generation with minimal artifacts
(duplication and other strangeness) from today's models.

After inclusion, front end graph generation can be simplified by
offloading calculations for HRO initial generation to this node.
---
 invokeai/app/invocations/latent.py | 66 ++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py
index 9b93cf0a3d..e90df9b8cd 100644
--- a/invokeai/app/invocations/latent.py
+++ b/invokeai/app/invocations/latent.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
 
+import math
 from contextlib import ExitStack
 from functools import singledispatchmethod
 from typing import List, Literal, Optional, Union
@@ -1228,3 +1229,68 @@ class CropLatentsCoreInvocation(BaseInvocation):
         context.services.latents.save(name, cropped_latents)
 
         return build_latents_output(latents_name=name, latents=cropped_latents)
+
+
+@invocation_output("ideal_size_output")
+class IdealSizeOutput(BaseInvocationOutput):
+    """Ideal width and height for the initial generation, as computed by the Ideal Size node"""
+
+    width: int = OutputField(description="The ideal width of the image in pixels")
+    height: int = OutputField(description="The ideal height of the image in pixels")
+
+
+@invocation(
+    "ideal_size",
+    title="Ideal Size",
+    tags=["latents", "math", "ideal_size"],
+    version="1.0.2",
+)
+class IdealSizeInvocation(BaseInvocation):
+    """Calculates the ideal size for generation to avoid duplication"""
+
+    width: int = InputField(default=1024, description="Target width")
+    height: int = InputField(default=576, description="Target height")
+    unet: UNetField = InputField(default=None, description="UNet submodel")
+    multiplier: float = InputField(default=1.0, description="Dimensional multiplier")
+
+    def trim_to_multiple_of(self, *args, multiple_of=LATENT_SCALE_FACTOR):
+        """Round each value in args down to the nearest multiple of multiple_of"""
+        return tuple((x - x % multiple_of) for x in args)
+
+    def invoke(self, context: InvocationContext) -> IdealSizeOutput:
+        """Compute the initial generation size for the target aspect ratio and the UNet's base model"""
+        # Reject inputs that would otherwise fail below with a ZeroDivisionError / math domain error or yield 0x0.
+        if self.width <= 0 or self.height <= 0:
+            raise ValueError(f"width and height must be positive, got {self.width}x{self.height}")
+        if self.multiplier <= 0:
+            raise ValueError(f"multiplier must be positive, got {self.multiplier}")
+
+        aspect = self.width / self.height
+        # Base dimension per base model; anything other than SD2 or SDXL falls back to 512.
+        dimension = 512
+        if self.unet.unet.base_model == BaseModelType.StableDiffusion2:
+            dimension = 768
+        elif self.unet.unet.base_model == BaseModelType.StableDiffusionXL:
+            dimension = 1024
+        dimension = dimension * self.multiplier
+        # Neither side is allowed to fall below half of the scaled base dimension.
+        min_dimension = math.floor(dimension * 0.5)
+        model_area = dimension * dimension  # hardcoded for now since all models are trained on square images
+
+        if aspect > 1.0:
+            init_height = max(min_dimension, math.sqrt(model_area / aspect))
+            init_width = init_height * aspect
+        else:
+            init_width = max(min_dimension, math.sqrt(model_area * aspect))
+            init_height = init_width / aspect
+
+        scaled_width, scaled_height = self.trim_to_multiple_of(
+            math.floor(init_width),
+            math.floor(init_height),
+        )
+
+        # A small multiplier can trim a side down to 0, so never report less than one trim step.
+        return IdealSizeOutput(
+            width=max(scaled_width, LATENT_SCALE_FACTOR),
+            height=max(scaled_height, LATENT_SCALE_FACTOR),
+        )