From e680cf76f690c64d6bdadb2a8519f7e5940b9fb8 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 21 Aug 2024 13:45:22 +0000 Subject: [PATCH] Address minor review comments. --- invokeai/app/invocations/fields.py | 2 +- invokeai/app/invocations/flux_text_encoder.py | 12 ++++++------ invokeai/app/invocations/flux_text_to_image.py | 8 +++++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index 2abd6b39b7..1b52f27fb5 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -135,7 +135,7 @@ class FieldDescriptions: vae_model = "VAE model to load" lora_model = "LoRA model to load" main_model = "Main model (UNet, VAE, CLIP) to load" - flux_model = "Flux model (Transformer, VAE, CLIP) to load" + flux_model = "Flux model (Transformer) to load" sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load" sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load" onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load" diff --git a/invokeai/app/invocations/flux_text_encoder.py b/invokeai/app/invocations/flux_text_encoder.py index 7b08201e18..54c6ff2b33 100644 --- a/invokeai/app/invocations/flux_text_encoder.py +++ b/invokeai/app/invocations/flux_text_encoder.py @@ -15,8 +15,8 @@ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import Condit @invocation( "flux_text_encoder", title="FLUX Text Encoding", - tags=["image", "flux"], - category="image", + tags=["prompt", "conditioning", "flux"], + category="conditioning", version="1.0.0", ) class FluxTextEncoderInvocation(BaseInvocation): @@ -32,7 +32,9 @@ class FluxTextEncoderInvocation(BaseInvocation): description=FieldDescriptions.t5_encoder, input=Input.Connection, ) - max_seq_len: Literal[256, 512] = InputField(description="Max sequence length for the desired flux model") + t5_max_seq_len: Literal[256, 512] = InputField( + description="Max sequence length 
for the T5 encoder. Expected to be 256 for FLUX schnell models and 512 for FLUX dev models." + ) positive_prompt: str = InputField(description="Positive prompt for text-to-image generation.") # TODO(ryand): Should we create a new return type for this invocation? This ConditioningOutput is clearly not @@ -48,8 +50,6 @@ class FluxTextEncoderInvocation(BaseInvocation): return ConditioningOutput.build(conditioning_name) def _encode_prompt(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor]: - max_seq_len = self.max_seq_len - # Load CLIP. clip_tokenizer_info = context.models.load(self.clip.tokenizer) clip_text_encoder_info = context.models.load(self.clip.text_encoder) @@ -70,7 +70,7 @@ class FluxTextEncoderInvocation(BaseInvocation): assert isinstance(t5_tokenizer, T5Tokenizer) clip_encoder = HFEncoder(clip_text_encoder, clip_tokenizer, True, 77) - t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, max_seq_len) + t5_encoder = HFEncoder(t5_text_encoder, t5_tokenizer, False, self.t5_max_seq_len) prompt = [self.positive_prompt] prompt_embeds = t5_encoder(prompt) diff --git a/invokeai/app/invocations/flux_text_to_image.py b/invokeai/app/invocations/flux_text_to_image.py index f67cf4a155..d2789b86f0 100644 --- a/invokeai/app/invocations/flux_text_to_image.py +++ b/invokeai/app/invocations/flux_text_to_image.py @@ -33,7 +33,7 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): """Text-to-image generation using a FLUX model.""" transformer: TransformerField = InputField( - description=FieldDescriptions.unet, + description=FieldDescriptions.flux_model, input=Input.Connection, title="Transformer", ) @@ -46,10 +46,12 @@ class FluxTextToImageInvocation(BaseInvocation, WithMetadata, WithBoard): ) width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") - num_steps: int = 
InputField(default=4, description="Number of diffusion steps.") + num_steps: int = InputField( + default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50." + ) guidance: float = InputField( default=4.0, - description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images.", + description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.", ) seed: int = InputField(default=0, description="Randomness seed for reproducibility.")