feat: sdxl metadata

- update `CoreMetadata` class & `MetadataAccumulator` with fields for SDXL-specific metadata
- update the linear UI graphs to populate this metadata
2024-08-30 20:32:17 +00:00 · 2023-07-26 00:22:47 +10:00 · 2023-07-26 00:22:47 +10:00 · 61291ea105
commit 61291ea105
parent 840205496a
5 changed files with 212 additions and 105 deletions
--- a/invokeai/app/invocations/metadata.py
+++ b/invokeai/app/invocations/metadata.py
@ -2,16 +2,19 @@ from typing import Literal, Optional, Union

 from pydantic import BaseModel, Field

-from invokeai.app.invocations.baseinvocation import (BaseInvocation,
-                                                     BaseInvocationOutput, InvocationConfig,
-                                                     InvocationContext)
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    InvocationConfig,
+    InvocationContext,
+)
 from invokeai.app.invocations.controlnet_image_processors import ControlField
-from invokeai.app.invocations.model import (LoRAModelField, MainModelField,
-                                            VAEModelField)
+from invokeai.app.invocations.model import LoRAModelField, MainModelField, VAEModelField


 class LoRAMetadataField(BaseModel):
    """LoRA metadata for an image generated in InvokeAI."""
+
    lora: LoRAModelField = Field(description="The LoRA model")
    weight: float = Field(description="The weight of the LoRA model")

@ -19,7 +22,9 @@ class LoRAMetadataField(BaseModel):
 class CoreMetadata(BaseModel):
    """Core generation metadata for an image generated in InvokeAI."""

-    generation_mode: str = Field(description="The generation mode that output this image",)
+    generation_mode: str = Field(
+        description="The generation mode that output this image",
+    )
    positive_prompt: str = Field(description="The positive prompt parameter")
    negative_prompt: str = Field(description="The negative prompt parameter")
    width: int = Field(description="The width parameter")
@ -29,10 +34,20 @@ class CoreMetadata(BaseModel):
    cfg_scale: float = Field(description="The classifier-free guidance scale parameter")
    steps: int = Field(description="The number of steps used for inference")
    scheduler: str = Field(description="The scheduler used for inference")
-    clip_skip: int = Field(description="The number of skipped CLIP layers",)
+    clip_skip: int = Field(
+        description="The number of skipped CLIP layers",
+    )
    model: MainModelField = Field(description="The main model used for inference")
-    controlnets: list[ControlField]= Field(description="The ControlNets used for inference")
+    controlnets: list[ControlField] = Field(
+        description="The ControlNets used for inference"
+    )
    loras: list[LoRAMetadataField] = Field(description="The LoRAs used for inference")
+    vae: Union[VAEModelField, None] = Field(
+        default=None,
+        description="The VAE used for decoding, if the main model's default was not used",
+    )
+
+    # Latents-to-Latents
    strength: Union[float, None] = Field(
        default=None,
        description="The strength used for latents-to-latents",
@ -40,9 +55,34 @@ class CoreMetadata(BaseModel):
    init_image: Union[str, None] = Field(
        default=None, description="The name of the initial image"
    )
-    vae: Union[VAEModelField, None] = Field(
+
+    # SDXL
+    positive_style_prompt: Union[str, None] = Field(
+        default=None, description="The positive style prompt parameter"
+    )
+    negative_style_prompt: Union[str, None] = Field(
+        default=None, description="The negative style prompt parameter"
+    )
+
+    # SDXL Refiner
+    refiner_model: Union[MainModelField, None] = Field(
+        default=None, description="The SDXL Refiner model used"
+    )
+    refiner_cfg_scale: Union[float, None] = Field(
        default=None,
-        description="The VAE used for decoding, if the main model's default was not used",
+        description="The classifier-free guidance scale parameter used for the refiner",
+    )
+    refiner_steps: Union[int, None] = Field(
+        default=None, description="The number of steps used for the refiner"
+    )
+    refiner_scheduler: Union[str, None] = Field(
+        default=None, description="The scheduler used for the refiner"
+    )
+    refiner_aesthetic_store: Union[float, None] = Field(
+        default=None, description="The aesthetic score used for the refiner"
+    )
+    refiner_start: Union[float, None] = Field(
+        default=None, description="The start value used for refiner denoising"
    )


@ -71,7 +111,9 @@ class MetadataAccumulatorInvocation(BaseInvocation):

    type: Literal["metadata_accumulator"] = "metadata_accumulator"

-    generation_mode: str = Field(description="The generation mode that output this image",)
+    generation_mode: str = Field(
+        description="The generation mode that output this image",
+    )
    positive_prompt: str = Field(description="The positive prompt parameter")
    negative_prompt: str = Field(description="The negative prompt parameter")
    width: int = Field(description="The width parameter")
@ -81,9 +123,13 @@ class MetadataAccumulatorInvocation(BaseInvocation):
    cfg_scale: float = Field(description="The classifier-free guidance scale parameter")
    steps: int = Field(description="The number of steps used for inference")
    scheduler: str = Field(description="The scheduler used for inference")
-    clip_skip: int = Field(description="The number of skipped CLIP layers",)
+    clip_skip: int = Field(
+        description="The number of skipped CLIP layers",
+    )
    model: MainModelField = Field(description="The main model used for inference")
-    controlnets: list[ControlField]= Field(description="The ControlNets used for inference")
+    controlnets: list[ControlField] = Field(
+        description="The ControlNets used for inference"
+    )
    loras: list[LoRAMetadataField] = Field(description="The LoRAs used for inference")
    strength: Union[float, None] = Field(
        default=None,
@ -97,36 +143,44 @@ class MetadataAccumulatorInvocation(BaseInvocation):
        description="The VAE used for decoding, if the main model's default was not used",
    )

+    # SDXL
+    positive_style_prompt: Union[str, None] = Field(
+        default=None, description="The positive style prompt parameter"
+    )
+    negative_style_prompt: Union[str, None] = Field(
+        default=None, description="The negative style prompt parameter"
+    )
+
+    # SDXL Refiner
+    refiner_model: Union[MainModelField, None] = Field(
+        default=None, description="The SDXL Refiner model used"
+    )
+    refiner_cfg_scale: Union[float, None] = Field(
+        default=None,
+        description="The classifier-free guidance scale parameter used for the refiner",
+    )
+    refiner_steps: Union[int, None] = Field(
+        default=None, description="The number of steps used for the refiner"
+    )
+    refiner_scheduler: Union[str, None] = Field(
+        default=None, description="The scheduler used for the refiner"
+    )
+    refiner_aesthetic_store: Union[float, None] = Field(
+        default=None, description="The aesthetic score used for the refiner"
+    )
+    refiner_start: Union[float, None] = Field(
+        default=None, description="The start value used for refiner denoising"
+    )
+
    class Config(InvocationConfig):
        schema_extra = {
            "ui": {
                "title": "Metadata Accumulator",
-                "tags": ["image", "metadata", "generation"]
+                "tags": ["image", "metadata", "generation"],
            },
        }

-
    def invoke(self, context: InvocationContext) -> MetadataAccumulatorOutput:
        """Collects and outputs a CoreMetadata object"""

-        return MetadataAccumulatorOutput(
-            metadata=CoreMetadata(
-                generation_mode=self.generation_mode,
-                positive_prompt=self.positive_prompt,
-                negative_prompt=self.negative_prompt,
-                width=self.width,
-                height=self.height,
-                seed=self.seed,
-                rand_device=self.rand_device,
-                cfg_scale=self.cfg_scale,
-                steps=self.steps,
-                scheduler=self.scheduler,
-                model=self.model,
-                strength=self.strength,
-                init_image=self.init_image,
-                vae=self.vae,
-                controlnets=self.controlnets,
-                loras=self.loras,
-                clip_skip=self.clip_skip,
-            )
-        )
+        return MetadataAccumulatorOutput(metadata=CoreMetadata(**self.dict()))
--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts
@ -5,6 +5,7 @@ import {
  IMAGE_TO_LATENTS,
  LATENTS_TO_IMAGE,
  METADATA_ACCUMULATOR,
+  SDXL_LATENTS_TO_LATENTS,
  SDXL_MODEL_LOADER,
  SDXL_REFINER_LATENTS_TO_LATENTS,
  SDXL_REFINER_MODEL_LOADER,
@ -35,6 +36,15 @@ export const addSDXLRefinerToGraph = (
    | MetadataAccumulatorInvocation
    | undefined;

+  if (metadataAccumulator) {
+    metadataAccumulator.refiner_model = refinerModel;
+    metadataAccumulator.refiner_aesthetic_store = refinerAestheticScore;
+    metadataAccumulator.refiner_cfg_scale = refinerCFGScale;
+    metadataAccumulator.refiner_scheduler = refinerScheduler;
+    metadataAccumulator.refiner_start = refinerStart;
+    metadataAccumulator.refiner_steps = refinerSteps;
+  }
+
  // Unplug SDXL Latents Generation To Latents To Image
  graph.edges = graph.edges.filter(
    (e) =>
@ -49,6 +59,21 @@ export const addSDXLRefinerToGraph = (
      )
  );

+  // connect the VAE back to the i2l, which we just removed in the filter
+  // but only if we are doing l2l
+  if (baseNodeId === SDXL_LATENTS_TO_LATENTS) {
+    graph.edges.push({
+      source: {
+        node_id: SDXL_MODEL_LOADER,
+        field: 'vae',
+      },
+      destination: {
+        node_id: IMAGE_TO_LATENTS,
+        field: 'vae',
+      },
+    });
+  }
+
  graph.nodes[SDXL_REFINER_MODEL_LOADER] = {
    type: 'sdxl_refiner_model_loader',
    id: SDXL_REFINER_MODEL_LOADER,
@ -76,16 +101,6 @@ export const addSDXLRefinerToGraph = (
  };

  graph.edges.push(
-    {
-      source: {
-        node_id: SDXL_MODEL_LOADER,
-        field: 'vae',
-      },
-      destination: {
-        node_id: IMAGE_TO_LATENTS,
-        field: 'vae',
-      },
-    },
    {
      source: {
        node_id: SDXL_REFINER_MODEL_LOADER,
--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts
@ -7,9 +7,8 @@ import {
  ImageToLatentsInvocation,
 } from 'services/api/types';
 import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
-import { addSDXLRefinerToGraph } from './buildSDXLRefinerGraph';
+import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
 import {
-  IMAGE_TO_IMAGE_GRAPH,
  IMAGE_TO_LATENTS,
  LATENTS_TO_IMAGE,
  METADATA_ACCUMULATOR,
@ -17,6 +16,7 @@ import {
  NOISE,
  POSITIVE_CONDITIONING,
  RESIZE,
+  SDXL_IMAGE_TO_IMAGE_GRAPH,
  SDXL_LATENTS_TO_LATENTS,
  SDXL_MODEL_LOADER,
 } from './constants';
@ -53,16 +53,6 @@ export const buildLinearSDXLImageToImageGraph = (
    refinerStart,
  } = state.sdxl;

-  // TODO: add batch functionality
-  // const {
-  //   isEnabled: isBatchEnabled,
-  //   imageNames: batchImageNames,
-  //   asInitialImage,
-  // } = state.batch;
-
-  // const shouldBatch =
-  //   isBatchEnabled && batchImageNames.length > 0 && asInitialImage;
-
  /**
   * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the
   * full graph here as a template. Then use the parameters from app state and set friendlier node
@ -88,7 +78,7 @@ export const buildLinearSDXLImageToImageGraph = (

  // copy-pasted graph from node editor, filled in with state values & friendly node ids
  const graph: NonNullableGraph = {
-    id: IMAGE_TO_IMAGE_GRAPH,
+    id: SDXL_IMAGE_TO_IMAGE_GRAPH,
    nodes: {
      [SDXL_MODEL_LOADER]: {
        type: 'sdxl_model_loader',
@ -328,42 +318,6 @@ export const buildLinearSDXLImageToImageGraph = (
    });
  }

-  // TODO: add batch functionality
-  // if (isBatchEnabled && asInitialImage && batchImageNames.length > 0) {
-  //   // we are going to connect an iterate up to the init image
-  //   delete (graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image;
-
-  //   const imageCollection: ImageCollectionInvocation = {
-  //     id: IMAGE_COLLECTION,
-  //     type: 'image_collection',
-  //     images: batchImageNames.map((image_name) => ({ image_name })),
-  //   };
-
-  //   const imageCollectionIterate: IterateInvocation = {
-  //     id: IMAGE_COLLECTION_ITERATE,
-  //     type: 'iterate',
-  //   };
-
-  //   graph.nodes[IMAGE_COLLECTION] = imageCollection;
-  //   graph.nodes[IMAGE_COLLECTION_ITERATE] = imageCollectionIterate;
-
-  //   graph.edges.push({
-  //     source: { node_id: IMAGE_COLLECTION, field: 'collection' },
-  //     destination: {
-  //       node_id: IMAGE_COLLECTION_ITERATE,
-  //       field: 'collection',
-  //     },
-  //   });
-
-  //   graph.edges.push({
-  //     source: { node_id: IMAGE_COLLECTION_ITERATE, field: 'item' },
-  //     destination: {
-  //       node_id: IMAGE_TO_LATENTS,
-  //       field: 'image',
-  //     },
-  //   });
-  // }
-
  // add metadata accumulator, which is only mostly populated - some fields are added later
  graph.nodes[METADATA_ACCUMULATOR] = {
    id: METADATA_ACCUMULATOR,
@ -385,6 +339,8 @@ export const buildLinearSDXLImageToImageGraph = (
    clip_skip: clipSkip,
    strength,
    init_image: initialImage.imageName,
+    positive_style_prompt: positiveStylePrompt,
+    negative_style_prompt: negativeStylePrompt,
  };

  graph.edges.push({
--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts
@ -3,7 +3,7 @@ import { RootState } from 'app/store/store';
 import { NonNullableGraph } from 'features/nodes/types/types';
 import { initialGenerationState } from 'features/parameters/store/generationSlice';
 import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
-import { addSDXLRefinerToGraph } from './buildSDXLRefinerGraph';
+import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
 import {
  LATENTS_TO_IMAGE,
  METADATA_ACCUMULATOR,
@ -224,6 +224,8 @@ export const buildLinearSDXLTextToImageGraph = (
    controlnets: [],
    loras: [],
    clip_skip: clipSkip,
+    positive_style_prompt: positiveStylePrompt,
+    negative_style_prompt: negativeStylePrompt,
  };

  graph.edges.push({
--- a/invokeai/frontend/web/src/services/api/schema.d.ts
+++ b/invokeai/frontend/web/src/services/api/schema.d.ts
@ -1014,6 +1014,11 @@ export type components = {
       * @description The LoRAs used for inference
       */
      loras: (components["schemas"]["LoRAMetadataField"])[];
+      /**
+       * Vae 
+       * @description The VAE used for decoding, if the main model's default was not used
+       */
+      vae?: components["schemas"]["VAEModelField"];
      /**
       * Strength 
       * @description The strength used for latents-to-latents
@ -1025,10 +1030,45 @@ export type components = {
       */
      init_image?: string;
      /**
-       * Vae 
-       * @description The VAE used for decoding, if the main model's default was not used
+       * Positive Style Prompt 
+       * @description The positive style prompt parameter
       */
-      vae?: components["schemas"]["VAEModelField"];
+      positive_style_prompt?: string;
+      /**
+       * Negative Style Prompt 
+       * @description The negative style prompt parameter
+       */
+      negative_style_prompt?: string;
+      /**
+       * Refiner Model 
+       * @description The SDXL Refiner model used
+       */
+      refiner_model?: components["schemas"]["MainModelField"];
+      /**
+       * Refiner Cfg Scale 
+       * @description The classifier-free guidance scale parameter used for the refiner
+       */
+      refiner_cfg_scale?: number;
+      /**
+       * Refiner Steps 
+       * @description The number of steps used for the refiner
+       */
+      refiner_steps?: number;
+      /**
+       * Refiner Scheduler 
+       * @description The scheduler used for the refiner
+       */
+      refiner_scheduler?: string;
+      /**
+       * Refiner Aesthetic Store 
+       * @description The aesthetic score used for the refiner
+       */
+      refiner_aesthetic_store?: number;
+      /**
+       * Refiner Start 
+       * @description The start value used for refiner denoising
+       */
+      refiner_start?: number;
    };
    /**
     * CvInpaintInvocation 
@ -3268,6 +3308,46 @@ export type components = {
       * @description The VAE used for decoding, if the main model's default was not used
       */
      vae?: components["schemas"]["VAEModelField"];
+      /**
+       * Positive Style Prompt 
+       * @description The positive style prompt parameter
+       */
+      positive_style_prompt?: string;
+      /**
+       * Negative Style Prompt 
+       * @description The negative style prompt parameter
+       */
+      negative_style_prompt?: string;
+      /**
+       * Refiner Model 
+       * @description The SDXL Refiner model used
+       */
+      refiner_model?: components["schemas"]["MainModelField"];
+      /**
+       * Refiner Cfg Scale 
+       * @description The classifier-free guidance scale parameter used for the refiner
+       */
+      refiner_cfg_scale?: number;
+      /**
+       * Refiner Steps 
+       * @description The number of steps used for the refiner
+       */
+      refiner_steps?: number;
+      /**
+       * Refiner Scheduler 
+       * @description The scheduler used for the refiner
+       */
+      refiner_scheduler?: string;
+      /**
+       * Refiner Aesthetic Store 
+       * @description The aesthetic score used for the refiner
+       */
+      refiner_aesthetic_store?: number;
+      /**
+       * Refiner Start 
+       * @description The start value used for refiner denoising
+       */
+      refiner_start?: number;
    };
    /**
     * MetadataAccumulatorOutput 
@ -5355,6 +5435,12 @@ export type components = {
       */
      image?: components["schemas"]["ImageField"];
    };
+    /**
+     * StableDiffusion1ModelFormat 
+     * @description An enumeration. 
+     * @enum {string}
+     */
+    StableDiffusion1ModelFormat: "checkpoint" | "diffusers";
    /**
     * StableDiffusion2ModelFormat 
     * @description An enumeration. 
@ -5367,12 +5453,6 @@ export type components = {
     * @enum {string}
     */
    StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
-    /**
-     * StableDiffusion1ModelFormat 
-     * @description An enumeration. 
-     * @enum {string}
-     */
-    StableDiffusion1ModelFormat: "checkpoint" | "diffusers";
  };
  responses: never;
  parameters: never;