mirror of
https://github.com/invoke-ai/InvokeAI
synced 2024-08-30 20:32:17 +00:00
feat: sdxl metadata
- update `CoreMetadata` class & `MetadataAccumulator` with fields for SDXL-specific metadata - update the linear UI graphs to populate this metadata
This commit is contained in:
parent
840205496a
commit
61291ea105
@ -2,16 +2,19 @@ from typing import Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import (BaseInvocation,
|
||||
BaseInvocationOutput, InvocationConfig,
|
||||
InvocationContext)
|
||||
from invokeai.app.invocations.baseinvocation import (
|
||||
BaseInvocation,
|
||||
BaseInvocationOutput,
|
||||
InvocationConfig,
|
||||
InvocationContext,
|
||||
)
|
||||
from invokeai.app.invocations.controlnet_image_processors import ControlField
|
||||
from invokeai.app.invocations.model import (LoRAModelField, MainModelField,
|
||||
VAEModelField)
|
||||
from invokeai.app.invocations.model import LoRAModelField, MainModelField, VAEModelField
|
||||
|
||||
|
||||
class LoRAMetadataField(BaseModel):
|
||||
"""LoRA metadata for an image generated in InvokeAI."""
|
||||
|
||||
lora: LoRAModelField = Field(description="The LoRA model")
|
||||
weight: float = Field(description="The weight of the LoRA model")
|
||||
|
||||
@ -19,7 +22,9 @@ class LoRAMetadataField(BaseModel):
|
||||
class CoreMetadata(BaseModel):
|
||||
"""Core generation metadata for an image generated in InvokeAI."""
|
||||
|
||||
generation_mode: str = Field(description="The generation mode that output this image",)
|
||||
generation_mode: str = Field(
|
||||
description="The generation mode that output this image",
|
||||
)
|
||||
positive_prompt: str = Field(description="The positive prompt parameter")
|
||||
negative_prompt: str = Field(description="The negative prompt parameter")
|
||||
width: int = Field(description="The width parameter")
|
||||
@ -29,10 +34,20 @@ class CoreMetadata(BaseModel):
|
||||
cfg_scale: float = Field(description="The classifier-free guidance scale parameter")
|
||||
steps: int = Field(description="The number of steps used for inference")
|
||||
scheduler: str = Field(description="The scheduler used for inference")
|
||||
clip_skip: int = Field(description="The number of skipped CLIP layers",)
|
||||
clip_skip: int = Field(
|
||||
description="The number of skipped CLIP layers",
|
||||
)
|
||||
model: MainModelField = Field(description="The main model used for inference")
|
||||
controlnets: list[ControlField]= Field(description="The ControlNets used for inference")
|
||||
controlnets: list[ControlField] = Field(
|
||||
description="The ControlNets used for inference"
|
||||
)
|
||||
loras: list[LoRAMetadataField] = Field(description="The LoRAs used for inference")
|
||||
vae: Union[VAEModelField, None] = Field(
|
||||
default=None,
|
||||
description="The VAE used for decoding, if the main model's default was not used",
|
||||
)
|
||||
|
||||
# Latents-to-Latents
|
||||
strength: Union[float, None] = Field(
|
||||
default=None,
|
||||
description="The strength used for latents-to-latents",
|
||||
@ -40,9 +55,34 @@ class CoreMetadata(BaseModel):
|
||||
init_image: Union[str, None] = Field(
|
||||
default=None, description="The name of the initial image"
|
||||
)
|
||||
vae: Union[VAEModelField, None] = Field(
|
||||
|
||||
# SDXL
|
||||
positive_style_prompt: Union[str, None] = Field(
|
||||
default=None, description="The positive style prompt parameter"
|
||||
)
|
||||
negative_style_prompt: Union[str, None] = Field(
|
||||
default=None, description="The negative style prompt parameter"
|
||||
)
|
||||
|
||||
# SDXL Refiner
|
||||
refiner_model: Union[MainModelField, None] = Field(
|
||||
default=None, description="The SDXL Refiner model used"
|
||||
)
|
||||
refiner_cfg_scale: Union[float, None] = Field(
|
||||
default=None,
|
||||
description="The VAE used for decoding, if the main model's default was not used",
|
||||
description="The classifier-free guidance scale parameter used for the refiner",
|
||||
)
|
||||
refiner_steps: Union[int, None] = Field(
|
||||
default=None, description="The number of steps used for the refiner"
|
||||
)
|
||||
refiner_scheduler: Union[str, None] = Field(
|
||||
default=None, description="The scheduler used for the refiner"
|
||||
)
|
||||
refiner_aesthetic_store: Union[float, None] = Field(
|
||||
default=None, description="The aesthetic score used for the refiner"
|
||||
)
|
||||
refiner_start: Union[float, None] = Field(
|
||||
default=None, description="The start value used for refiner denoising"
|
||||
)
|
||||
|
||||
|
||||
@ -71,7 +111,9 @@ class MetadataAccumulatorInvocation(BaseInvocation):
|
||||
|
||||
type: Literal["metadata_accumulator"] = "metadata_accumulator"
|
||||
|
||||
generation_mode: str = Field(description="The generation mode that output this image",)
|
||||
generation_mode: str = Field(
|
||||
description="The generation mode that output this image",
|
||||
)
|
||||
positive_prompt: str = Field(description="The positive prompt parameter")
|
||||
negative_prompt: str = Field(description="The negative prompt parameter")
|
||||
width: int = Field(description="The width parameter")
|
||||
@ -81,9 +123,13 @@ class MetadataAccumulatorInvocation(BaseInvocation):
|
||||
cfg_scale: float = Field(description="The classifier-free guidance scale parameter")
|
||||
steps: int = Field(description="The number of steps used for inference")
|
||||
scheduler: str = Field(description="The scheduler used for inference")
|
||||
clip_skip: int = Field(description="The number of skipped CLIP layers",)
|
||||
clip_skip: int = Field(
|
||||
description="The number of skipped CLIP layers",
|
||||
)
|
||||
model: MainModelField = Field(description="The main model used for inference")
|
||||
controlnets: list[ControlField]= Field(description="The ControlNets used for inference")
|
||||
controlnets: list[ControlField] = Field(
|
||||
description="The ControlNets used for inference"
|
||||
)
|
||||
loras: list[LoRAMetadataField] = Field(description="The LoRAs used for inference")
|
||||
strength: Union[float, None] = Field(
|
||||
default=None,
|
||||
@ -97,36 +143,44 @@ class MetadataAccumulatorInvocation(BaseInvocation):
|
||||
description="The VAE used for decoding, if the main model's default was not used",
|
||||
)
|
||||
|
||||
# SDXL
|
||||
positive_style_prompt: Union[str, None] = Field(
|
||||
default=None, description="The positive style prompt parameter"
|
||||
)
|
||||
negative_style_prompt: Union[str, None] = Field(
|
||||
default=None, description="The negative style prompt parameter"
|
||||
)
|
||||
|
||||
# SDXL Refiner
|
||||
refiner_model: Union[MainModelField, None] = Field(
|
||||
default=None, description="The SDXL Refiner model used"
|
||||
)
|
||||
refiner_cfg_scale: Union[float, None] = Field(
|
||||
default=None,
|
||||
description="The classifier-free guidance scale parameter used for the refiner",
|
||||
)
|
||||
refiner_steps: Union[int, None] = Field(
|
||||
default=None, description="The number of steps used for the refiner"
|
||||
)
|
||||
refiner_scheduler: Union[str, None] = Field(
|
||||
default=None, description="The scheduler used for the refiner"
|
||||
)
|
||||
refiner_aesthetic_store: Union[float, None] = Field(
|
||||
default=None, description="The aesthetic score used for the refiner"
|
||||
)
|
||||
refiner_start: Union[float, None] = Field(
|
||||
default=None, description="The start value used for refiner denoising"
|
||||
)
|
||||
|
||||
class Config(InvocationConfig):
|
||||
schema_extra = {
|
||||
"ui": {
|
||||
"title": "Metadata Accumulator",
|
||||
"tags": ["image", "metadata", "generation"]
|
||||
"tags": ["image", "metadata", "generation"],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def invoke(self, context: InvocationContext) -> MetadataAccumulatorOutput:
|
||||
"""Collects and outputs a CoreMetadata object"""
|
||||
|
||||
return MetadataAccumulatorOutput(
|
||||
metadata=CoreMetadata(
|
||||
generation_mode=self.generation_mode,
|
||||
positive_prompt=self.positive_prompt,
|
||||
negative_prompt=self.negative_prompt,
|
||||
width=self.width,
|
||||
height=self.height,
|
||||
seed=self.seed,
|
||||
rand_device=self.rand_device,
|
||||
cfg_scale=self.cfg_scale,
|
||||
steps=self.steps,
|
||||
scheduler=self.scheduler,
|
||||
model=self.model,
|
||||
strength=self.strength,
|
||||
init_image=self.init_image,
|
||||
vae=self.vae,
|
||||
controlnets=self.controlnets,
|
||||
loras=self.loras,
|
||||
clip_skip=self.clip_skip,
|
||||
)
|
||||
)
|
||||
return MetadataAccumulatorOutput(metadata=CoreMetadata(**self.dict()))
|
||||
|
@ -5,6 +5,7 @@ import {
|
||||
IMAGE_TO_LATENTS,
|
||||
LATENTS_TO_IMAGE,
|
||||
METADATA_ACCUMULATOR,
|
||||
SDXL_LATENTS_TO_LATENTS,
|
||||
SDXL_MODEL_LOADER,
|
||||
SDXL_REFINER_LATENTS_TO_LATENTS,
|
||||
SDXL_REFINER_MODEL_LOADER,
|
||||
@ -35,6 +36,15 @@ export const addSDXLRefinerToGraph = (
|
||||
| MetadataAccumulatorInvocation
|
||||
| undefined;
|
||||
|
||||
if (metadataAccumulator) {
|
||||
metadataAccumulator.refiner_model = refinerModel;
|
||||
metadataAccumulator.refiner_aesthetic_store = refinerAestheticScore;
|
||||
metadataAccumulator.refiner_cfg_scale = refinerCFGScale;
|
||||
metadataAccumulator.refiner_scheduler = refinerScheduler;
|
||||
metadataAccumulator.refiner_start = refinerStart;
|
||||
metadataAccumulator.refiner_steps = refinerSteps;
|
||||
}
|
||||
|
||||
// Unplug SDXL Latents Generation To Latents To Image
|
||||
graph.edges = graph.edges.filter(
|
||||
(e) =>
|
||||
@ -49,6 +59,21 @@ export const addSDXLRefinerToGraph = (
|
||||
)
|
||||
);
|
||||
|
||||
// connect the VAE back to the i2l, which we just removed in the filter
|
||||
// but only if we are doing l2l
|
||||
if (baseNodeId === SDXL_LATENTS_TO_LATENTS) {
|
||||
graph.edges.push({
|
||||
source: {
|
||||
node_id: SDXL_MODEL_LOADER,
|
||||
field: 'vae',
|
||||
},
|
||||
destination: {
|
||||
node_id: IMAGE_TO_LATENTS,
|
||||
field: 'vae',
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
graph.nodes[SDXL_REFINER_MODEL_LOADER] = {
|
||||
type: 'sdxl_refiner_model_loader',
|
||||
id: SDXL_REFINER_MODEL_LOADER,
|
||||
@ -76,16 +101,6 @@ export const addSDXLRefinerToGraph = (
|
||||
};
|
||||
|
||||
graph.edges.push(
|
||||
{
|
||||
source: {
|
||||
node_id: SDXL_MODEL_LOADER,
|
||||
field: 'vae',
|
||||
},
|
||||
destination: {
|
||||
node_id: IMAGE_TO_LATENTS,
|
||||
field: 'vae',
|
||||
},
|
||||
},
|
||||
{
|
||||
source: {
|
||||
node_id: SDXL_REFINER_MODEL_LOADER,
|
@ -7,9 +7,8 @@ import {
|
||||
ImageToLatentsInvocation,
|
||||
} from 'services/api/types';
|
||||
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
|
||||
import { addSDXLRefinerToGraph } from './buildSDXLRefinerGraph';
|
||||
import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
|
||||
import {
|
||||
IMAGE_TO_IMAGE_GRAPH,
|
||||
IMAGE_TO_LATENTS,
|
||||
LATENTS_TO_IMAGE,
|
||||
METADATA_ACCUMULATOR,
|
||||
@ -17,6 +16,7 @@ import {
|
||||
NOISE,
|
||||
POSITIVE_CONDITIONING,
|
||||
RESIZE,
|
||||
SDXL_IMAGE_TO_IMAGE_GRAPH,
|
||||
SDXL_LATENTS_TO_LATENTS,
|
||||
SDXL_MODEL_LOADER,
|
||||
} from './constants';
|
||||
@ -53,16 +53,6 @@ export const buildLinearSDXLImageToImageGraph = (
|
||||
refinerStart,
|
||||
} = state.sdxl;
|
||||
|
||||
// TODO: add batch functionality
|
||||
// const {
|
||||
// isEnabled: isBatchEnabled,
|
||||
// imageNames: batchImageNames,
|
||||
// asInitialImage,
|
||||
// } = state.batch;
|
||||
|
||||
// const shouldBatch =
|
||||
// isBatchEnabled && batchImageNames.length > 0 && asInitialImage;
|
||||
|
||||
/**
|
||||
* The easiest way to build linear graphs is to do it in the node editor, then copy and paste the
|
||||
* full graph here as a template. Then use the parameters from app state and set friendlier node
|
||||
@ -88,7 +78,7 @@ export const buildLinearSDXLImageToImageGraph = (
|
||||
|
||||
// copy-pasted graph from node editor, filled in with state values & friendly node ids
|
||||
const graph: NonNullableGraph = {
|
||||
id: IMAGE_TO_IMAGE_GRAPH,
|
||||
id: SDXL_IMAGE_TO_IMAGE_GRAPH,
|
||||
nodes: {
|
||||
[SDXL_MODEL_LOADER]: {
|
||||
type: 'sdxl_model_loader',
|
||||
@ -328,42 +318,6 @@ export const buildLinearSDXLImageToImageGraph = (
|
||||
});
|
||||
}
|
||||
|
||||
// TODO: add batch functionality
|
||||
// if (isBatchEnabled && asInitialImage && batchImageNames.length > 0) {
|
||||
// // we are going to connect an iterate up to the init image
|
||||
// delete (graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image;
|
||||
|
||||
// const imageCollection: ImageCollectionInvocation = {
|
||||
// id: IMAGE_COLLECTION,
|
||||
// type: 'image_collection',
|
||||
// images: batchImageNames.map((image_name) => ({ image_name })),
|
||||
// };
|
||||
|
||||
// const imageCollectionIterate: IterateInvocation = {
|
||||
// id: IMAGE_COLLECTION_ITERATE,
|
||||
// type: 'iterate',
|
||||
// };
|
||||
|
||||
// graph.nodes[IMAGE_COLLECTION] = imageCollection;
|
||||
// graph.nodes[IMAGE_COLLECTION_ITERATE] = imageCollectionIterate;
|
||||
|
||||
// graph.edges.push({
|
||||
// source: { node_id: IMAGE_COLLECTION, field: 'collection' },
|
||||
// destination: {
|
||||
// node_id: IMAGE_COLLECTION_ITERATE,
|
||||
// field: 'collection',
|
||||
// },
|
||||
// });
|
||||
|
||||
// graph.edges.push({
|
||||
// source: { node_id: IMAGE_COLLECTION_ITERATE, field: 'item' },
|
||||
// destination: {
|
||||
// node_id: IMAGE_TO_LATENTS,
|
||||
// field: 'image',
|
||||
// },
|
||||
// });
|
||||
// }
|
||||
|
||||
// add metadata accumulator, which is only mostly populated - some fields are added later
|
||||
graph.nodes[METADATA_ACCUMULATOR] = {
|
||||
id: METADATA_ACCUMULATOR,
|
||||
@ -385,6 +339,8 @@ export const buildLinearSDXLImageToImageGraph = (
|
||||
clip_skip: clipSkip,
|
||||
strength,
|
||||
init_image: initialImage.imageName,
|
||||
positive_style_prompt: positiveStylePrompt,
|
||||
negative_style_prompt: negativeStylePrompt,
|
||||
};
|
||||
|
||||
graph.edges.push({
|
||||
|
@ -3,7 +3,7 @@ import { RootState } from 'app/store/store';
|
||||
import { NonNullableGraph } from 'features/nodes/types/types';
|
||||
import { initialGenerationState } from 'features/parameters/store/generationSlice';
|
||||
import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
|
||||
import { addSDXLRefinerToGraph } from './buildSDXLRefinerGraph';
|
||||
import { addSDXLRefinerToGraph } from './addSDXLRefinerToGraph';
|
||||
import {
|
||||
LATENTS_TO_IMAGE,
|
||||
METADATA_ACCUMULATOR,
|
||||
@ -224,6 +224,8 @@ export const buildLinearSDXLTextToImageGraph = (
|
||||
controlnets: [],
|
||||
loras: [],
|
||||
clip_skip: clipSkip,
|
||||
positive_style_prompt: positiveStylePrompt,
|
||||
negative_style_prompt: negativeStylePrompt,
|
||||
};
|
||||
|
||||
graph.edges.push({
|
||||
|
@ -1014,6 +1014,11 @@ export type components = {
|
||||
* @description The LoRAs used for inference
|
||||
*/
|
||||
loras: (components["schemas"]["LoRAMetadataField"])[];
|
||||
/**
|
||||
* Vae
|
||||
* @description The VAE used for decoding, if the main model's default was not used
|
||||
*/
|
||||
vae?: components["schemas"]["VAEModelField"];
|
||||
/**
|
||||
* Strength
|
||||
* @description The strength used for latents-to-latents
|
||||
@ -1025,10 +1030,45 @@ export type components = {
|
||||
*/
|
||||
init_image?: string;
|
||||
/**
|
||||
* Vae
|
||||
* @description The VAE used for decoding, if the main model's default was not used
|
||||
* Positive Style Prompt
|
||||
* @description The positive style prompt parameter
|
||||
*/
|
||||
vae?: components["schemas"]["VAEModelField"];
|
||||
positive_style_prompt?: string;
|
||||
/**
|
||||
* Negative Style Prompt
|
||||
* @description The negative style prompt parameter
|
||||
*/
|
||||
negative_style_prompt?: string;
|
||||
/**
|
||||
* Refiner Model
|
||||
* @description The SDXL Refiner model used
|
||||
*/
|
||||
refiner_model?: components["schemas"]["MainModelField"];
|
||||
/**
|
||||
* Refiner Cfg Scale
|
||||
* @description The classifier-free guidance scale parameter used for the refiner
|
||||
*/
|
||||
refiner_cfg_scale?: number;
|
||||
/**
|
||||
* Refiner Steps
|
||||
* @description The number of steps used for the refiner
|
||||
*/
|
||||
refiner_steps?: number;
|
||||
/**
|
||||
* Refiner Scheduler
|
||||
* @description The scheduler used for the refiner
|
||||
*/
|
||||
refiner_scheduler?: string;
|
||||
/**
|
||||
* Refiner Aesthetic Store
|
||||
* @description The aesthetic score used for the refiner
|
||||
*/
|
||||
refiner_aesthetic_store?: number;
|
||||
/**
|
||||
* Refiner Start
|
||||
* @description The start value used for refiner denoising
|
||||
*/
|
||||
refiner_start?: number;
|
||||
};
|
||||
/**
|
||||
* CvInpaintInvocation
|
||||
@ -3268,6 +3308,46 @@ export type components = {
|
||||
* @description The VAE used for decoding, if the main model's default was not used
|
||||
*/
|
||||
vae?: components["schemas"]["VAEModelField"];
|
||||
/**
|
||||
* Positive Style Prompt
|
||||
* @description The positive style prompt parameter
|
||||
*/
|
||||
positive_style_prompt?: string;
|
||||
/**
|
||||
* Negative Style Prompt
|
||||
* @description The negative style prompt parameter
|
||||
*/
|
||||
negative_style_prompt?: string;
|
||||
/**
|
||||
* Refiner Model
|
||||
* @description The SDXL Refiner model used
|
||||
*/
|
||||
refiner_model?: components["schemas"]["MainModelField"];
|
||||
/**
|
||||
* Refiner Cfg Scale
|
||||
* @description The classifier-free guidance scale parameter used for the refiner
|
||||
*/
|
||||
refiner_cfg_scale?: number;
|
||||
/**
|
||||
* Refiner Steps
|
||||
* @description The number of steps used for the refiner
|
||||
*/
|
||||
refiner_steps?: number;
|
||||
/**
|
||||
* Refiner Scheduler
|
||||
* @description The scheduler used for the refiner
|
||||
*/
|
||||
refiner_scheduler?: string;
|
||||
/**
|
||||
* Refiner Aesthetic Store
|
||||
* @description The aesthetic score used for the refiner
|
||||
*/
|
||||
refiner_aesthetic_store?: number;
|
||||
/**
|
||||
* Refiner Start
|
||||
* @description The start value used for refiner denoising
|
||||
*/
|
||||
refiner_start?: number;
|
||||
};
|
||||
/**
|
||||
* MetadataAccumulatorOutput
|
||||
@ -5355,6 +5435,12 @@ export type components = {
|
||||
*/
|
||||
image?: components["schemas"]["ImageField"];
|
||||
};
|
||||
/**
|
||||
* StableDiffusion1ModelFormat
|
||||
* @description An enumeration.
|
||||
* @enum {string}
|
||||
*/
|
||||
StableDiffusion1ModelFormat: "checkpoint" | "diffusers";
|
||||
/**
|
||||
* StableDiffusion2ModelFormat
|
||||
* @description An enumeration.
|
||||
@ -5367,12 +5453,6 @@ export type components = {
|
||||
* @enum {string}
|
||||
*/
|
||||
StableDiffusionXLModelFormat: "checkpoint" | "diffusers";
|
||||
/**
|
||||
* StableDiffusion1ModelFormat
|
||||
* @description An enumeration.
|
||||
* @enum {string}
|
||||
*/
|
||||
StableDiffusion1ModelFormat: "checkpoint" | "diffusers";
|
||||
};
|
||||
responses: never;
|
||||
parameters: never;
|
||||
|
Loading…
Reference in New Issue
Block a user