feat: Add SDXL Base To Linear Text To Image

2024-08-30 20:32:17 +00:00 · 2023-07-25 15:15:57 +12:00
parent 3eaf8c3b2f
commit 57d833035d
5 changed files with 624 additions and 2 deletions
--- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedTextToImage.ts
+++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/userInvokedTextToImage.ts
@ -2,6 +2,7 @@ import { logger } from 'app/logging/logger';
 import { userInvoked } from 'app/store/actions';
 import { parseify } from 'common/util/serialize';
 import { textToImageGraphBuilt } from 'features/nodes/store/actions';
+import { buildLinearSDXLTextToImageGraph } from 'features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph';
 import { buildLinearTextToImageGraph } from 'features/nodes/util/graphBuilders/buildLinearTextToImageGraph';
 import { sessionReadyToInvoke } from 'features/system/store/actions';
 import { sessionCreated } from 'services/api/thunks/session';
@ -14,8 +15,15 @@ export const addUserInvokedTextToImageListener = () => {
    effect: async (action, { getState, dispatch, take }) => {
      const log = logger('session');
      const state = getState();
+      const model = state.generation.model;

-      const graph = buildLinearTextToImageGraph(state);
+      let graph;
+
+      if (model && model.base_model === 'sdxl') {
+        graph = buildLinearSDXLTextToImageGraph(state);
+      } else {
+        graph = buildLinearTextToImageGraph(state);
+      }

      dispatch(textToImageGraphBuilt(graph));

--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLImageToImageGraph.ts
@ -0,0 +1,380 @@
+import { logger } from 'app/logging/logger';
+import { RootState } from 'app/store/store';
+import { NonNullableGraph } from 'features/nodes/types/types';
+import { initialGenerationState } from 'features/parameters/store/generationSlice';
+import {
+  ImageResizeInvocation,
+  ImageToLatentsInvocation,
+} from 'services/api/types';
+import { addControlNetToLinearGraph } from './addControlNetToLinearGraph';
+import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
+import { addLoRAsToGraph } from './addLoRAsToGraph';
+import { addVAEToGraph } from './addVAEToGraph';
+import {
+  CLIP_SKIP,
+  IMAGE_TO_IMAGE_GRAPH,
+  IMAGE_TO_LATENTS,
+  LATENTS_TO_IMAGE,
+  LATENTS_TO_LATENTS,
+  MAIN_MODEL_LOADER,
+  METADATA_ACCUMULATOR,
+  NEGATIVE_CONDITIONING,
+  NOISE,
+  POSITIVE_CONDITIONING,
+  RESIZE,
+} from './constants';
+
+/**
+ * Builds the Image to Image tab graph (SDXL builder); throws if no initial image or model is in state.
+ */
+export const buildLinearSDXLImageToImageGraph = (
+  state: RootState
+): NonNullableGraph => {
+  const log = logger('nodes');
+  const {
+    positivePrompt,
+    negativePrompt,
+    model,
+    cfgScale: cfg_scale,
+    scheduler,
+    steps,
+    initialImage,
+    img2imgStrength: strength,
+    shouldFitToWidthHeight,
+    width,
+    height,
+    clipSkip,
+    shouldUseCpuNoise,
+    shouldUseNoiseSettings,
+  } = state.generation;
+
+  // TODO: add batch functionality
+  // const {
+  //   isEnabled: isBatchEnabled,
+  //   imageNames: batchImageNames,
+  //   asInitialImage,
+  // } = state.batch;
+
+  // const shouldBatch =
+  //   isBatchEnabled && batchImageNames.length > 0 && asInitialImage;
+
+  /**
+   * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the
+   * full graph here as a template. Then use the parameters from app state and set friendlier node
+   * ids.
+   *
+   * The only things we need extra logic for are the `fit` param, metadata, LoRAs, custom VAE,
+   * dynamic prompts (iteration and seed) and control net. These are added to the graph at the end.
+   */
+
+  if (!initialImage) {
+    log.error('No initial image found in state');
+    throw new Error('No initial image found in state');
+  }
+
+  if (!model) {
+    log.error('No model found in state');
+    throw new Error('No model found in state');
+  }
+
+  const use_cpu = shouldUseNoiseSettings
+    ? shouldUseCpuNoise
+    : initialGenerationState.shouldUseCpuNoise;
+
+  // copy-pasted graph from node editor, filled in with state values & friendly node ids
+  const graph: NonNullableGraph = {
+    id: IMAGE_TO_IMAGE_GRAPH,
+    nodes: {
+      [MAIN_MODEL_LOADER]: {
+        type: 'main_model_loader',
+        id: MAIN_MODEL_LOADER,
+        model,
+      },
+      [CLIP_SKIP]: {
+        type: 'clip_skip',
+        id: CLIP_SKIP,
+        skipped_layers: clipSkip,
+      },
+      [POSITIVE_CONDITIONING]: {
+        type: 'compel',
+        id: POSITIVE_CONDITIONING,
+        prompt: positivePrompt,
+      },
+      [NEGATIVE_CONDITIONING]: {
+        type: 'compel',
+        id: NEGATIVE_CONDITIONING,
+        prompt: negativePrompt,
+      },
+      [NOISE]: {
+        type: 'noise',
+        id: NOISE,
+        use_cpu,
+      },
+      [LATENTS_TO_IMAGE]: {
+        type: 'l2i',
+        id: LATENTS_TO_IMAGE,
+      },
+      [LATENTS_TO_LATENTS]: {
+        type: 'l2l',
+        id: LATENTS_TO_LATENTS,
+        cfg_scale,
+        scheduler,
+        steps,
+        strength,
+      },
+      [IMAGE_TO_LATENTS]: {
+        type: 'i2l',
+        id: IMAGE_TO_LATENTS,
+        // `image` is supplied below: set directly when no resize applies, or fed by an edge from `RESIZE` when `fit` applies
+        // image: {
+        //   image_name: initialImage.image_name,
+        // },
+      },
+    },
+    edges: [
+      {
+        source: {
+          node_id: MAIN_MODEL_LOADER,
+          field: 'unet',
+        },
+        destination: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'unet',
+        },
+      },
+      {
+        source: {
+          node_id: MAIN_MODEL_LOADER,
+          field: 'clip',
+        },
+        destination: {
+          node_id: CLIP_SKIP,
+          field: 'clip',
+        },
+      },
+      {
+        source: {
+          node_id: CLIP_SKIP,
+          field: 'clip',
+        },
+        destination: {
+          node_id: POSITIVE_CONDITIONING,
+          field: 'clip',
+        },
+      },
+      {
+        source: {
+          node_id: CLIP_SKIP,
+          field: 'clip',
+        },
+        destination: {
+          node_id: NEGATIVE_CONDITIONING,
+          field: 'clip',
+        },
+      },
+      {
+        source: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'latents',
+        },
+        destination: {
+          node_id: LATENTS_TO_IMAGE,
+          field: 'latents',
+        },
+      },
+      {
+        source: {
+          node_id: IMAGE_TO_LATENTS,
+          field: 'latents',
+        },
+        destination: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'latents',
+        },
+      },
+      {
+        source: {
+          node_id: NOISE,
+          field: 'noise',
+        },
+        destination: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'noise',
+        },
+      },
+      {
+        source: {
+          node_id: NEGATIVE_CONDITIONING,
+          field: 'conditioning',
+        },
+        destination: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'negative_conditioning',
+        },
+      },
+      {
+        source: {
+          node_id: POSITIVE_CONDITIONING,
+          field: 'conditioning',
+        },
+        destination: {
+          node_id: LATENTS_TO_LATENTS,
+          field: 'positive_conditioning',
+        },
+      },
+    ],
+  };
+
+  // handle `fit`: resize only when it is enabled and the init image's size differs from the requested width/height
+  if (
+    shouldFitToWidthHeight &&
+    (initialImage.width !== width || initialImage.height !== height)
+  ) {
+    // The init image needs to be resized to the specified width and height before being passed to `IMAGE_TO_LATENTS`
+
+    // Create a resize node, explicitly setting its image
+    const resizeNode: ImageResizeInvocation = {
+      id: RESIZE,
+      type: 'img_resize',
+      image: {
+        image_name: initialImage.imageName,
+      },
+      is_intermediate: true,
+      width,
+      height,
+    };
+
+    graph.nodes[RESIZE] = resizeNode;
+
+    // The `RESIZE` node then passes its image to `IMAGE_TO_LATENTS`
+    graph.edges.push({
+      source: { node_id: RESIZE, field: 'image' },
+      destination: {
+        node_id: IMAGE_TO_LATENTS,
+        field: 'image',
+      },
+    });
+
+    // The `RESIZE` node also passes its width and height to `NOISE`
+    graph.edges.push({
+      source: { node_id: RESIZE, field: 'width' },
+      destination: {
+        node_id: NOISE,
+        field: 'width',
+      },
+    });
+
+    graph.edges.push({
+      source: { node_id: RESIZE, field: 'height' },
+      destination: {
+        node_id: NOISE,
+        field: 'height',
+      },
+    });
+  } else {
+    // We are not resizing, so we need to set the image on the `IMAGE_TO_LATENTS` node explicitly
+    (graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image = {
+      image_name: initialImage.imageName,
+    };
+
+    // Pass the image's dimensions to the `NOISE` node
+    graph.edges.push({
+      source: { node_id: IMAGE_TO_LATENTS, field: 'width' },
+      destination: {
+        node_id: NOISE,
+        field: 'width',
+      },
+    });
+    graph.edges.push({
+      source: { node_id: IMAGE_TO_LATENTS, field: 'height' },
+      destination: {
+        node_id: NOISE,
+        field: 'height',
+      },
+    });
+  }
+
+  // TODO: add batch functionality
+  // if (isBatchEnabled && asInitialImage && batchImageNames.length > 0) {
+  //   // we are going to connect an iterate up to the init image
+  //   delete (graph.nodes[IMAGE_TO_LATENTS] as ImageToLatentsInvocation).image;
+
+  //   const imageCollection: ImageCollectionInvocation = {
+  //     id: IMAGE_COLLECTION,
+  //     type: 'image_collection',
+  //     images: batchImageNames.map((image_name) => ({ image_name })),
+  //   };
+
+  //   const imageCollectionIterate: IterateInvocation = {
+  //     id: IMAGE_COLLECTION_ITERATE,
+  //     type: 'iterate',
+  //   };
+
+  //   graph.nodes[IMAGE_COLLECTION] = imageCollection;
+  //   graph.nodes[IMAGE_COLLECTION_ITERATE] = imageCollectionIterate;
+
+  //   graph.edges.push({
+  //     source: { node_id: IMAGE_COLLECTION, field: 'collection' },
+  //     destination: {
+  //       node_id: IMAGE_COLLECTION_ITERATE,
+  //       field: 'collection',
+  //     },
+  //   });
+
+  //   graph.edges.push({
+  //     source: { node_id: IMAGE_COLLECTION_ITERATE, field: 'item' },
+  //     destination: {
+  //       node_id: IMAGE_TO_LATENTS,
+  //       field: 'image',
+  //     },
+  //   });
+  // }
+
+  // add metadata accumulator, which is only mostly populated - some fields are added later
+  graph.nodes[METADATA_ACCUMULATOR] = {
+    id: METADATA_ACCUMULATOR,
+    type: 'metadata_accumulator',
+    generation_mode: 'img2img',
+    cfg_scale,
+    height,
+    width,
+    positive_prompt: '', // set in addDynamicPromptsToGraph
+    negative_prompt: negativePrompt,
+    model,
+    seed: 0, // set in addDynamicPromptsToGraph
+    steps,
+    rand_device: use_cpu ? 'cpu' : 'cuda',
+    scheduler,
+    vae: undefined, // optional; set in addVAEToGraph
+    controlnets: [], // populated in addControlNetToLinearGraph
+    loras: [], // populated in addLoRAsToGraph
+    clip_skip: clipSkip,
+    strength,
+    init_image: initialImage.imageName,
+  };
+
+  graph.edges.push({
+    source: {
+      node_id: METADATA_ACCUMULATOR,
+      field: 'metadata',
+    },
+    destination: {
+      node_id: LATENTS_TO_IMAGE,
+      field: 'metadata',
+    },
+  });
+
+  // add LoRA support
+  addLoRAsToGraph(state, graph, LATENTS_TO_LATENTS);
+
+  // optionally add custom VAE
+  addVAEToGraph(state, graph);
+
+  // add dynamic prompts - also sets up core iteration and seed
+  addDynamicPromptsToGraph(state, graph);
+
+  // add controlnet, mutating `graph`
+  addControlNetToLinearGraph(state, graph, LATENTS_TO_LATENTS);
+
+  return graph;
+};
--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/buildLinearSDXLTextToImageGraph.ts
@ -0,0 +1,231 @@
+import { logger } from 'app/logging/logger';
+import { RootState } from 'app/store/store';
+import { NonNullableGraph } from 'features/nodes/types/types';
+import { initialGenerationState } from 'features/parameters/store/generationSlice';
+import { addDynamicPromptsToGraph } from './addDynamicPromptsToGraph';
+import {
+  LATENTS_TO_IMAGE,
+  METADATA_ACCUMULATOR,
+  NEGATIVE_CONDITIONING,
+  NOISE,
+  POSITIVE_CONDITIONING,
+  SDXL_MODEL_LOADER,
+  SDXL_TEXT_TO_IMAGE_GRAPH,
+  SDXL_TEXT_TO_LATENTS,
+} from './constants';
+
+export const buildLinearSDXLTextToImageGraph = (
+  state: RootState
+): NonNullableGraph => {
+  const log = logger('nodes');
+  const {
+    positivePrompt,
+    negativePrompt,
+    model,
+    cfgScale: cfg_scale,
+    scheduler,
+    steps,
+    width,
+    height,
+    clipSkip,
+    shouldUseCpuNoise,
+    shouldUseNoiseSettings,
+  } = state.generation;
+
+  const use_cpu = shouldUseNoiseSettings
+    ? shouldUseCpuNoise
+    : initialGenerationState.shouldUseCpuNoise;
+
+  if (!model) {
+    log.error('No model found in state');
+    throw new Error('No model found in state');
+  }
+
+  /**
+   * Linear SDXL Text to Image graph, built from `state.generation` (a missing model throws above).
+   *
+   * The easiest way to build linear graphs is to do it in the node editor, then copy and paste the
+   * full graph here as a template. Then use the parameters from app state and set friendlier node
+   * ids. After the template, only the metadata accumulator and dynamic prompts are added; the
+   * control net, LoRA and custom VAE helpers are not called by this builder.
+   */
+
+  // copy-pasted graph from node editor, filled in with state values & friendly node ids
+  const graph: NonNullableGraph = {
+    id: SDXL_TEXT_TO_IMAGE_GRAPH,
+    nodes: {
+      [SDXL_MODEL_LOADER]: {
+        type: 'sdxl_model_loader',
+        id: SDXL_MODEL_LOADER,
+        model,
+      },
+      [POSITIVE_CONDITIONING]: {
+        type: 'sdxl_compel_prompt',
+        id: POSITIVE_CONDITIONING,
+        prompt: positivePrompt,
+      },
+      [NEGATIVE_CONDITIONING]: {
+        type: 'sdxl_compel_prompt',
+        id: NEGATIVE_CONDITIONING,
+        prompt: negativePrompt,
+      },
+      [NOISE]: {
+        type: 'noise',
+        id: NOISE,
+        width,
+        height,
+        use_cpu,
+      },
+      [SDXL_TEXT_TO_LATENTS]: {
+        type: 't2l_sdxl',
+        id: SDXL_TEXT_TO_LATENTS,
+        cfg_scale,
+        scheduler,
+        steps,
+      },
+      [LATENTS_TO_IMAGE]: {
+        type: 'l2i',
+        id: LATENTS_TO_IMAGE,
+      },
+    },
+    edges: [
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'unet',
+        },
+        destination: {
+          node_id: SDXL_TEXT_TO_LATENTS,
+          field: 'unet',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'vae',
+        },
+        destination: {
+          node_id: LATENTS_TO_IMAGE,
+          field: 'vae',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'clip',
+        },
+        destination: {
+          node_id: POSITIVE_CONDITIONING,
+          field: 'clip',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'clip2',
+        },
+        destination: {
+          node_id: POSITIVE_CONDITIONING,
+          field: 'clip2',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'clip',
+        },
+        destination: {
+          node_id: NEGATIVE_CONDITIONING,
+          field: 'clip',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_MODEL_LOADER,
+          field: 'clip2',
+        },
+        destination: {
+          node_id: NEGATIVE_CONDITIONING,
+          field: 'clip2',
+        },
+      },
+      {
+        source: {
+          node_id: POSITIVE_CONDITIONING,
+          field: 'conditioning',
+        },
+        destination: {
+          node_id: SDXL_TEXT_TO_LATENTS,
+          field: 'positive_conditioning',
+        },
+      },
+      {
+        source: {
+          node_id: NEGATIVE_CONDITIONING,
+          field: 'conditioning',
+        },
+        destination: {
+          node_id: SDXL_TEXT_TO_LATENTS,
+          field: 'negative_conditioning',
+        },
+      },
+      {
+        source: {
+          node_id: NOISE,
+          field: 'noise',
+        },
+        destination: {
+          node_id: SDXL_TEXT_TO_LATENTS,
+          field: 'noise',
+        },
+      },
+      {
+        source: {
+          node_id: SDXL_TEXT_TO_LATENTS,
+          field: 'latents',
+        },
+        destination: {
+          node_id: LATENTS_TO_IMAGE,
+          field: 'latents',
+        },
+      },
+    ],
+  };
+
+  // add metadata accumulator, which is only mostly populated - some fields are added later
+  graph.nodes[METADATA_ACCUMULATOR] = {
+    id: METADATA_ACCUMULATOR,
+    type: 'metadata_accumulator',
+    generation_mode: 'sdxl_txt2img',
+    cfg_scale,
+    height,
+    width,
+    positive_prompt: '', // set in addDynamicPromptsToGraph
+    negative_prompt: negativePrompt,
+    model,
+    seed: 0, // set in addDynamicPromptsToGraph
+    steps,
+    rand_device: use_cpu ? 'cpu' : 'cuda',
+    scheduler,
+    vae: undefined, // no custom VAE is wired here; LATENTS_TO_IMAGE takes `vae` from the model loader edge above
+    controlnets: [],
+    loras: [],
+    clip_skip: clipSkip, // recorded in metadata only; this graph contains no clip skip node
+  };
+
+  graph.edges.push({
+    source: {
+      node_id: METADATA_ACCUMULATOR,
+      field: 'metadata',
+    },
+    destination: {
+      node_id: LATENTS_TO_IMAGE,
+      field: 'metadata',
+    },
+  });
+
+  // add dynamic prompts - also sets up core iteration and seed
+  addDynamicPromptsToGraph(state, graph);
+
+  return graph;
+};
--- a/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts
+++ b/invokeai/frontend/web/src/features/nodes/util/graphBuilders/constants.ts
@ -23,8 +23,11 @@ export const METADATA_ACCUMULATOR = 'metadata_accumulator';
 export const REALESRGAN = 'esrgan';
 export const DIVIDE = 'divide';
 export const SCALE = 'scale_image';
+export const SDXL_MODEL_LOADER = 'sdxl_model_loader';
+export const SDXL_TEXT_TO_LATENTS = 't2l_sdxl';

 // friendly graph ids
 export const TEXT_TO_IMAGE_GRAPH = 'text_to_image_graph';
+export const SDXL_TEXT_TO_IMAGE_GRAPH = 'sdxl_text_to_image_graph';
 export const IMAGE_TO_IMAGE_GRAPH = 'image_to_image_graph';
 export const INPAINT_GRAPH = 'inpaint_graph';
--- a/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx
+++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/MainModel/ParamMainModelSelect.tsx
@ -40,7 +40,7 @@ const ParamMainModelSelect = () => {
    const data: SelectItem[] = [];

    forEach(mainModels.entities, (model, id) => {
-      if (!model || ['sdxl', 'sdxl-refiner'].includes(model.base_model)) {
+      if (!model || ['sdxl-refiner'].includes(model.base_model)) {
        return;
      }