Compare commits

397 commits — ryan/promp...lstein/fea
Commit SHAs (397, as listed in the comparison; author and date columns not shown):

4556343fa6  5daaaa3b70  7a9a1694a4  5b296d3c87  6af84434e0  584e07182b  f787e9acf6
5a24b89e54  9b482e2a4f  df4dbe2d57  713bd11177  182571df4b  29bfe492b6  3fb4e3050c
39c7ec3cd9  26bfbdec7f  7a3eaa8da9  599db7296f  042aab4295  24f298283f  68dac6349d
b675fc19e8  659019cfd6  dcd61e1f82  f5c99b1488  810be3e1d4  60d754d1df  bd07c86db9
bcbf8b6bd8  356661459b  deb917825e  15415c6d85  76b0380b5f  2d58754789  9cdf1f599c
268be97ba0  a9014673a0  d36c43a10f  54a5c4e482  5e09a244e3  88648dca1a  8840df2b00
af159acbdf  471719bbbe  5bde4eaa7a  b126f2ffd5  9938f12ef0  982c266073  5c37391883
ddeafc6833  41b2d5d013  29d6f48901  b5ec04f10c  d5c9f4e47f  24d73387d8  e0d3927265
e5f7c2a9b7  b0760710d5  764accc921  6a01fce9c1  9c732ac3b1  b70891c661  4dbf851741
6c927a9fd4  096f001634  4837e578b2  1e547ef912  f6b8970bd1  29325a7214  8ecf72838d
c3ab8a6aa8  1931aa3e70  d3d8055055  476b0a0403  f66584713c  33624fc2fa  41c3e73a3c
97553a7de2  12ba15bfa9  09d1e190e7  8eb5d08499  9be6acde7d  5f83bb0069  b138882abc
0cd7cdb52e  1d8b7e2bcf  6461f4758d  3189ab6863  3f9a674d4b  587f59b25b  4952eada87
581029ebaa  42d68780de  28032a2f80  e381e021e9  641af64f93  a7b83c8b5b  4cc41e0188
442fc02429  9a4d075074  17ff8196cb  68f993998a  7da6120b39  6cd40965c4  408a1d6dbb
0b0abfbe8f  cc96dcf0ed  2604fd9fde  140670d00e  70233fae5d  6f457a6c4c  5c319f5356
991a04f090  c39fa75113  f7863e17ce  7c526390ed  2cff20f87a  90ec757802  4b85dfcefe
21deefdc41  857d74bbfe  fd7a635777  af9110e964  a61209206b  e05cc62e5f  4d4f921a4e
98db8f395b  f465a956a3  9edb02d7ef  6c4cf58a31  08993c0d29  4f8a4b0f22  a743f3c9b5
217fe40d99  b76bf50b93  571ba87e13  f27b6e2b44  981475a624  27ac61a4fb  675ffc2757
44b21f10f1  c6d49e8b1f  e6a512aa86  c3a6a6fb22  b9dc3460ba  63581ec980  08b1feeed7
f5cfdcf32d  e78fb428f0  31e270e32c  b5832768dc  4ce64b69cb  5a9173f766  0bb7ed44f6
332bc9da5b  08def3da95  daf899f9c4  13fb2d1f49  95dde802ea  fca119773b  0193267a53
b4cf78a95d  73386826d6  9f448fecb7  bcd1483a14  e206890e25  0a7048f650  e8ecf5e155
33e8604b57  cec7399366  bdae81e429  67c32f3d6c  94d64b8a78  fa3c0c81b3  66547b99c1
328e58be4c  18f89ed5ed  5701c79fab  2da9f913f3  6b10b59abe  918f77bce0  f170697ebe
556c6a1d84  aca2a2fa13  ff6398f7d8  cf996472b9  156d14c349  86f705bf48  1fd9631f2d
2227a2357f  58e7ab157d  8d16fa6a49  55e810efa3  2755316021  6525f18610  2ad13ac7eb
693a3eaff5  ffca792d5b  86a92bb6b5  171a4e6d80  e3a75a8adf  ee7503ce13  8500bac3ca
310719eb4c  e8e24822ec  c57a7afb87  84d028898c  ed0174fbc6  9e582563eb  faa88f72bf
0d69a31df0  daa5a88eb2  5b84e117b2  eb257d2d28  5810cee6c9  eef88d1f83  78f6850fc0
bd8890be11  adf1a977ea  e1509bcb45  edcaf8287d  39bd30f2a0  102b47190f  269fe2e3bb
b32aa1c77f  6656544ed5  4c75b93410  5be0de967d  f8e27b837b  47414be1e6  74cef38bcf
bb876b8d4e  ba747373db  95661c8b21  e5d9ca013e  4166c756ce  4f0dfbd34d  b70ac88684
24609da6ab  524647b1f1  cf1af94f53  2a9fdc6314  46c632e7cc  653f63ae71  8a9e2f57a4
31949ed2f2  3657285b1b  e4b5975305  b59825edc0  25788f6869  0ccb304b8b  ca5a4ee59d
4fdefe58c7  9870f5a96f  c296ae8cfe  17493f4ae0  2503dca813  cb61ef9bb1  1831ed620f
c385e76356  ff1972fbb3  c4b3405bfa  ab2548c0cd  dc2a3363b0  d7a5fe2805  4e49689d46
ca8441a32f  44284d671c  e89de1d5b7  6db63349f8  7f6f892533  d1bbd0cf80  bd73b6b2af
0d40a7d865  c2f6b80246  80f5f8210a  b7383cc0e5  2172e4d292  ab0bfa709a  6af659b1da
db664afc49  b99a53e64e  5f4ce6fda3  93e95ce53f  2997f0a1f8  40b262bcc2  a26f050cbb
94b5b2a467  b4519ea61f  7f7ce291b5  aeb53563ff  e8d2e2330e  4c6b9ce7c9  87a2221d72
76aa6bdf05  416d29fb83  0c1994d682  19c00241c6  633bbb4e85  a221ab2fb6  0279a27f66
54aef4959c  4017609b91  cb0bffedd5  1fd2a91ccd  c323a760a5  9d1fcba415  075e0405f9
bf6066d834  5635f65ee9  6317cf8ef9  9e1daf06f7  e1a718b512  cbce89162b  b46b20210d
8e89157a83  ca21996a97  62aa064e56  7c975f0d00  8107884c8d  a2f49ef7c1  e2e47fd606
c098edc6b2  bc1d9748ce  7b8e25f525  db52f5606f  de39c5ed21  d014dc94fd  39e804d0f8
154e8f6e78  2d31b82e60  8f934747f3  4352341a00  d7e0ec52ff  1072b74c0e  46dc8c6641
a8bc6ab5b1  bd91bd4a84  8756a6b8c3  2e0cebb571  c3a8184431  ffa39d74b3  f9d3966ea2
7cee4e42a7  071c7c7c7e  818045f678  7edefbefff  29efab70b7  ac6adc392a  a2ef5d56ee
13f3560e55  c4bd60e00f  54eda9163c  582f384fff  a43211e650  6cb0581b0d  845d77916e
f18431a999  5060bf2f62  7854d913b2  890a3ce32a  fb4b3f3350  d166b08b6a  5266e9e682
d0265e21b0  3126e8e49a  9e3412d776  4a09cc57be  5ab36e0433  d2bf3629bf  d9b217d908
2847f1b5ac  bc30850f3a  7668dc68a0  ea449f5a0a  5a1ed99ca1  3a2707ac02  ce5b1103ed
fd91b83d86  a0a54348e8  43b3e242b0  4e8dcb7a1a  3cb13d6288  4f01c0f2d3  87eb018380
5003e5d763  e92af52fb8  5f0fe3c8a9  339dddd018  1b359b55cb  58f3072b91  9e7b470189
42356ec866  1748848b7b  5772965f09  e046e60e1c  9a1420280e
.github/workflows/python-checks.yml

@@ -62,7 +62,7 @@ jobs:
       - name: install ruff
         if: ${{ steps.changed-files.outputs.python_any_changed == 'true' || inputs.always_run == true }}
-        run: pip install ruff
+        run: pip install ruff==0.6.0
         shell: bash

       - name: ruff check
@@ -55,6 +55,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 FROM node:20-slim AS web-builder
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
+RUN corepack use pnpm@8.x
 RUN corepack enable

 WORKDIR /build
@@ -17,7 +17,7 @@
 set -eu

 # Ensure we're in the correct folder in case user's CWD is somewhere else
-scriptdir=$(dirname "$0")
+scriptdir=$(dirname $(readlink -f "$0"))
 cd "$scriptdir"

 . .venv/bin/activate
@@ -1,5 +1,6 @@
 # Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)

+import asyncio
 from logging import Logger

 import torch

@@ -31,6 +32,8 @@ from invokeai.app.services.session_processor.session_processor_default import (
 )
 from invokeai.app.services.session_queue.session_queue_sqlite import SqliteSessionQueue
 from invokeai.app.services.shared.sqlite.sqlite_util import init_db
+from invokeai.app.services.style_preset_images.style_preset_images_disk import StylePresetImageFileStorageDisk
+from invokeai.app.services.style_preset_records.style_preset_records_sqlite import SqliteStylePresetRecordsStorage
 from invokeai.app.services.urls.urls_default import LocalUrlService
 from invokeai.app.services.workflow_records.workflow_records_sqlite import SqliteWorkflowRecordsStorage
 from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningFieldData

@@ -63,7 +66,12 @@ class ApiDependencies:
     invoker: Invoker

     @staticmethod
-    def initialize(config: InvokeAIAppConfig, event_handler_id: int, logger: Logger = logger) -> None:
+    def initialize(
+        config: InvokeAIAppConfig,
+        event_handler_id: int,
+        loop: asyncio.AbstractEventLoop,
+        logger: Logger = logger,
+    ) -> None:
         logger.info(f"InvokeAI version {__version__}")
         logger.info(f"Root directory = {str(config.root_path)}")

@@ -74,6 +82,7 @@ class ApiDependencies:
         image_files = DiskImageFileStorage(f"{output_folder}/images")

         model_images_folder = config.models_path
+        style_presets_folder = config.style_presets_path

         db = init_db(config=config, logger=logger, image_files=image_files)

@@ -84,7 +93,7 @@ class ApiDependencies:
         board_images = BoardImagesService()
         board_records = SqliteBoardRecordStorage(db=db)
         boards = BoardService()
-        events = FastAPIEventService(event_handler_id)
+        events = FastAPIEventService(event_handler_id, loop=loop)
         bulk_download = BulkDownloadService()
         image_records = SqliteImageRecordStorage(db=db)
         images = ImageService()

@@ -109,6 +118,8 @@ class ApiDependencies:
         session_queue = SqliteSessionQueue(db=db)
         urls = LocalUrlService()
         workflow_records = SqliteWorkflowRecordsStorage(db=db)
+        style_preset_records = SqliteStylePresetRecordsStorage(db=db)
+        style_preset_image_files = StylePresetImageFileStorageDisk(style_presets_folder / "images")

         services = InvocationServices(
             board_image_records=board_image_records,

@@ -134,6 +145,8 @@ class ApiDependencies:
             workflow_records=workflow_records,
             tensors=tensors,
             conditioning=conditioning,
+            style_preset_records=style_preset_records,
+            style_preset_image_files=style_preset_image_files,
         )

         ApiDependencies.invoker = Invoker(services)
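For context on why `initialize` now takes a `loop` argument: an event service constructed on the main thread can capture the asyncio loop and let worker threads schedule coroutines onto it safely. A minimal, self-contained sketch of that pattern (hypothetical names — this is not InvokeAI's actual `FastAPIEventService`):

```python
import asyncio
import threading


class LoopAwareEventService:
    """Hypothetical illustration of holding a loop reference for threadsafe dispatch."""

    def __init__(self, loop: asyncio.AbstractEventLoop) -> None:
        self._loop = loop

    def dispatch_from_worker(self, payload: str) -> None:
        # run_coroutine_threadsafe is the standard way to hand a coroutine to
        # an asyncio loop from a non-async worker thread.
        asyncio.run_coroutine_threadsafe(self._emit(payload), self._loop)

    async def _emit(self, payload: str) -> None:
        print(f"event: {payload}")


async def main() -> None:
    service = LoopAwareEventService(asyncio.get_running_loop())
    threading.Thread(target=service.dispatch_from_worker, args=("queue item done",)).start()
    await asyncio.sleep(0.1)  # give the scheduled coroutine time to run


asyncio.run(main())
```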
@@ -218,9 +218,8 @@ async def get_image_workflow(
         raise HTTPException(status_code=404)


-@images_router.api_route(
+@images_router.get(
     "/i/{image_name}/full",
-    methods=["GET", "HEAD"],
     operation_id="get_image_full",
     response_class=Response,
     responses={

@@ -231,6 +230,18 @@ async def get_image_workflow(
         404: {"description": "Image not found"},
     },
 )
+@images_router.head(
+    "/i/{image_name}/full",
+    operation_id="get_image_full_head",
+    response_class=Response,
+    responses={
+        200: {
+            "description": "Return the full-resolution image",
+            "content": {"image/png": {}},
+        },
+        404: {"description": "Image not found"},
+    },
+)
 async def get_image_full(
     image_name: str = Path(description="The name of full-resolution image file to get"),
 ) -> Response:

@@ -242,6 +253,7 @@ async def get_image_full(
             content = f.read()
         response = Response(content, media_type="image/png")
         response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
+        response.headers["Content-Disposition"] = f'inline; filename="{image_name}"'
         return response
     except Exception:
         raise HTTPException(status_code=404)
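Splitting the combined `api_route` into separate GET and HEAD routes means a client can probe an image's headers without downloading its body. A hedged client-side sketch — the host, port, and router prefix below are assumptions, not taken from this diff:

```python
import requests

BASE = "http://localhost:9090/api/v1/images"  # assumed local InvokeAI address

# HEAD: check that the image exists and inspect headers without the PNG body.
head = requests.head(f"{BASE}/i/example.png/full")
print(head.status_code, head.headers.get("Content-Disposition"))

# GET: download the full-resolution image itself.
full = requests.get(f"{BASE}/i/example.png/full")
if full.ok:
    with open("example.png", "wb") as f:
        f.write(full.content)
```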
@@ -6,7 +6,7 @@ import pathlib
 import traceback
 from copy import deepcopy
 from tempfile import TemporaryDirectory
-from typing import Any, Dict, List, Optional, Type
+from typing import List, Optional, Type

 from fastapi import Body, Path, Query, Response, UploadFile
 from fastapi.responses import FileResponse, HTMLResponse

@@ -430,13 +430,11 @@ async def delete_model_image(
 async def install_model(
     source: str = Query(description="Model source to install, can be a local path, repo_id, or remote URL"),
     inplace: Optional[bool] = Query(description="Whether or not to install a local model in place", default=False),
-    # TODO(MM2): Can we type this?
-    config: Optional[Dict[str, Any]] = Body(
-        description="Dict of fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
-        default=None,
+    access_token: Optional[str] = Query(description="access token for the remote resource", default=None),
+    config: ModelRecordChanges = Body(
+        description="Object containing fields that override auto-probed values in the model config record, such as name, description and prediction_type ",
         example={"name": "string", "description": "string"},
     ),
-    access_token: Optional[str] = None,
 ) -> ModelInstallJob:
     """Install a model using a string identifier.

@@ -451,8 +449,9 @@ async def install_model(
     - model/name:fp16:path/to/model.safetensors
     - model/name::path/to/model.safetensors

-    `config` is an optional dict containing model configuration values that will override
-    the ones that are probed automatically.
+    `config` is a ModelRecordChanges object. Fields in this object will override
+    the ones that are probed automatically. Pass an empty object to accept
+    all the defaults.

     `access_token` is an optional access token for use with Urls that require
     authentication.

@@ -737,7 +736,7 @@ async def convert_model(
         # write the converted file to the convert path
         raw_model = converted_model.model
         assert hasattr(raw_model, "save_pretrained")
-        raw_model.save_pretrained(convert_path)
+        raw_model.save_pretrained(convert_path)  # type: ignore
         assert convert_path.exists()

         # temporarily rename the original safetensors file so that there is no naming conflict

@@ -750,12 +749,12 @@ async def convert_model(
         try:
             new_key = installer.install_path(
                 convert_path,
-                config={
-                    "name": original_name,
-                    "description": model_config.description,
-                    "hash": model_config.hash,
-                    "source": model_config.source,
-                },
+                config=ModelRecordChanges(
+                    name=original_name,
+                    description=model_config.description,
+                    hash=model_config.hash,
+                    source=model_config.source,
+                ),
             )
         except Exception as e:
             logger.error(str(e))
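With `config` typed as `ModelRecordChanges`, an install request sends its overrides as a structured JSON body, and an empty object accepts every auto-probed default. A sketch of such a call; the URL and route prefix are assumptions, not part of this diff:

```python
import requests

resp = requests.post(
    "http://localhost:9090/api/v2/models/install",  # assumed local address/prefix
    params={"source": "https://example.com/model.safetensors", "inplace": False},
    json={},  # empty ModelRecordChanges: accept all auto-probed defaults
)
print(resp.status_code, resp.json())
```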
invokeai/app/api/routers/style_presets.py (new file, 276 lines)

import csv
import io
import json
import traceback
from typing import Optional

import pydantic
from fastapi import APIRouter, File, Form, HTTPException, Path, Response, UploadFile
from fastapi.responses import FileResponse
from PIL import Image
from pydantic import BaseModel, Field

from invokeai.app.api.dependencies import ApiDependencies
from invokeai.app.api.routers.model_manager import IMAGE_MAX_AGE
from invokeai.app.services.style_preset_images.style_preset_images_common import StylePresetImageFileNotFoundException
from invokeai.app.services.style_preset_records.style_preset_records_common import (
    InvalidPresetImportDataError,
    PresetData,
    PresetType,
    StylePresetChanges,
    StylePresetNotFoundError,
    StylePresetRecordWithImage,
    StylePresetWithoutId,
    UnsupportedFileTypeError,
    parse_presets_from_file,
)


class StylePresetUpdateFormData(BaseModel):
    name: str = Field(description="Preset name")
    positive_prompt: str = Field(description="Positive prompt")
    negative_prompt: str = Field(description="Negative prompt")


class StylePresetCreateFormData(StylePresetUpdateFormData):
    type: PresetType = Field(description="Preset type")


style_presets_router = APIRouter(prefix="/v1/style_presets", tags=["style_presets"])


@style_presets_router.get(
    "/i/{style_preset_id}",
    operation_id="get_style_preset",
    responses={
        200: {"model": StylePresetRecordWithImage},
    },
)
async def get_style_preset(
    style_preset_id: str = Path(description="The style preset to get"),
) -> StylePresetRecordWithImage:
    """Gets a style preset"""
    try:
        image = ApiDependencies.invoker.services.style_preset_image_files.get_url(style_preset_id)
        style_preset = ApiDependencies.invoker.services.style_preset_records.get(style_preset_id)
        return StylePresetRecordWithImage(image=image, **style_preset.model_dump())
    except StylePresetNotFoundError:
        raise HTTPException(status_code=404, detail="Style preset not found")


@style_presets_router.patch(
    "/i/{style_preset_id}",
    operation_id="update_style_preset",
    responses={
        200: {"model": StylePresetRecordWithImage},
    },
)
async def update_style_preset(
    image: Optional[UploadFile] = File(description="The image file to upload", default=None),
    style_preset_id: str = Path(description="The id of the style preset to update"),
    data: str = Form(description="The data of the style preset to update"),
) -> StylePresetRecordWithImage:
    """Updates a style preset"""
    if image is not None:
        if not image.content_type or not image.content_type.startswith("image"):
            raise HTTPException(status_code=415, detail="Not an image")

        contents = await image.read()
        try:
            pil_image = Image.open(io.BytesIO(contents))
        except Exception:
            ApiDependencies.invoker.services.logger.error(traceback.format_exc())
            raise HTTPException(status_code=415, detail="Failed to read image")

        try:
            ApiDependencies.invoker.services.style_preset_image_files.save(style_preset_id, pil_image)
        except ValueError as e:
            raise HTTPException(status_code=409, detail=str(e))
    else:
        try:
            ApiDependencies.invoker.services.style_preset_image_files.delete(style_preset_id)
        except StylePresetImageFileNotFoundException:
            pass

    try:
        parsed_data = json.loads(data)
        validated_data = StylePresetUpdateFormData(**parsed_data)

        name = validated_data.name
        positive_prompt = validated_data.positive_prompt
        negative_prompt = validated_data.negative_prompt
    except pydantic.ValidationError:
        raise HTTPException(status_code=400, detail="Invalid preset data")

    preset_data = PresetData(positive_prompt=positive_prompt, negative_prompt=negative_prompt)
    changes = StylePresetChanges(name=name, preset_data=preset_data)

    style_preset_image = ApiDependencies.invoker.services.style_preset_image_files.get_url(style_preset_id)
    style_preset = ApiDependencies.invoker.services.style_preset_records.update(
        style_preset_id=style_preset_id, changes=changes
    )
    return StylePresetRecordWithImage(image=style_preset_image, **style_preset.model_dump())


@style_presets_router.delete(
    "/i/{style_preset_id}",
    operation_id="delete_style_preset",
)
async def delete_style_preset(
    style_preset_id: str = Path(description="The style preset to delete"),
) -> None:
    """Deletes a style preset"""
    try:
        ApiDependencies.invoker.services.style_preset_image_files.delete(style_preset_id)
    except StylePresetImageFileNotFoundException:
        pass

    ApiDependencies.invoker.services.style_preset_records.delete(style_preset_id)


@style_presets_router.post(
    "/",
    operation_id="create_style_preset",
    responses={
        200: {"model": StylePresetRecordWithImage},
    },
)
async def create_style_preset(
    image: Optional[UploadFile] = File(description="The image file to upload", default=None),
    data: str = Form(description="The data of the style preset to create"),
) -> StylePresetRecordWithImage:
    """Creates a style preset"""

    try:
        parsed_data = json.loads(data)
        validated_data = StylePresetCreateFormData(**parsed_data)

        name = validated_data.name
        type = validated_data.type
        positive_prompt = validated_data.positive_prompt
        negative_prompt = validated_data.negative_prompt
    except pydantic.ValidationError:
        raise HTTPException(status_code=400, detail="Invalid preset data")

    preset_data = PresetData(positive_prompt=positive_prompt, negative_prompt=negative_prompt)
    style_preset = StylePresetWithoutId(name=name, preset_data=preset_data, type=type)
    new_style_preset = ApiDependencies.invoker.services.style_preset_records.create(style_preset=style_preset)

    if image is not None:
        if not image.content_type or not image.content_type.startswith("image"):
            raise HTTPException(status_code=415, detail="Not an image")

        contents = await image.read()
        try:
            pil_image = Image.open(io.BytesIO(contents))
        except Exception:
            ApiDependencies.invoker.services.logger.error(traceback.format_exc())
            raise HTTPException(status_code=415, detail="Failed to read image")

        try:
            ApiDependencies.invoker.services.style_preset_image_files.save(new_style_preset.id, pil_image)
        except ValueError as e:
            raise HTTPException(status_code=409, detail=str(e))

    preset_image = ApiDependencies.invoker.services.style_preset_image_files.get_url(new_style_preset.id)
    return StylePresetRecordWithImage(image=preset_image, **new_style_preset.model_dump())


@style_presets_router.get(
    "/",
    operation_id="list_style_presets",
    responses={
        200: {"model": list[StylePresetRecordWithImage]},
    },
)
async def list_style_presets() -> list[StylePresetRecordWithImage]:
    """Gets a page of style presets"""
    style_presets_with_image: list[StylePresetRecordWithImage] = []
    style_presets = ApiDependencies.invoker.services.style_preset_records.get_many()
    for preset in style_presets:
        image = ApiDependencies.invoker.services.style_preset_image_files.get_url(preset.id)
        style_preset_with_image = StylePresetRecordWithImage(image=image, **preset.model_dump())
        style_presets_with_image.append(style_preset_with_image)

    return style_presets_with_image


@style_presets_router.get(
    "/i/{style_preset_id}/image",
    operation_id="get_style_preset_image",
    responses={
        200: {
            "description": "The style preset image was fetched successfully",
        },
        400: {"description": "Bad request"},
        404: {"description": "The style preset image could not be found"},
    },
    status_code=200,
)
async def get_style_preset_image(
    style_preset_id: str = Path(description="The id of the style preset image to get"),
) -> FileResponse:
    """Gets an image file that previews the model"""

    try:
        path = ApiDependencies.invoker.services.style_preset_image_files.get_path(style_preset_id)

        response = FileResponse(
            path,
            media_type="image/png",
            filename=style_preset_id + ".png",
            content_disposition_type="inline",
        )
        response.headers["Cache-Control"] = f"max-age={IMAGE_MAX_AGE}"
        return response
    except Exception:
        raise HTTPException(status_code=404)


@style_presets_router.get(
    "/export",
    operation_id="export_style_presets",
    responses={200: {"content": {"text/csv": {}}, "description": "A CSV file with the requested data."}},
    status_code=200,
)
async def export_style_presets():
    # Create an in-memory stream to store the CSV data
    output = io.StringIO()
    writer = csv.writer(output)

    # Write the header
    writer.writerow(["name", "prompt", "negative_prompt"])

    style_presets = ApiDependencies.invoker.services.style_preset_records.get_many(type=PresetType.User)

    for preset in style_presets:
        writer.writerow([preset.name, preset.preset_data.positive_prompt, preset.preset_data.negative_prompt])

    csv_data = output.getvalue()
    output.close()

    return Response(
        content=csv_data,
        media_type="text/csv",
        headers={"Content-Disposition": "attachment; filename=prompt_templates.csv"},
    )


@style_presets_router.post(
    "/import",
    operation_id="import_style_presets",
)
async def import_style_presets(file: UploadFile = File(description="The file to import")):
    try:
        style_presets = await parse_presets_from_file(file)
        ApiDependencies.invoker.services.style_preset_records.create_many(style_presets)
    except InvalidPresetImportDataError as e:
        ApiDependencies.invoker.services.logger.error(traceback.format_exc())
        raise HTTPException(status_code=400, detail=str(e))
    except UnsupportedFileTypeError as e:
        ApiDependencies.invoker.services.logger.error(traceback.format_exc())
        raise HTTPException(status_code=415, detail=str(e))
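The create and update endpoints take multipart form data: a JSON string in the `data` field plus an optional `image` file, which is why the handlers `json.loads` the field and then validate it with the pydantic form models. A hedged usage sketch — the host, port, and the `"user"` preset-type value are assumptions:

```python
import json

import requests

payload = {
    "name": "Moody portrait",
    "type": "user",  # assumed PresetType value
    "positive_prompt": "dramatic rim lighting, 85mm portrait",
    "negative_prompt": "blurry, low quality",
}
with open("preview.png", "rb") as f:
    resp = requests.post(
        "http://localhost:9090/api/v1/style_presets/",  # assumed local address
        data={"data": json.dumps(payload)},  # JSON string form field, per the handler
        files={"image": ("preview.png", f, "image/png")},
    )
print(resp.status_code, resp.json())
```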
@@ -30,6 +30,7 @@ from invokeai.app.api.routers import (
     images,
     model_manager,
     session_queue,
+    style_presets,
     utilities,
     workflows,
 )

@@ -55,11 +56,13 @@ mimetypes.add_type("text/css", ".css")
 torch_device_name = TorchDevice.get_torch_device_name()
 logger.info(f"Using torch device: {torch_device_name}")

+loop = asyncio.new_event_loop()
+

 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Add startup event to load dependencies
-    ApiDependencies.initialize(config=app_config, event_handler_id=event_handler_id, logger=logger)
+    ApiDependencies.initialize(config=app_config, event_handler_id=event_handler_id, loop=loop, logger=logger)
     yield
     # Shut down threads
     ApiDependencies.shutdown()

@@ -106,6 +109,7 @@ app.include_router(board_images.board_images_router, prefix="/api")
 app.include_router(app_info.app_router, prefix="/api")
 app.include_router(session_queue.session_queue_router, prefix="/api")
 app.include_router(workflows.workflows_router, prefix="/api")
+app.include_router(style_presets.style_presets_router, prefix="/api")

 app.openapi = get_openapi_func(app)

@@ -184,8 +188,6 @@ def invoke_api() -> None:
     check_cudnn(logger)

-    # Start our own event loop for eventing usage
-    loop = asyncio.new_event_loop()
     config = uvicorn.Config(
         app=app,
         host=app_config.host,
@@ -80,12 +80,12 @@ class CompelInvocation(BaseInvocation):
         with (
             # apply all patches while the model is on the target device
-            text_encoder_info.model_on_device() as (model_state_dict, text_encoder),
+            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
             tokenizer_info as tokenizer,
             ModelPatcher.apply_lora_text_encoder(
                 text_encoder,
                 loras=_lora_loader(),
-                model_state_dict=model_state_dict,
+                cached_weights=cached_weights,
             ),
             # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
             ModelPatcher.apply_clip_skip(text_encoder, self.clip.skipped_layers),

@@ -175,13 +175,13 @@ class SDXLPromptInvocationBase:
         with (
             # apply all patches while the model is on the target device
-            text_encoder_info.model_on_device() as (state_dict, text_encoder),
+            text_encoder_info.model_on_device() as (cached_weights, text_encoder),
             tokenizer_info as tokenizer,
             ModelPatcher.apply_lora(
                 text_encoder,
                 loras=_lora_loader(),
                 prefix=lora_prefix,
-                model_state_dict=state_dict,
+                cached_weights=cached_weights,
             ),
             # Apply CLIP Skip after LoRA to prevent LoRA application from failing on skipped layers.
             ModelPatcher.apply_clip_skip(text_encoder, clip_field.skipped_layers),
|
@ -21,6 +21,8 @@ from controlnet_aux import (
|
||||
from controlnet_aux.util import HWC3, ade_palette
|
||||
from PIL import Image
|
||||
from pydantic import BaseModel, Field, field_validator, model_validator
|
||||
from transformers import pipeline
|
||||
from transformers.pipelines import DepthEstimationPipeline
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import (
|
||||
BaseInvocation,
|
||||
@ -44,13 +46,12 @@ from invokeai.app.invocations.util import validate_begin_end_step, validate_weig
|
||||
from invokeai.app.services.shared.invocation_context import InvocationContext
|
||||
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
|
||||
from invokeai.backend.image_util.canny import get_canny_edges
|
||||
from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
|
||||
from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import DepthAnythingPipeline
|
||||
from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
|
||||
from invokeai.backend.image_util.hed import HEDProcessor
|
||||
from invokeai.backend.image_util.lineart import LineartProcessor
|
||||
from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
|
||||
from invokeai.backend.image_util.util import np_to_pil, pil_to_np
|
||||
from invokeai.backend.util.devices import TorchDevice
|
||||
|
||||
|
||||
class ControlField(BaseModel):
|
||||
@ -592,7 +593,14 @@ class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
|
||||
return color_map
|
||||
|
||||
|
||||
DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
|
||||
DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small", "small_v2"]
|
||||
# DepthAnything V2 Small model is licensed under Apache 2.0 but not the base and large models.
|
||||
DEPTH_ANYTHING_MODELS = {
|
||||
"large": "LiheYoung/depth-anything-large-hf",
|
||||
"base": "LiheYoung/depth-anything-base-hf",
|
||||
"small": "LiheYoung/depth-anything-small-hf",
|
||||
"small_v2": "depth-anything/Depth-Anything-V2-Small-hf",
|
||||
}
|
||||
|
||||
|
||||
@invocation(
|
||||
@ -600,28 +608,33 @@ DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
|
||||
title="Depth Anything Processor",
|
||||
tags=["controlnet", "depth", "depth anything"],
|
||||
category="controlnet",
|
||||
version="1.1.2",
|
||||
version="1.1.3",
|
||||
)
|
||||
class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
|
||||
"""Generates a depth map based on the Depth Anything algorithm"""
|
||||
|
||||
model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
|
||||
default="small", description="The size of the depth model to use"
|
||||
default="small_v2", description="The size of the depth model to use"
|
||||
)
|
||||
resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
|
||||
|
||||
def run_processor(self, image: Image.Image) -> Image.Image:
|
||||
def loader(model_path: Path):
|
||||
return DepthAnythingDetector.load_model(
|
||||
model_path, model_size=self.model_size, device=TorchDevice.choose_torch_device()
|
||||
)
|
||||
def load_depth_anything(model_path: Path):
|
||||
depth_anything_pipeline = pipeline(model=str(model_path), task="depth-estimation", local_files_only=True)
|
||||
assert isinstance(depth_anything_pipeline, DepthEstimationPipeline)
|
||||
return DepthAnythingPipeline(depth_anything_pipeline)
|
||||
|
||||
with self._context.models.load_remote_model(
|
||||
source=DEPTH_ANYTHING_MODELS[self.model_size], loader=loader
|
||||
) as model:
|
||||
depth_anything_detector = DepthAnythingDetector(model, TorchDevice.choose_torch_device())
|
||||
processed_image = depth_anything_detector(image=image, resolution=self.resolution)
|
||||
return processed_image
|
||||
source=DEPTH_ANYTHING_MODELS[self.model_size], loader=load_depth_anything
|
||||
) as depth_anything_detector:
|
||||
assert isinstance(depth_anything_detector, DepthAnythingPipeline)
|
||||
depth_map = depth_anything_detector.generate_depth(image)
|
||||
|
||||
# Resizing to user target specified size
|
||||
new_height = int(image.size[1] * (self.resolution / image.size[0]))
|
||||
depth_map = depth_map.resize((self.resolution, new_height))
|
||||
|
||||
return depth_map
|
||||
|
||||
|
||||
@invocation(
|
||||
|
@@ -39,7 +39,7 @@ class GradientMaskOutput(BaseInvocationOutput):
     title="Create Gradient Mask",
     tags=["mask", "denoise"],
     category="latents",
-    version="1.1.0",
+    version="1.2.0",
 )
 class CreateGradientMaskInvocation(BaseInvocation):
     """Creates mask for denoising model run."""

@@ -93,6 +93,7 @@ class CreateGradientMaskInvocation(BaseInvocation):
         # redistribute blur so that the original edges are 0 and blur outwards to 1
         blur_tensor = (blur_tensor - 0.5) * 2
+        blur_tensor[blur_tensor < 0] = 0.0

         threshold = 1 - self.minimum_denoise
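The added clamp guarantees the redistributed mask stays in [0, 1]: after re-centering, everything below the original edge midpoint collapses to 0. A quick numeric check of the two lines:

```python
import torch

blur_tensor = torch.linspace(0.0, 1.0, 5)  # [0.00, 0.25, 0.50, 0.75, 1.00]
blur_tensor = (blur_tensor - 0.5) * 2      # [-1.0, -0.5, 0.00, 0.50, 1.00]
blur_tensor[blur_tensor < 0] = 0.0         # [ 0.0,  0.0, 0.00, 0.50, 1.00]
print(blur_tensor)
```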
@@ -37,9 +37,9 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.controlnet_utils import prepare_control_image
 from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
 from invokeai.backend.lora import LoRAModelRaw
-from invokeai.backend.model_manager import BaseModelType
+from invokeai.backend.model_manager import BaseModelType, ModelVariantType
 from invokeai.backend.model_patcher import ModelPatcher
-from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
+from invokeai.backend.stable_diffusion import PipelineIntermediateState
 from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
 from invokeai.backend.stable_diffusion.diffusers_pipeline import (
     ControlNetData,

@@ -58,7 +58,15 @@ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
 from invokeai.backend.stable_diffusion.diffusion.custom_atttention import CustomAttnProcessor2_0
 from invokeai.backend.stable_diffusion.diffusion_backend import StableDiffusionBackend
 from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
+from invokeai.backend.stable_diffusion.extensions.controlnet import ControlNetExt
+from invokeai.backend.stable_diffusion.extensions.freeu import FreeUExt
+from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
+from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
+from invokeai.backend.stable_diffusion.extensions.lora import LoRAExt
 from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
+from invokeai.backend.stable_diffusion.extensions.rescale_cfg import RescaleCFGExt
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.stable_diffusion.extensions.t2i_adapter import T2IAdapterExt
 from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
 from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES

@@ -463,6 +471,65 @@ class DenoiseLatentsInvocation(BaseInvocation):

         return controlnet_data

+    @staticmethod
+    def parse_controlnet_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        control_input: ControlField | list[ControlField] | None,
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        # Normalize control_input to a list.
+        control_list: list[ControlField]
+        if isinstance(control_input, ControlField):
+            control_list = [control_input]
+        elif isinstance(control_input, list):
+            control_list = control_input
+        elif control_input is None:
+            control_list = []
+        else:
+            raise ValueError(f"Unexpected control_input type: {type(control_input)}")
+
+        for control_info in control_list:
+            model = exit_stack.enter_context(context.models.load(control_info.control_model))
+            ext_manager.add_extension(
+                ControlNetExt(
+                    model=model,
+                    image=context.images.get_pil(control_info.image.image_name),
+                    weight=control_info.control_weight,
+                    begin_step_percent=control_info.begin_step_percent,
+                    end_step_percent=control_info.end_step_percent,
+                    control_mode=control_info.control_mode,
+                    resize_mode=control_info.resize_mode,
+                )
+            )
+
+    @staticmethod
+    def parse_t2i_adapter_field(
+        exit_stack: ExitStack,
+        context: InvocationContext,
+        t2i_adapters: Optional[Union[T2IAdapterField, list[T2IAdapterField]]],
+        ext_manager: ExtensionsManager,
+    ) -> None:
+        if t2i_adapters is None:
+            return
+
+        # Handle the possibility that t2i_adapters could be a list or a single T2IAdapterField.
+        if isinstance(t2i_adapters, T2IAdapterField):
+            t2i_adapters = [t2i_adapters]
+
+        for t2i_adapter_field in t2i_adapters:
+            ext_manager.add_extension(
+                T2IAdapterExt(
+                    node_context=context,
+                    model_id=t2i_adapter_field.t2i_adapter_model,
+                    image=context.images.get_pil(t2i_adapter_field.image.image_name),
+                    weight=t2i_adapter_field.weight,
+                    begin_step_percent=t2i_adapter_field.begin_step_percent,
+                    end_step_percent=t2i_adapter_field.end_step_percent,
+                    resize_mode=t2i_adapter_field.resize_mode,
+                )
+            )
+
     def prep_ip_adapter_image_prompts(
         self,
         context: InvocationContext,

@@ -672,7 +739,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
         else:
             masked_latents = torch.where(mask < 0.5, 0.0, latents)

-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient

     @staticmethod
     def prepare_noise_and_latents(

@@ -730,10 +797,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
         dtype = TorchDevice.choose_torch_dtype()

         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
-        latents = latents.to(device=device, dtype=dtype)
-        if noise is not None:
-            noise = noise.to(device=device, dtype=dtype)
-
         _, _, latent_height, latent_width = latents.shape

         conditioning_data = self.get_conditioning_data(

@@ -766,6 +829,52 @@ class DenoiseLatentsInvocation(BaseInvocation):
             denoising_end=self.denoising_end,
         )

+        # get the unet's config so that we can pass the base to sd_step_callback()
+        unet_config = context.models.get_config(self.unet.unet.key)
+
+        ### preview
+        def step_callback(state: PipelineIntermediateState) -> None:
+            context.util.sd_step_callback(state, unet_config.base)
+
+        ext_manager.add_extension(PreviewExt(step_callback))
+
+        ### cfg rescale
+        if self.cfg_rescale_multiplier > 0:
+            ext_manager.add_extension(RescaleCFGExt(self.cfg_rescale_multiplier))
+
+        ### freeu
+        if self.unet.freeu_config:
+            ext_manager.add_extension(FreeUExt(self.unet.freeu_config))
+
+        ### lora
+        if self.unet.loras:
+            for lora_field in self.unet.loras:
+                ext_manager.add_extension(
+                    LoRAExt(
+                        node_context=context,
+                        model_id=lora_field.lora,
+                        weight=lora_field.weight,
+                    )
+                )
+        ### seamless
+        if self.unet.seamless_axes:
+            ext_manager.add_extension(SeamlessExt(self.unet.seamless_axes))
+
+        ### inpaint
+        mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
+        # NOTE: We used to identify inpainting models by inpecting the shape of the loaded UNet model weights. Now we
+        # use the ModelVariantType config. During testing, there was a report of a user with models that had an
+        # incorrect ModelVariantType value. Re-installing the model fixed the issue. If this issue turns out to be
+        # prevalent, we will have to revisit how we initialize the inpainting extensions.
+        if unet_config.variant == ModelVariantType.Inpaint:
+            ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
+        elif mask is not None:
+            ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))
+
+        # Initialize context for modular denoise
+        latents = latents.to(device=device, dtype=dtype)
+        if noise is not None:
+            noise = noise.to(device=device, dtype=dtype)
         denoise_ctx = DenoiseContext(
             inputs=DenoiseInputs(
                 orig_latents=latents,

@@ -781,31 +890,31 @@ class DenoiseLatentsInvocation(BaseInvocation):
             scheduler=scheduler,
         )

-        # get the unet's config so that we can pass the base to sd_step_callback()
-        unet_config = context.models.get_config(self.unet.unet.key)
+        # context for loading additional models
+        with ExitStack() as exit_stack:
+            # later should be smth like:
+            # for extension_field in self.extensions:
+            #     ext = extension_field.to_extension(exit_stack, context, ext_manager)
+            #     ext_manager.add_extension(ext)
+            self.parse_controlnet_field(exit_stack, context, self.control, ext_manager)
+            self.parse_t2i_adapter_field(exit_stack, context, self.t2i_adapter, ext_manager)

-        ### preview
-        def step_callback(state: PipelineIntermediateState) -> None:
-            context.util.sd_step_callback(state, unet_config.base)
+            # ext: t2i/ip adapter
+            ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)

-        ext_manager.add_extension(PreviewExt(step_callback))
-
-        # ext: t2i/ip adapter
-        ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)
-
-        unet_info = context.models.load(self.unet.unet)
-        assert isinstance(unet_info.model, UNet2DConditionModel)
-        with (
-            unet_info.model_on_device() as (model_state_dict, unet),
-            ModelPatcher.patch_unet_attention_processor(unet, denoise_ctx.inputs.attention_processor_cls),
-            # ext: controlnet
-            ext_manager.patch_extensions(unet),
-            # ext: freeu, seamless, ip adapter, lora
-            ext_manager.patch_unet(model_state_dict, unet),
-        ):
-            sd_backend = StableDiffusionBackend(unet, scheduler)
-            denoise_ctx.unet = unet
-            result_latents = sd_backend.latents_from_embeddings(denoise_ctx, ext_manager)
+            unet_info = context.models.load(self.unet.unet)
+            assert isinstance(unet_info.model, UNet2DConditionModel)
+            with (
+                unet_info.model_on_device() as (cached_weights, unet),
+                ModelPatcher.patch_unet_attention_processor(unet, denoise_ctx.inputs.attention_processor_cls),
+                # ext: controlnet
+                ext_manager.patch_extensions(denoise_ctx),
+                # ext: freeu, seamless, ip adapter, lora
+                ext_manager.patch_unet(unet, cached_weights),
+            ):
+                sd_backend = StableDiffusionBackend(unet, scheduler)
+                denoise_ctx.unet = unet
+                result_latents = sd_backend.latents_from_embeddings(denoise_ctx, ext_manager)

         # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
         result_latents = result_latents.detach().to("cpu")

@@ -820,6 +929,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)

         mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        # At this point, the mask ranges from 0 (leave unchanged) to 1 (inpaint).
+        # We invert the mask here for compatibility with the old backend implementation.
+        if mask is not None:
+            mask = 1 - mask

         # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
         # below. Investigate whether this is appropriate.

@@ -862,14 +975,14 @@ class DenoiseLatentsInvocation(BaseInvocation):
         assert isinstance(unet_info.model, UNet2DConditionModel)
         with (
             ExitStack() as exit_stack,
-            unet_info.model_on_device() as (model_state_dict, unet),
+            unet_info.model_on_device() as (cached_weights, unet),
             ModelPatcher.apply_freeu(unet, self.unet.freeu_config),
-            set_seamless(unet, self.unet.seamless_axes),  # FIXME
+            SeamlessExt.static_patch_model(unet, self.unet.seamless_axes),  # FIXME
             # Apply the LoRA after unet has been moved to its target device for faster patching.
             ModelPatcher.apply_lora_unet(
                 unet,
                 loras=_lora_loader(),
-                model_state_dict=model_state_dict,
+                cached_weights=cached_weights,
             ),
         ):
             assert isinstance(unet, UNet2DConditionModel)
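The refactor standardizes the mask convention returned by `prep_inpaint_mask` (0 = leave unchanged, 1 = inpaint) and moves the inversion into the legacy code path, per the comment added at the `mask = 1 - mask` site. A tiny check of the two conventions:

```python
import torch

mask = torch.tensor([[0.0, 1.0], [1.0, 0.0]])  # new convention: 1 = inpaint
legacy_mask = 1 - mask                         # old backend: 1 = leave unchanged
print(legacy_mask)
```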
@@ -1,7 +1,7 @@
 from enum import Enum
 from typing import Any, Callable, Optional, Tuple

-from pydantic import BaseModel, ConfigDict, Field, RootModel, TypeAdapter
+from pydantic import BaseModel, ConfigDict, Field, RootModel, TypeAdapter, model_validator
 from pydantic.fields import _Unset
 from pydantic_core import PydanticUndefined

@@ -242,6 +242,31 @@ class ConditioningField(BaseModel):
     )


+class BoundingBoxField(BaseModel):
+    """A bounding box primitive value."""
+
+    x_min: int = Field(ge=0, description="The minimum x-coordinate of the bounding box (inclusive).")
+    x_max: int = Field(ge=0, description="The maximum x-coordinate of the bounding box (exclusive).")
+    y_min: int = Field(ge=0, description="The minimum y-coordinate of the bounding box (inclusive).")
+    y_max: int = Field(ge=0, description="The maximum y-coordinate of the bounding box (exclusive).")
+
+    score: Optional[float] = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="The score associated with the bounding box. In the range [0, 1]. This value is typically set "
+        "when the bounding box was produced by a detector and has an associated confidence score.",
+    )
+
+    @model_validator(mode="after")
+    def check_coords(self):
+        if self.x_min > self.x_max:
+            raise ValueError(f"x_min ({self.x_min}) is greater than x_max ({self.x_max}).")
+        if self.y_min > self.y_max:
+            raise ValueError(f"y_min ({self.y_min}) is greater than y_max ({self.y_max}).")
+        return self
+
+
 class MetadataField(RootModel[dict[str, Any]]):
     """
     Pydantic model for metadata with custom root of type dict[str, Any].
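Because `check_coords` runs in `mode="after"`, a `BoundingBoxField` with inverted coordinates is rejected at construction time. A short demonstration, assuming the package above is importable:

```python
from pydantic import ValidationError

from invokeai.app.invocations.fields import BoundingBoxField

box = BoundingBoxField(x_min=10, x_max=110, y_min=20, y_max=220, score=0.87)
print(box.x_max - box.x_min, box.y_max - box.y_min)  # 100 200

try:
    BoundingBoxField(x_min=50, x_max=10, y_min=0, y_max=10)
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["msg"])
```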
invokeai/app/invocations/grounding_dino.py (new file, 100 lines)

from pathlib import Path
from typing import Literal

import torch
from PIL import Image
from transformers import pipeline
from transformers.pipelines import ZeroShotObjectDetectionPipeline

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField
from invokeai.app.invocations.primitives import BoundingBoxCollectionOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.image_util.grounding_dino.detection_result import DetectionResult
from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline

GroundingDinoModelKey = Literal["grounding-dino-tiny", "grounding-dino-base"]
GROUNDING_DINO_MODEL_IDS: dict[GroundingDinoModelKey, str] = {
    "grounding-dino-tiny": "IDEA-Research/grounding-dino-tiny",
    "grounding-dino-base": "IDEA-Research/grounding-dino-base",
}


@invocation(
    "grounding_dino",
    title="Grounding DINO (Text Prompt Object Detection)",
    tags=["prompt", "object detection"],
    category="image",
    version="1.0.0",
)
class GroundingDinoInvocation(BaseInvocation):
    """Runs a Grounding DINO model. Performs zero-shot bounding-box object detection from a text prompt."""

    # Reference:
    # - https://arxiv.org/pdf/2303.05499
    # - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
    # - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb

    model: GroundingDinoModelKey = InputField(description="The Grounding DINO model to use.")
    prompt: str = InputField(description="The prompt describing the object to segment.")
    image: ImageField = InputField(description="The image to segment.")
    detection_threshold: float = InputField(
        description="The detection threshold for the Grounding DINO model. All detected bounding boxes with scores above this threshold will be returned.",
        ge=0.0,
        le=1.0,
        default=0.3,
    )

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> BoundingBoxCollectionOutput:
        # The model expects a 3-channel RGB image.
        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")

        detections = self._detect(
            context=context, image=image_pil, labels=[self.prompt], threshold=self.detection_threshold
        )

        # Convert detections to BoundingBoxCollectionOutput.
        bounding_boxes: list[BoundingBoxField] = []
        for detection in detections:
            bounding_boxes.append(
                BoundingBoxField(
                    x_min=detection.box.xmin,
                    x_max=detection.box.xmax,
                    y_min=detection.box.ymin,
                    y_max=detection.box.ymax,
                    score=detection.score,
                )
            )
        return BoundingBoxCollectionOutput(collection=bounding_boxes)

    @staticmethod
    def _load_grounding_dino(model_path: Path):
        grounding_dino_pipeline = pipeline(
            model=str(model_path),
            task="zero-shot-object-detection",
            local_files_only=True,
            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
            # model, and figure out how to make it work in the pipeline.
            # torch_dtype=TorchDevice.choose_torch_dtype(),
        )
        assert isinstance(grounding_dino_pipeline, ZeroShotObjectDetectionPipeline)
        return GroundingDinoPipeline(grounding_dino_pipeline)

    def _detect(
        self,
        context: InvocationContext,
        image: Image.Image,
        labels: list[str],
        threshold: float = 0.3,
    ) -> list[DetectionResult]:
        """Use Grounding DINO to detect bounding boxes for a set of labels in an image."""
        # TODO(ryand): I copied this "."-handling logic from the transformers example code. Test it and see if it
        # actually makes a difference.
        labels = [label if label.endswith(".") else label + "." for label in labels]

        with context.models.load_remote_model(
            source=GROUNDING_DINO_MODEL_IDS[self.model], loader=GroundingDinoInvocation._load_grounding_dino
        ) as detector:
            assert isinstance(detector, GroundingDinoPipeline)
            return detector.detect(image=image, candidate_labels=labels, threshold=threshold)
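The label normalization the node borrows from the transformers example code simply guarantees each text query ends with a period. Isolated, it is just:

```python
def normalize_labels(labels: list[str]) -> list[str]:
    # Grounding DINO's reference code terminates each text query with ".".
    return [label if label.endswith(".") else label + "." for label in labels]


print(normalize_labels(["a cat", "a dog."]))  # ['a cat.', 'a dog.']
```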
@@ -24,7 +24,7 @@ from invokeai.app.invocations.fields import (
 from invokeai.app.invocations.model import VAEField
 from invokeai.app.invocations.primitives import ImageOutput
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.stable_diffusion import set_seamless
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
 from invokeai.backend.stable_diffusion.vae_tiling import patch_vae_tiling_params
 from invokeai.backend.util.devices import TorchDevice

@@ -59,7 +59,7 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
         vae_info = context.models.load(self.vae.vae)
         assert isinstance(vae_info.model, (AutoencoderKL, AutoencoderTiny))
-        with set_seamless(vae_info.model, self.vae.seamless_axes), vae_info as vae:
+        with SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes), vae_info as vae:
             assert isinstance(vae, (AutoencoderKL, AutoencoderTiny))
             latents = latents.to(vae.device)
             if self.fp32:
@@ -1,9 +1,10 @@
 import numpy as np
+import torch
 from PIL import Image

 from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
-from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithMetadata
-from invokeai.app.invocations.primitives import MaskOutput
+from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithBoard, WithMetadata
+from invokeai.app.invocations.primitives import ImageOutput, MaskOutput


 @invocation(

@@ -118,3 +119,27 @@ class ImageMaskToTensorInvocation(BaseInvocation, WithMetadata):
             height=mask.shape[1],
             width=mask.shape[2],
         )
+
+
+@invocation(
+    "tensor_mask_to_image",
+    title="Tensor Mask to Image",
+    tags=["mask"],
+    category="mask",
+    version="1.0.0",
+)
+class MaskTensorToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Convert a mask tensor to an image."""
+
+    mask: TensorField = InputField(description="The mask tensor to convert.")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        mask = context.tensors.load(self.mask.tensor_name)
+        # Ensure that the mask is binary.
+        if mask.dtype != torch.bool:
+            mask = mask > 0.5
+        mask_np = (mask.float() * 255).byte().cpu().numpy()
+
+        mask_pil = Image.fromarray(mask_np, mode="L")
+        image_dto = context.images.save(image=mask_pil)
+        return ImageOutput.build(image_dto)
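The tensor-to-image conversion binarizes any non-boolean mask at 0.5 and expands it to 0/255 grayscale. The same logic as a standalone helper:

```python
import torch
from PIL import Image


def mask_tensor_to_pil(mask: torch.Tensor) -> Image.Image:
    # Binarize non-boolean masks at 0.5, then map {False, True} -> {0, 255},
    # mirroring MaskTensorToImageInvocation above.
    if mask.dtype != torch.bool:
        mask = mask > 0.5
    mask_np = (mask.float() * 255).byte().cpu().numpy()
    return Image.fromarray(mask_np, mode="L")


img = mask_tensor_to_pil(torch.rand(64, 64))
print(img.size, img.mode)  # (64, 64) L
```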
@@ -7,6 +7,7 @@ import torch
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.constants import LATENT_SCALE_FACTOR
from invokeai.app.invocations.fields import (
    BoundingBoxField,
    ColorField,
    ConditioningField,
    DenoiseMaskField,
@@ -469,3 +470,42 @@ class ConditioningCollectionInvocation(BaseInvocation):


# endregion

# region BoundingBox


@invocation_output("bounding_box_output")
class BoundingBoxOutput(BaseInvocationOutput):
    """Base class for nodes that output a single bounding box"""

    bounding_box: BoundingBoxField = OutputField(description="The output bounding box.")


@invocation_output("bounding_box_collection_output")
class BoundingBoxCollectionOutput(BaseInvocationOutput):
    """Base class for nodes that output a collection of bounding boxes"""

    collection: list[BoundingBoxField] = OutputField(description="The output bounding boxes.", title="Bounding Boxes")


@invocation(
    "bounding_box",
    title="Bounding Box",
    tags=["primitives", "segmentation", "collection", "bounding box"],
    category="primitives",
    version="1.0.0",
)
class BoundingBoxInvocation(BaseInvocation):
    """Create a bounding box manually by supplying box coordinates"""

    x_min: int = InputField(default=0, description="x-coordinate of the bounding box's top left vertex")
    y_min: int = InputField(default=0, description="y-coordinate of the bounding box's top left vertex")
    x_max: int = InputField(default=0, description="x-coordinate of the bounding box's bottom right vertex")
    y_max: int = InputField(default=0, description="y-coordinate of the bounding box's bottom right vertex")

    def invoke(self, context: InvocationContext) -> BoundingBoxOutput:
        bounding_box = BoundingBoxField(x_min=self.x_min, y_min=self.y_min, x_max=self.x_max, y_max=self.y_max)
        return BoundingBoxOutput(bounding_box=bounding_box)


# endregion
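Aside: a minimal stand-in (not the pydantic BoundingBoxField from this diff) showing the top-left/bottom-right vertex convention the new bounding-box primitives use; the Box class and numbers are illustrative assumptions.

# Pure-Python sketch of the (x_min, y_min) / (x_max, y_max) convention.
from dataclasses import dataclass

@dataclass
class Box:
    x_min: int
    y_min: int
    x_max: int
    y_max: int

    def width(self) -> int:
        return max(0, self.x_max - self.x_min)

    def height(self) -> int:
        return max(0, self.y_max - self.y_min)

# A 100x50 box whose top left vertex is at (10, 20).
box = Box(x_min=10, y_min=20, x_max=110, y_max=70)
assert (box.width(), box.height()) == (100, 50)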
invokeai/app/invocations/segment_anything.py (new file, 161 lines)
@@ -0,0 +1,161 @@
from pathlib import Path
from typing import Literal

import numpy as np
import torch
from PIL import Image
from transformers import AutoModelForMaskGeneration, AutoProcessor
from transformers.models.sam import SamModel
from transformers.models.sam.processing_sam import SamProcessor

from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import BoundingBoxField, ImageField, InputField, TensorField
from invokeai.app.invocations.primitives import MaskOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.image_util.segment_anything.mask_refinement import mask_to_polygon, polygon_to_mask
from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline

SegmentAnythingModelKey = Literal["segment-anything-base", "segment-anything-large", "segment-anything-huge"]
SEGMENT_ANYTHING_MODEL_IDS: dict[SegmentAnythingModelKey, str] = {
    "segment-anything-base": "facebook/sam-vit-base",
    "segment-anything-large": "facebook/sam-vit-large",
    "segment-anything-huge": "facebook/sam-vit-huge",
}


@invocation(
    "segment_anything",
    title="Segment Anything",
    tags=["prompt", "segmentation"],
    category="segmentation",
    version="1.0.0",
)
class SegmentAnythingInvocation(BaseInvocation):
    """Runs a Segment Anything Model."""

    # Reference:
    # - https://arxiv.org/pdf/2304.02643
    # - https://huggingface.co/docs/transformers/v4.43.3/en/model_doc/grounding-dino#grounded-sam
    # - https://github.com/NielsRogge/Transformers-Tutorials/blob/a39f33ac1557b02ebfb191ea7753e332b5ca933f/Grounding%20DINO/GroundingDINO_with_Segment_Anything.ipynb

    model: SegmentAnythingModelKey = InputField(description="The Segment Anything model to use.")
    image: ImageField = InputField(description="The image to segment.")
    bounding_boxes: list[BoundingBoxField] = InputField(description="The bounding boxes to prompt the SAM model with.")
    apply_polygon_refinement: bool = InputField(
        description="Whether to apply polygon refinement to the masks. This will smooth the edges of the masks slightly and ensure that each mask consists of a single closed polygon (before merging).",
        default=True,
    )
    mask_filter: Literal["all", "largest", "highest_box_score"] = InputField(
        description="The filtering to apply to the detected masks before merging them into a final output.",
        default="all",
    )

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> MaskOutput:
        # The models expect a 3-channel RGB image.
        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")

        if len(self.bounding_boxes) == 0:
            combined_mask = torch.zeros(image_pil.size[::-1], dtype=torch.bool)
        else:
            masks = self._segment(context=context, image=image_pil)
            masks = self._filter_masks(masks=masks, bounding_boxes=self.bounding_boxes)

            # masks contains bool values, so we merge them via max-reduce.
            combined_mask, _ = torch.stack(masks).max(dim=0)

        mask_tensor_name = context.tensors.save(combined_mask)
        height, width = combined_mask.shape
        return MaskOutput(mask=TensorField(tensor_name=mask_tensor_name), width=width, height=height)

    @staticmethod
    def _load_sam_model(model_path: Path):
        sam_model = AutoModelForMaskGeneration.from_pretrained(
            model_path,
            local_files_only=True,
            # TODO(ryand): Setting the torch_dtype here doesn't work. Investigate whether fp16 is supported by the
            # model, and figure out how to make it work in the pipeline.
            # torch_dtype=TorchDevice.choose_torch_dtype(),
        )
        assert isinstance(sam_model, SamModel)

        sam_processor = AutoProcessor.from_pretrained(model_path, local_files_only=True)
        assert isinstance(sam_processor, SamProcessor)
        return SegmentAnythingPipeline(sam_model=sam_model, sam_processor=sam_processor)

    def _segment(
        self,
        context: InvocationContext,
        image: Image.Image,
    ) -> list[torch.Tensor]:
        """Use Segment Anything (SAM) to generate masks given an image + a set of bounding boxes."""
        # Convert the bounding boxes to the SAM input format.
        sam_bounding_boxes = [[bb.x_min, bb.y_min, bb.x_max, bb.y_max] for bb in self.bounding_boxes]

        with (
            context.models.load_remote_model(
                source=SEGMENT_ANYTHING_MODEL_IDS[self.model], loader=SegmentAnythingInvocation._load_sam_model
            ) as sam_pipeline,
        ):
            assert isinstance(sam_pipeline, SegmentAnythingPipeline)
            masks = sam_pipeline.segment(image=image, bounding_boxes=sam_bounding_boxes)

        masks = self._process_masks(masks)
        if self.apply_polygon_refinement:
            masks = self._apply_polygon_refinement(masks)

        return masks

    def _process_masks(self, masks: torch.Tensor) -> list[torch.Tensor]:
        """Convert the tensor output from the Segment Anything model from a tensor of shape
        [num_masks, channels, height, width] to a list of tensors of shape [height, width].
        """
        assert masks.dtype == torch.bool
        # [num_masks, channels, height, width] -> [num_masks, height, width]
        masks, _ = masks.max(dim=1)
        # Split the first dimension into a list of masks.
        return list(masks.cpu().unbind(dim=0))

    def _apply_polygon_refinement(self, masks: list[torch.Tensor]) -> list[torch.Tensor]:
        """Apply polygon refinement to the masks.

        Convert each mask to a polygon, then back to a mask. This has the following effect:
        - Smooth the edges of the mask slightly.
        - Ensure that each mask consists of a single closed polygon
        - Removes small mask pieces.
        - Removes holes from the mask.
        """
        # Convert tensor masks to np masks.
        np_masks = [mask.cpu().numpy().astype(np.uint8) for mask in masks]

        # Apply polygon refinement.
        for idx, mask in enumerate(np_masks):
            shape = mask.shape
            assert len(shape) == 2  # Assert length to satisfy type checker.
            polygon = mask_to_polygon(mask)
            mask = polygon_to_mask(polygon, shape)
            np_masks[idx] = mask

        # Convert np masks back to tensor masks.
        masks = [torch.tensor(mask, dtype=torch.bool) for mask in np_masks]

        return masks

    def _filter_masks(self, masks: list[torch.Tensor], bounding_boxes: list[BoundingBoxField]) -> list[torch.Tensor]:
        """Filter the detected masks based on the specified mask filter."""
        assert len(masks) == len(bounding_boxes)

        if self.mask_filter == "all":
            return masks
        elif self.mask_filter == "largest":
            # Find the largest mask.
            return [max(masks, key=lambda x: float(x.sum()))]
        elif self.mask_filter == "highest_box_score":
            # Find the index of the bounding box with the highest score.
            # Note that we fallback to -1.0 if the score is None. This is mainly to satisfy the type checker. In most
            # cases the scores should all be non-None when using this filtering mode. That being said, -1.0 is a
            # reasonable fallback since the expected score range is [0.0, 1.0].
            max_score_idx = max(range(len(bounding_boxes)), key=lambda i: bounding_boxes[i].score or -1.0)
            return [masks[max_score_idx]]
        else:
            raise ValueError(f"Invalid mask filter: {self.mask_filter}")
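Aside: a small sketch of the max-reduce merge used in SegmentAnythingInvocation.invoke, assuming only torch; the three tiny masks are fabricated for illustration.

# Boolean masks merge via max over the stacked dimension: a pixel is set in
# the combined mask if any individual mask sets it.
import torch

masks = [torch.zeros(4, 4, dtype=torch.bool) for _ in range(3)]
masks[0][0, 0] = True
masks[1][1, 1] = True
masks[2][1, 1] = True

combined, _ = torch.stack(masks).max(dim=0)
assert combined[0, 0] and combined[1, 1] and not combined[2, 2]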
@@ -1,3 +1,5 @@
from typing import Callable

import numpy as np
import torch
from PIL import Image
@@ -21,7 +23,7 @@ from invokeai.backend.tiles.tiles import calc_tiles_min_overlap
from invokeai.backend.tiles.utils import TBLR, Tile


@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.1.0")
@invocation("spandrel_image_to_image", title="Image-to-Image", tags=["upscale"], category="upscale", version="1.3.0")
class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel)."""

@@ -35,7 +37,8 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
        default=512, description="The tile size for tiled image-to-image. Set to 0 to disable tiling."
    )

    def _scale_tile(self, tile: Tile, scale: int) -> Tile:
    @classmethod
    def scale_tile(cls, tile: Tile, scale: int) -> Tile:
        return Tile(
            coords=TBLR(
                top=tile.coords.top * scale,
@@ -51,20 +54,22 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
            ),
        )

    @torch.inference_mode()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
        # revisit this.
        image = context.images.get_pil(self.image.image_name, mode="RGB")

    @classmethod
    def upscale_image(
        cls,
        image: Image.Image,
        tile_size: int,
        spandrel_model: SpandrelImageToImageModel,
        is_canceled: Callable[[], bool],
    ) -> Image.Image:
        # Compute the image tiles.
        if self.tile_size > 0:
        if tile_size > 0:
            min_overlap = 20
            tiles = calc_tiles_min_overlap(
                image_height=image.height,
                image_width=image.width,
                tile_height=self.tile_size,
                tile_width=self.tile_size,
                tile_height=tile_size,
                tile_width=tile_size,
                min_overlap=min_overlap,
            )
        else:
@@ -85,60 +90,164 @@ class SpandrelImageToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
        # Prepare input image for inference.
        image_tensor = SpandrelImageToImageModel.pil_to_tensor(image)

        # Load the model.
        spandrel_model_info = context.models.load(self.image_to_image_model)
        # Scale the tiles for re-assembling the final image.
        scale = spandrel_model.scale
        scaled_tiles = [cls.scale_tile(tile, scale=scale) for tile in tiles]

        # Prepare the output tensor.
        _, channels, height, width = image_tensor.shape
        output_tensor = torch.zeros(
            (height * scale, width * scale, channels), dtype=torch.uint8, device=torch.device("cpu")
        )

        image_tensor = image_tensor.to(device=spandrel_model.device, dtype=spandrel_model.dtype)

        # Run the model on each tile.
        with spandrel_model_info as spandrel_model:
            assert isinstance(spandrel_model, SpandrelImageToImageModel)
        for tile, scaled_tile in tqdm(list(zip(tiles, scaled_tiles, strict=True)), desc="Upscaling Tiles"):
            # Exit early if the invocation has been canceled.
            if is_canceled():
                raise CanceledException

            # Scale the tiles for re-assembling the final image.
            scale = spandrel_model.scale
            scaled_tiles = [self._scale_tile(tile, scale=scale) for tile in tiles]
            # Extract the current tile from the input tensor.
            input_tile = image_tensor[
                :, :, tile.coords.top : tile.coords.bottom, tile.coords.left : tile.coords.right
            ].to(device=spandrel_model.device, dtype=spandrel_model.dtype)

            # Prepare the output tensor.
            _, channels, height, width = image_tensor.shape
            output_tensor = torch.zeros(
                (height * scale, width * scale, channels), dtype=torch.uint8, device=torch.device("cpu")
            )
            # Run the model on the tile.
            output_tile = spandrel_model.run(input_tile)

            image_tensor = image_tensor.to(device=spandrel_model.device, dtype=spandrel_model.dtype)
            # Convert the output tile into the output tensor's format.
            # (N, C, H, W) -> (C, H, W)
            output_tile = output_tile.squeeze(0)
            # (C, H, W) -> (H, W, C)
            output_tile = output_tile.permute(1, 2, 0)
            output_tile = output_tile.clamp(0, 1)
            output_tile = (output_tile * 255).to(dtype=torch.uint8, device=torch.device("cpu"))

            for tile, scaled_tile in tqdm(list(zip(tiles, scaled_tiles, strict=True)), desc="Upscaling Tiles"):
                # Exit early if the invocation has been canceled.
                if context.util.is_canceled():
                    raise CanceledException

                # Extract the current tile from the input tensor.
                input_tile = image_tensor[
                    :, :, tile.coords.top : tile.coords.bottom, tile.coords.left : tile.coords.right
                ].to(device=spandrel_model.device, dtype=spandrel_model.dtype)

                # Run the model on the tile.
                output_tile = spandrel_model.run(input_tile)

                # Convert the output tile into the output tensor's format.
                # (N, C, H, W) -> (C, H, W)
                output_tile = output_tile.squeeze(0)
                # (C, H, W) -> (H, W, C)
                output_tile = output_tile.permute(1, 2, 0)
                output_tile = output_tile.clamp(0, 1)
                output_tile = (output_tile * 255).to(dtype=torch.uint8, device=torch.device("cpu"))

                # Merge the output tile into the output tensor.
                # We only keep half of the overlap on the top and left side of the tile. We do this in case there are
                # edge artifacts. We don't bother with any 'blending' in the current implementation - for most upscalers
                # it seems unnecessary, but we may find a need in the future.
                top_overlap = scaled_tile.overlap.top // 2
                left_overlap = scaled_tile.overlap.left // 2
                output_tensor[
                    scaled_tile.coords.top + top_overlap : scaled_tile.coords.bottom,
                    scaled_tile.coords.left + left_overlap : scaled_tile.coords.right,
                    :,
                ] = output_tile[top_overlap:, left_overlap:, :]
            # Merge the output tile into the output tensor.
            # We only keep half of the overlap on the top and left side of the tile. We do this in case there are
            # edge artifacts. We don't bother with any 'blending' in the current implementation - for most upscalers
            # it seems unnecessary, but we may find a need in the future.
            top_overlap = scaled_tile.overlap.top // 2
            left_overlap = scaled_tile.overlap.left // 2
            output_tensor[
                scaled_tile.coords.top + top_overlap : scaled_tile.coords.bottom,
                scaled_tile.coords.left + left_overlap : scaled_tile.coords.right,
                :,
            ] = output_tile[top_overlap:, left_overlap:, :]

        # Convert the output tensor to a PIL image.
        np_image = output_tensor.detach().numpy().astype(np.uint8)
        pil_image = Image.fromarray(np_image)

        return pil_image

    @torch.inference_mode()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
        # revisit this.
        image = context.images.get_pil(self.image.image_name, mode="RGB")

        # Load the model.
        spandrel_model_info = context.models.load(self.image_to_image_model)

        # Do the upscaling.
        with spandrel_model_info as spandrel_model:
            assert isinstance(spandrel_model, SpandrelImageToImageModel)

            # Upscale the image
            pil_image = self.upscale_image(image, self.tile_size, spandrel_model, context.util.is_canceled)

        image_dto = context.images.save(image=pil_image)
        return ImageOutput.build(image_dto)

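Aside: the half-overlap merge above can be illustrated in one dimension with plain numpy; the tile sizes and overlap here are made-up numbers, not values from the invocation.

import numpy as np

# Two 1-D "tiles" that overlap by 8 output pixels; each is written into the
# output keeping only half of its leading overlap, mirroring the merge above.
output = np.zeros(24, dtype=np.uint8)
tile_a = np.full(16, 1, dtype=np.uint8)   # covers output[0:16]
tile_b = np.full(16, 2, dtype=np.uint8)   # covers output[8:24], overlap = 8
output[0:16] = tile_a
left_overlap = 8 // 2                      # keep half of the overlap
output[8 + left_overlap : 24] = tile_b[left_overlap:]
assert (output[:12] == 1).all() and (output[12:] == 2).all()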
@invocation(
    "spandrel_image_to_image_autoscale",
    title="Image-to-Image (Autoscale)",
    tags=["upscale"],
    category="upscale",
    version="1.0.0",
)
class SpandrelImageToImageAutoscaleInvocation(SpandrelImageToImageInvocation):
    """Run any spandrel image-to-image model (https://github.com/chaiNNer-org/spandrel) until the target scale is reached."""

    scale: float = InputField(
        default=4.0,
        gt=0.0,
        le=16.0,
        description="The final scale of the output image. If the model does not upscale the image, this will be ignored.",
    )
    fit_to_multiple_of_8: bool = InputField(
        default=False,
        description="If true, the output image will be resized to the nearest multiple of 8 in both dimensions.",
    )

    @torch.inference_mode()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        # Images are converted to RGB, because most models don't support an alpha channel. In the future, we may want to
        # revisit this.
        image = context.images.get_pil(self.image.image_name, mode="RGB")

        # Load the model.
        spandrel_model_info = context.models.load(self.image_to_image_model)

        # The target size of the image, determined by the provided scale. We'll run the upscaler until we hit this size.
        # Later, we may mutate this value if the model doesn't upscale the image or if the user requested a multiple of 8.
        target_width = int(image.width * self.scale)
        target_height = int(image.height * self.scale)

        # Do the upscaling.
        with spandrel_model_info as spandrel_model:
            assert isinstance(spandrel_model, SpandrelImageToImageModel)

            # First pass of upscaling. Note: `pil_image` will be mutated.
            pil_image = self.upscale_image(image, self.tile_size, spandrel_model, context.util.is_canceled)

            # Some models don't upscale the image, but we have no way to know this in advance. We'll check if the model
            # upscaled the image and run the loop below if it did. We'll require the model to upscale both dimensions
            # to be considered an upscale model.
            is_upscale_model = pil_image.width > image.width and pil_image.height > image.height

            if is_upscale_model:
                # This is an upscale model, so we should keep upscaling until we reach the target size.
                iterations = 1
                while pil_image.width < target_width or pil_image.height < target_height:
                    pil_image = self.upscale_image(pil_image, self.tile_size, spandrel_model, context.util.is_canceled)
                    iterations += 1

                    # Sanity check to prevent excessive or infinite loops. All known upscaling models are at least 2x.
                    # Our max scale is 16x, so with a 2x model, we should never exceed 16x == 2^4 -> 4 iterations.
                    # We'll allow one extra iteration "just in case" and bail at 5 upscaling iterations. In practice,
                    # we should never reach this limit.
                    if iterations >= 5:
                        context.logger.warning(
                            "Upscale loop reached maximum iteration count of 5, stopping upscaling early."
                        )
                        break
            else:
                # This model doesn't upscale the image. We should ignore the scale parameter, modifying the output size
                # to be the same as the processed image size.

                # The output size is now the size of the processed image.
                target_width = pil_image.width
                target_height = pil_image.height

                # Warn the user if they requested a scale greater than 1.
                if self.scale > 1:
                    context.logger.warning(
                        "Model does not increase the size of the image, but a greater scale than 1 was requested. Image will not be scaled."
                    )

        # We may need to resize the image to a multiple of 8. Use floor division to ensure we don't scale the image up
        # in the final resize
        if self.fit_to_multiple_of_8:
            target_width = int(target_width // 8 * 8)
            target_height = int(target_height // 8 * 8)

        # Final resize. Per PIL documentation, Lanczos provides the best quality for both upscale and downscale.
        # See: https://pillow.readthedocs.io/en/stable/handbook/concepts.html#filters-comparison-table
        pil_image = pil_image.resize((target_width, target_height), resample=Image.Resampling.LANCZOS)

        image_dto = context.images.save(image=pil_image)
        return ImageOutput.build(image_dto)
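Aside: a worked check of the iteration bound described in the comments above, assuming a hypothetical 2x model and the maximum 16x scale.

# With a 2x model and a 16x target, four passes suffice (2^4 == 16), so the
# loop's hard stop at 5 iterations should never fire in practice.
model_scale = 2
target_scale = 16.0
size, iterations = 1.0, 0
while size < target_scale:
    size *= model_scale
    iterations += 1
assert iterations == 4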
@@ -91,6 +91,7 @@ class InvokeAIAppConfig(BaseSettings):
        db_dir: Path to InvokeAI databases directory.
        outputs_dir: Path to directory for outputs.
        custom_nodes_dir: Path to directory for custom nodes.
        style_presets_dir: Path to directory for style presets.
        log_handlers: Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>".
        log_format: Log format. Use "plain" for text-only, "color" for colorized output, "legacy" for 2.3-style logging and "syslog" for syslog-style.<br>Valid values: `plain`, `color`, `syslog`, `legacy`
        log_level: Emit logging messages at this level or higher.<br>Valid values: `debug`, `info`, `warning`, `error`, `critical`
@@ -153,6 +154,7 @@ class InvokeAIAppConfig(BaseSettings):
    db_dir: Path = Field(default=Path("databases"), description="Path to InvokeAI databases directory.")
    outputs_dir: Path = Field(default=Path("outputs"), description="Path to directory for outputs.")
    custom_nodes_dir: Path = Field(default=Path("nodes"), description="Path to directory for custom nodes.")
    style_presets_dir: Path = Field(default=Path("style_presets"), description="Path to directory for style presets.")

    # LOGGING
    log_handlers: list[str] = Field(default=["console"], description='Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>".')
@@ -300,6 +302,11 @@ class InvokeAIAppConfig(BaseSettings):
        """Path to the models directory, resolved to an absolute path."""
        return self._resolve(self.models_dir)

    @property
    def style_presets_path(self) -> Path:
        """Path to the style presets directory, resolved to an absolute path."""
        return self._resolve(self.style_presets_dir)

    @property
    def convert_cache_path(self) -> Path:
        """Path to the converted cache models directory, resolved to an absolute path."""
@@ -1,46 +1,44 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)

import asyncio
import threading
from queue import Empty, Queue

from fastapi_events.dispatcher import dispatch

from invokeai.app.services.events.events_base import EventServiceBase
from invokeai.app.services.events.events_common import (
    EventBase,
)
from invokeai.app.services.events.events_common import EventBase


class FastAPIEventService(EventServiceBase):
    def __init__(self, event_handler_id: int) -> None:
    def __init__(self, event_handler_id: int, loop: asyncio.AbstractEventLoop) -> None:
        self.event_handler_id = event_handler_id
        self._queue = Queue[EventBase | None]()
        self._queue = asyncio.Queue[EventBase | None]()
        self._stop_event = threading.Event()
        asyncio.create_task(self._dispatch_from_queue(stop_event=self._stop_event))
        self._loop = loop

        # We need to store a reference to the task so it doesn't get GC'd
        # See: https://docs.python.org/3/library/asyncio-task.html#creating-tasks
        self._background_tasks: set[asyncio.Task[None]] = set()
        task = self._loop.create_task(self._dispatch_from_queue(stop_event=self._stop_event))
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.remove)

        super().__init__()

    def stop(self, *args, **kwargs):
        self._stop_event.set()
        self._queue.put(None)
        self._loop.call_soon_threadsafe(self._queue.put_nowait, None)

    def dispatch(self, event: EventBase) -> None:
        self._queue.put(event)
        self._loop.call_soon_threadsafe(self._queue.put_nowait, event)

    async def _dispatch_from_queue(self, stop_event: threading.Event):
        """Get events from the queue and dispatch them, from the correct thread"""
        while not stop_event.is_set():
            try:
                event = self._queue.get(block=False)
                event = await self._queue.get()
                if not event:  # Probably stopping
                    continue
                # Leave the payloads as live pydantic models
                dispatch(event, middleware_id=self.event_handler_id, payload_schema_dump=False)

            except Empty:
                await asyncio.sleep(0.1)
                pass

            except asyncio.CancelledError as e:
                raise e  # Raise a proper error
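Aside: a self-contained sketch of the cross-thread pattern the rewritten FastAPIEventService relies on; the queue items and names here are illustrative, not taken from the service.

# asyncio.Queue is not thread-safe, so producers on other threads must hand
# items to the loop thread via loop.call_soon_threadsafe, as dispatch()/stop()
# do above.
import asyncio
import threading

async def main() -> None:
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue[str | None] = asyncio.Queue()

    def produce_from_thread() -> None:
        # Schedule the put on the loop thread rather than calling put_nowait here.
        loop.call_soon_threadsafe(queue.put_nowait, "event")
        loop.call_soon_threadsafe(queue.put_nowait, None)  # sentinel, like stop()

    threading.Thread(target=produce_from_thread).start()
    while (item := await queue.get()) is not None:
        print("dispatched:", item)

asyncio.run(main())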
@@ -1,11 +1,10 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654) and the InvokeAI Team
from pathlib import Path
from queue import Queue
from typing import Dict, Optional, Union
from typing import Optional, Union

from PIL import Image, PngImagePlugin
from PIL.Image import Image as PILImageType
from send2trash import send2trash

from invokeai.app.services.image_files.image_files_base import ImageFileStorageBase
from invokeai.app.services.image_files.image_files_common import (
@@ -20,18 +19,12 @@ from invokeai.app.util.thumbnails import get_thumbnail_name, make_thumbnail
class DiskImageFileStorage(ImageFileStorageBase):
    """Stores images on disk"""

    __output_folder: Path
    __cache_ids: Queue  # TODO: this is an incredibly naive cache
    __cache: Dict[Path, PILImageType]
    __max_cache_size: int
    __invoker: Invoker

    def __init__(self, output_folder: Union[str, Path]):
        self.__cache = {}
        self.__cache_ids = Queue()
        self.__cache: dict[Path, PILImageType] = {}
        self.__cache_ids = Queue[Path]()
        self.__max_cache_size = 10  # TODO: get this from config

        self.__output_folder: Path = output_folder if isinstance(output_folder, Path) else Path(output_folder)
        self.__output_folder = output_folder if isinstance(output_folder, Path) else Path(output_folder)
        self.__thumbnails_folder = self.__output_folder / "thumbnails"
        # Validate required output folders at launch
        self.__validate_storage_folders()
@@ -103,7 +96,7 @@ class DiskImageFileStorage(ImageFileStorageBase):
            image_path = self.get_path(image_name)

            if image_path.exists():
                send2trash(image_path)
                image_path.unlink()
            if image_path in self.__cache:
                del self.__cache[image_path]

@@ -111,7 +104,7 @@ class DiskImageFileStorage(ImageFileStorageBase):
            thumbnail_path = self.get_path(thumbnail_name, True)

            if thumbnail_path.exists():
                send2trash(thumbnail_path)
                thumbnail_path.unlink()
            if thumbnail_path in self.__cache:
                del self.__cache[thumbnail_path]
        except Exception as e:
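Aside: the "incredibly naive cache" noted in the TODO above is a dict of images plus a FIFO queue of keys for eviction. A minimal stand-in with fabricated types, not the real class:

from queue import Queue

cache: dict[str, bytes] = {}
cache_ids: Queue[str] = Queue()
MAX_CACHE_SIZE = 10

def set_cache(path: str, image: bytes) -> None:
    if path not in cache:
        cache[path] = image
        cache_ids.put(path)
        if cache_ids.qsize() > MAX_CACHE_SIZE:
            cache.pop(cache_ids.get(), None)  # evict the oldest entry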
@@ -4,6 +4,8 @@ from __future__ import annotations
from typing import TYPE_CHECKING

from invokeai.app.services.object_serializer.object_serializer_base import ObjectSerializerBase
from invokeai.app.services.style_preset_images.style_preset_images_base import StylePresetImageFileStorageBase
from invokeai.app.services.style_preset_records.style_preset_records_base import StylePresetRecordsStorageBase

if TYPE_CHECKING:
    from logging import Logger
@@ -61,6 +63,8 @@ class InvocationServices:
        workflow_records: "WorkflowRecordsStorageBase",
        tensors: "ObjectSerializerBase[torch.Tensor]",
        conditioning: "ObjectSerializerBase[ConditioningFieldData]",
        style_preset_records: "StylePresetRecordsStorageBase",
        style_preset_image_files: "StylePresetImageFileStorageBase",
    ):
        self.board_images = board_images
        self.board_image_records = board_image_records
@@ -85,3 +89,5 @@ class InvocationServices:
        self.workflow_records = workflow_records
        self.tensors = tensors
        self.conditioning = conditioning
        self.style_preset_records = style_preset_records
        self.style_preset_image_files = style_preset_image_files
@@ -2,7 +2,6 @@ from pathlib import Path

from PIL import Image
from PIL.Image import Image as PILImageType
from send2trash import send2trash

from invokeai.app.services.invoker import Invoker
from invokeai.app.services.model_images.model_images_base import ModelImageFileStorageBase
@@ -70,7 +69,7 @@ class ModelImageFileStorageDisk(ModelImageFileStorageBase):
            if not self._validate_path(path):
                raise ModelImageFileNotFoundException

            send2trash(path)
            path.unlink()

        except Exception as e:
            raise ModelImageFileDeleteException from e
@@ -3,7 +3,7 @@

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import List, Optional, Union

from pydantic.networks import AnyHttpUrl

@@ -12,7 +12,7 @@ from invokeai.app.services.download import DownloadQueueServiceBase
from invokeai.app.services.events.events_base import EventServiceBase
from invokeai.app.services.invoker import Invoker
from invokeai.app.services.model_install.model_install_common import ModelInstallJob, ModelSource
from invokeai.app.services.model_records import ModelRecordServiceBase
from invokeai.app.services.model_records import ModelRecordChanges, ModelRecordServiceBase
from invokeai.backend.model_manager import AnyModelConfig


@@ -64,7 +64,7 @@ class ModelInstallServiceBase(ABC):
    def register_path(
        self,
        model_path: Union[Path, str],
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> str:
        """
        Probe and register the model at model_path.
@@ -72,7 +72,7 @@ class ModelInstallServiceBase(ABC):
        This keeps the model in its current location.

        :param model_path: Filesystem Path to the model.
        :param config: Dict of attributes that will override autoassigned values.
        :param config: ModelRecordChanges object that will override autoassigned model record values.
        :returns id: The string ID of the registered model.
        """

@@ -92,7 +92,7 @@ class ModelInstallServiceBase(ABC):
    def install_path(
        self,
        model_path: Union[Path, str],
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> str:
        """
        Probe, register and install the model in the models directory.
@@ -101,7 +101,7 @@ class ModelInstallServiceBase(ABC):
        the models directory handled by InvokeAI.

        :param model_path: Filesystem Path to the model.
        :param config: Dict of attributes that will override autoassigned values.
        :param config: ModelRecordChanges object that will override autoassigned model record values.
        :returns id: The string ID of the registered model.
        """

@@ -109,14 +109,14 @@ class ModelInstallServiceBase(ABC):
    def heuristic_import(
        self,
        source: str,
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
        access_token: Optional[str] = None,
        inplace: Optional[bool] = False,
    ) -> ModelInstallJob:
        r"""Install the indicated model using heuristics to interpret user intentions.

        :param source: String source
        :param config: Optional dict. Any fields in this dict
        :param config: Optional ModelRecordChanges object. Any fields in this object
        will override corresponding autoassigned probe fields in the
        model's config record as described in `import_model()`.
        :param access_token: Optional access token for remote sources.
@@ -147,7 +147,7 @@ class ModelInstallServiceBase(ABC):
    def import_model(
        self,
        source: ModelSource,
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        """Install the indicated model.

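Aside: a hedged before/after sketch of how a caller adapts to this signature change; the import path matches the diff, but the field values and the commented-out call are hypothetical.

from invokeai.app.services.model_records import ModelRecordChanges

# Previously, overrides travelled as a plain dict:
#     installer.heuristic_import(source, config={"name": "my-model"})
# Now they travel as a typed pydantic object:
config = ModelRecordChanges(name="my-model", description="An example override")
# installer.heuristic_import("/path/to/model.safetensors", config=config)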
@@ -2,13 +2,14 @@ import re
import traceback
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Literal, Optional, Set, Union
from typing import Literal, Optional, Set, Union

from pydantic import BaseModel, Field, PrivateAttr, field_validator
from pydantic.networks import AnyHttpUrl
from typing_extensions import Annotated

from invokeai.app.services.download import DownloadJob, MultiFileDownloadJob
from invokeai.app.services.model_records import ModelRecordChanges
from invokeai.backend.model_manager import AnyModelConfig, ModelRepoVariant
from invokeai.backend.model_manager.config import ModelSourceType
from invokeai.backend.model_manager.metadata import AnyModelRepoMetadata
@@ -133,8 +134,9 @@ class ModelInstallJob(BaseModel):
    id: int = Field(description="Unique ID for this job")
    status: InstallStatus = Field(default=InstallStatus.WAITING, description="Current status of install process")
    error_reason: Optional[str] = Field(default=None, description="Information about why the job failed")
    config_in: Dict[str, Any] = Field(
        default_factory=dict, description="Configuration information (e.g. 'description') to apply to model."
    config_in: ModelRecordChanges = Field(
        default_factory=ModelRecordChanges,
        description="Configuration information (e.g. 'description') to apply to model.",
    )
    config_out: Optional[AnyModelConfig] = Field(
        default=None, description="After successful installation, this will hold the configuration object."
@@ -163,26 +163,27 @@ class ModelInstallService(ModelInstallServiceBase):
    def register_path(
        self,
        model_path: Union[Path, str],
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> str:  # noqa D102
        model_path = Path(model_path)
        config = config or {}
        if not config.get("source"):
            config["source"] = model_path.resolve().as_posix()
            config["source_type"] = ModelSourceType.Path
        config = config or ModelRecordChanges()
        if not config.source:
            config.source = model_path.resolve().as_posix()
            config.source_type = ModelSourceType.Path
        return self._register(model_path, config)

    def install_path(
        self,
        model_path: Union[Path, str],
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> str:  # noqa D102
        model_path = Path(model_path)
        config = config or {}
        config = config or ModelRecordChanges()
        info: AnyModelConfig = ModelProbe.probe(
            Path(model_path), config.model_dump(), hash_algo=self._app_config.hashing_algorithm
        )  # type: ignore

        info: AnyModelConfig = ModelProbe.probe(Path(model_path), config, hash_algo=self._app_config.hashing_algorithm)

        if preferred_name := config.get("name"):
        if preferred_name := config.name:
            preferred_name = Path(preferred_name).with_suffix(model_path.suffix)

        dest_path = (
@@ -204,7 +205,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def heuristic_import(
        self,
        source: str,
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
        access_token: Optional[str] = None,
        inplace: Optional[bool] = False,
    ) -> ModelInstallJob:
@@ -216,7 +217,7 @@ class ModelInstallService(ModelInstallServiceBase):
            source_obj.access_token = access_token
        return self.import_model(source_obj, config)

    def import_model(self, source: ModelSource, config: Optional[Dict[str, Any]] = None) -> ModelInstallJob:  # noqa D102
    def import_model(self, source: ModelSource, config: Optional[ModelRecordChanges] = None) -> ModelInstallJob:  # noqa D102
        similar_jobs = [x for x in self.list_jobs() if x.source == source and not x.in_terminal_state]
        if similar_jobs:
            self._logger.warning(f"There is already an active install job for {source}. Not enqueuing.")
@@ -318,16 +319,17 @@ class ModelInstallService(ModelInstallServiceBase):
            model_path = self._app_config.models_path / model_path
            model_path = model_path.resolve()

            config: dict[str, Any] = {}
            config["name"] = model_name
            config["description"] = stanza.get("description")
            config = ModelRecordChanges(
                name=model_name,
                description=stanza.get("description"),
            )
            legacy_config_path = stanza.get("config")
            if legacy_config_path:
                # In v3, these paths were relative to the root. Migrate them to be relative to the legacy_conf_dir.
                legacy_config_path = self._app_config.root_path / legacy_config_path
                if legacy_config_path.is_relative_to(self._app_config.legacy_conf_path):
                    legacy_config_path = legacy_config_path.relative_to(self._app_config.legacy_conf_path)
                config["config_path"] = str(legacy_config_path)
                config.config_path = str(legacy_config_path)
            try:
                id = self.register_path(model_path=model_path, config=config)
                self._logger.info(f"Migrated {model_name} with id {id}")
@@ -500,11 +502,11 @@ class ModelInstallService(ModelInstallServiceBase):
        job.total_bytes = self._stat_size(job.local_path)
        job.bytes = job.total_bytes
        self._signal_job_running(job)
        job.config_in["source"] = str(job.source)
        job.config_in["source_type"] = MODEL_SOURCE_TO_TYPE_MAP[job.source.__class__]
        job.config_in.source = str(job.source)
        job.config_in.source_type = MODEL_SOURCE_TO_TYPE_MAP[job.source.__class__]
        # enter the metadata, if there is any
        if isinstance(job.source_metadata, (HuggingFaceMetadata)):
            job.config_in["source_api_response"] = job.source_metadata.api_response
            job.config_in.source_api_response = job.source_metadata.api_response

        if job.inplace:
            key = self.register_path(job.local_path, job.config_in)
@@ -639,11 +641,11 @@ class ModelInstallService(ModelInstallServiceBase):
        return new_path

    def _register(
        self, model_path: Path, config: Optional[Dict[str, Any]] = None, info: Optional[AnyModelConfig] = None
        self, model_path: Path, config: Optional[ModelRecordChanges] = None, info: Optional[AnyModelConfig] = None
    ) -> str:
        config = config or {}
        config = config or ModelRecordChanges()

        info = info or ModelProbe.probe(model_path, config, hash_algo=self._app_config.hashing_algorithm)
        info = info or ModelProbe.probe(model_path, config.model_dump(), hash_algo=self._app_config.hashing_algorithm)  # type: ignore

        model_path = model_path.resolve()

@@ -674,11 +676,13 @@ class ModelInstallService(ModelInstallServiceBase):
        precision = TorchDevice.choose_torch_dtype()
        return ModelRepoVariant.FP16 if precision == torch.float16 else None

    def _import_local_model(self, source: LocalModelSource, config: Optional[Dict[str, Any]]) -> ModelInstallJob:
    def _import_local_model(
        self, source: LocalModelSource, config: Optional[ModelRecordChanges] = None
    ) -> ModelInstallJob:
        return ModelInstallJob(
            id=self._next_id(),
            source=source,
            config_in=config or {},
            config_in=config or ModelRecordChanges(),
            local_path=Path(source.path),
            inplace=source.inplace or False,
        )
@@ -686,7 +690,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def _import_from_hf(
        self,
        source: HFModelSource,
        config: Optional[Dict[str, Any]] = None,
        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        # Add user's cached access token to HuggingFace requests
        if source.access_token is None:
@@ -702,7 +706,7 @@ class ModelInstallService(ModelInstallServiceBase):
    def _import_from_url(
        self,
        source: URLModelSource,
        config: Optional[Dict[str, Any]],
        config: Optional[ModelRecordChanges] = None,
    ) -> ModelInstallJob:
        remote_files, metadata = self._remote_files_from_source(source)
        return self._import_remote_model(
@@ -717,7 +721,7 @@ class ModelInstallService(ModelInstallServiceBase):
        source: HFModelSource | URLModelSource,
        remote_files: List[RemoteModelFile],
        metadata: Optional[AnyModelRepoMetadata],
        config: Optional[Dict[str, Any]],
        config: Optional[ModelRecordChanges],
    ) -> ModelInstallJob:
        if len(remote_files) == 0:
            raise ValueError(f"{source}: No downloadable files found")
@@ -730,7 +734,7 @@ class ModelInstallService(ModelInstallServiceBase):
        install_job = ModelInstallJob(
            id=self._next_id(),
            source=source,
            config_in=config or {},
            config_in=config or ModelRecordChanges(),
            source_metadata=metadata,
            local_path=destdir,  # local path may change once the download has started due to content-disposition handling
            bytes=0,
@@ -18,6 +18,7 @@ from invokeai.backend.model_manager.config import (
    ControlAdapterDefaultSettings,
    MainModelDefaultSettings,
    ModelFormat,
    ModelSourceType,
    ModelType,
    ModelVariantType,
    SchedulerPredictionType,
@@ -66,10 +67,16 @@ class ModelRecordChanges(BaseModelExcludeNull):
    """A set of changes to apply to a model."""

    # Changes applicable to all models
    source: Optional[str] = Field(description="original source of the model", default=None)
    source_type: Optional[ModelSourceType] = Field(description="type of model source", default=None)
    source_api_response: Optional[str] = Field(description="metadata from remote source", default=None)
    name: Optional[str] = Field(description="Name of the model.", default=None)
    path: Optional[str] = Field(description="Path to the model.", default=None)
    description: Optional[str] = Field(description="Model description", default=None)
    base: Optional[BaseModelType] = Field(description="The base model.", default=None)
    type: Optional[ModelType] = Field(description="Type of model", default=None)
    key: Optional[str] = Field(description="Database ID for this model", default=None)
    hash: Optional[str] = Field(description="hash of model file", default=None)
    trigger_phrases: Optional[set[str]] = Field(description="Set of trigger phrases for this model", default=None)
    default_settings: Optional[MainModelDefaultSettings | ControlAdapterDefaultSettings] = Field(
        description="Default settings for this model", default=None
@@ -16,6 +16,7 @@ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_10 import
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_11 import build_migration_11
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_12 import build_migration_12
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_13 import build_migration_13
from invokeai.app.services.shared.sqlite_migrator.migrations.migration_14 import build_migration_14
from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_impl import SqliteMigrator


@@ -49,6 +50,7 @@ def init_db(config: InvokeAIAppConfig, logger: Logger, image_files: ImageFileSto
    migrator.register_migration(build_migration_11(app_config=config, logger=logger))
    migrator.register_migration(build_migration_12(app_config=config))
    migrator.register_migration(build_migration_13())
    migrator.register_migration(build_migration_14())
    migrator.run_migrations()

    return db
@@ -0,0 +1,61 @@
import sqlite3

from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration


class Migration14Callback:
    def __call__(self, cursor: sqlite3.Cursor) -> None:
        self._create_style_presets(cursor)

    def _create_style_presets(self, cursor: sqlite3.Cursor) -> None:
        """Create the table used to store style presets."""
        tables = [
            """--sql
            CREATE TABLE IF NOT EXISTS style_presets (
                id TEXT NOT NULL PRIMARY KEY,
                name TEXT NOT NULL,
                preset_data TEXT NOT NULL,
                type TEXT NOT NULL DEFAULT "user",
                created_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')),
                -- Updated via trigger
                updated_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))
            );
            """
        ]

        # Add trigger for `updated_at`.
        triggers = [
            """--sql
            CREATE TRIGGER IF NOT EXISTS style_presets
            AFTER UPDATE
            ON style_presets FOR EACH ROW
            BEGIN
                UPDATE style_presets SET updated_at = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
                    WHERE id = old.id;
            END;
            """
        ]

        # Add indexes for searchable fields
        indices = [
            "CREATE INDEX IF NOT EXISTS idx_style_presets_name ON style_presets(name);",
        ]

        for stmt in tables + indices + triggers:
            cursor.execute(stmt)


def build_migration_14() -> Migration:
    """
    Build the migration from database version 13 to 14.

    This migration does the following:
    - Create the table used to store style presets.
    """
    migration_14 = Migration(
        from_version=13,
        to_version=14,
        callback=Migration14Callback(),
    )

    return migration_14
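Aside: a sketch that exercises the same DDL against an in-memory SQLite database to confirm the updated_at trigger fires; the trigger name, row values, and timing here are placeholders, not taken from the migration.

import sqlite3
import time

conn = sqlite3.connect(":memory:")
cur = conn.cursor()
cur.execute(
    """CREATE TABLE style_presets (
        id TEXT NOT NULL PRIMARY KEY,
        name TEXT NOT NULL,
        preset_data TEXT NOT NULL,
        type TEXT NOT NULL DEFAULT "user",
        created_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')),
        updated_at DATETIME NOT NULL DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))
    );"""
)
cur.execute(
    """CREATE TRIGGER style_presets_updated_at AFTER UPDATE ON style_presets
    FOR EACH ROW BEGIN
        UPDATE style_presets SET updated_at = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW')
        WHERE id = old.id;
    END;"""
)
cur.execute("INSERT INTO style_presets (id, name, preset_data) VALUES ('1', 'p', '{}')")
before = cur.execute("SELECT updated_at FROM style_presets").fetchone()[0]
time.sleep(0.01)
cur.execute("UPDATE style_presets SET name = 'q' WHERE id = '1'")
after = cur.execute("SELECT updated_at FROM style_presets").fetchone()[0]
assert after >= before  # the trigger refreshed updated_at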
[Binary diff: 18 new default style preset images added, 46 KiB to 160 KiB each]
@@ -0,0 +1,33 @@
from abc import ABC, abstractmethod
from pathlib import Path

from PIL.Image import Image as PILImageType


class StylePresetImageFileStorageBase(ABC):
    """Low-level service responsible for storing and retrieving image files."""

    @abstractmethod
    def get(self, style_preset_id: str) -> PILImageType:
        """Retrieves a style preset image as PIL Image."""
        pass

    @abstractmethod
    def get_path(self, style_preset_id: str) -> Path:
        """Gets the internal path to a style preset image."""
        pass

    @abstractmethod
    def get_url(self, style_preset_id: str) -> str | None:
        """Gets the URL to fetch a style preset image."""
        pass

    @abstractmethod
    def save(self, style_preset_id: str, image: PILImageType) -> None:
        """Saves a style preset image."""
        pass

    @abstractmethod
    def delete(self, style_preset_id: str) -> None:
        """Deletes a style preset image."""
        pass
@@ -0,0 +1,19 @@
class StylePresetImageFileNotFoundException(Exception):
    """Raised when an image file is not found in storage."""

    def __init__(self, message: str = "Style preset image file not found"):
        super().__init__(message)


class StylePresetImageFileSaveException(Exception):
    """Raised when an image cannot be saved."""

    def __init__(self, message: str = "Style preset image file not saved"):
        super().__init__(message)


class StylePresetImageFileDeleteException(Exception):
    """Raised when an image cannot be deleted."""

    def __init__(self, message: str = "Style preset image file not deleted"):
        super().__init__(message)
@@ -0,0 +1,88 @@
from pathlib import Path

from PIL import Image
from PIL.Image import Image as PILImageType

from invokeai.app.services.invoker import Invoker
from invokeai.app.services.style_preset_images.style_preset_images_base import StylePresetImageFileStorageBase
from invokeai.app.services.style_preset_images.style_preset_images_common import (
    StylePresetImageFileDeleteException,
    StylePresetImageFileNotFoundException,
    StylePresetImageFileSaveException,
)
from invokeai.app.services.style_preset_records.style_preset_records_common import PresetType
from invokeai.app.util.misc import uuid_string
from invokeai.app.util.thumbnails import make_thumbnail


class StylePresetImageFileStorageDisk(StylePresetImageFileStorageBase):
    """Stores images on disk"""

    def __init__(self, style_preset_images_folder: Path):
        self._style_preset_images_folder = style_preset_images_folder
        self._validate_storage_folders()

    def start(self, invoker: Invoker) -> None:
        self._invoker = invoker

    def get(self, style_preset_id: str) -> PILImageType:
        try:
            path = self.get_path(style_preset_id)

            return Image.open(path)
        except FileNotFoundError as e:
            raise StylePresetImageFileNotFoundException from e

    def save(self, style_preset_id: str, image: PILImageType) -> None:
        try:
            self._validate_storage_folders()
            image_path = self._style_preset_images_folder / (style_preset_id + ".webp")
            thumbnail = make_thumbnail(image, 256)
            thumbnail.save(image_path, format="webp")

        except Exception as e:
            raise StylePresetImageFileSaveException from e

    def get_path(self, style_preset_id: str) -> Path:
        style_preset = self._invoker.services.style_preset_records.get(style_preset_id)
        if style_preset.type is PresetType.Default:
            default_images_dir = Path(__file__).parent / Path("default_style_preset_images")
            path = default_images_dir / (style_preset.name + ".png")
        else:
            path = self._style_preset_images_folder / (style_preset_id + ".webp")

        return path

    def get_url(self, style_preset_id: str) -> str | None:
        path = self.get_path(style_preset_id)
        if not self._validate_path(path):
            return

        url = self._invoker.services.urls.get_style_preset_image_url(style_preset_id)

        # The image URL never changes, so we must add a random query string to it to prevent caching
        url += f"?{uuid_string()}"

        return url

    def delete(self, style_preset_id: str) -> None:
        try:
            path = self.get_path(style_preset_id)

            if not self._validate_path(path):
                raise StylePresetImageFileNotFoundException

            path.unlink()

        except StylePresetImageFileNotFoundException as e:
            raise StylePresetImageFileNotFoundException from e
        except Exception as e:
            raise StylePresetImageFileDeleteException from e

    def _validate_path(self, path: Path) -> bool:
        """Validates the path given for an image."""
        return path.exists()

    def _validate_storage_folders(self) -> None:
        """Checks if the required folders exist and creates them if they don't"""
        self._style_preset_images_folder.mkdir(parents=True, exist_ok=True)
@ -0,0 +1,146 @@
|
||||
[
|
||||
{
|
||||
"name": "Photography (General)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt}. photography. f/2.8 macro photo, bokeh, photorealism",
|
||||
"negative_prompt": "painting, digital art. sketch, blurry"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Photography (Studio Lighting)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt}, photography. f/8 photo. centered subject, studio lighting.",
|
||||
"negative_prompt": "painting, digital art. sketch, blurry"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Photography (Landscape)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt}, landscape photograph, f/12, lifelike, highly detailed.",
|
||||
"negative_prompt": "painting, digital art. sketch, blurry"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Photography (Portrait)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt}. photography. portraiture. catch light in eyes. one flash. rembrandt lighting. Soft box. dark shadows. High contrast. 80mm lens. F2.8.",
|
||||
"negative_prompt": "painting, digital art. sketch, blurry"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Photography (Black and White)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} photography. natural light. 80mm lens. F1.4. strong contrast, hard light. dark contrast. blurred background. black and white",
|
||||
"negative_prompt": "painting, digital art. sketch, colour+"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Architectural Visualization",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt}. architectural photography, f/12, luxury, aesthetically pleasing form and function.",
|
||||
"negative_prompt": "painting, digital art. sketch, blurry"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Concept Art (Fantasy)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "concept artwork of a {prompt}. (digital painterly art style)++, mythological, (textured 2d dry media brushpack)++, glazed brushstrokes, otherworldly. painting+, illustration+",
|
||||
"negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Concept Art (Sci-Fi)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "(concept art)++, {prompt}, (sleek futurism)++, (textured 2d dry media)++, metallic highlights, digital painting style",
|
||||
"negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Concept Art (Character)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "(character concept art)++, stylized painterly digital painting of {prompt}, (painterly, impasto. Dry brush.)++",
|
||||
"negative_prompt": "photo. distorted, blurry, out of focus. sketch. (cgi, 3d.)++"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Concept Art (Painterly)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} oil painting. high contrast. impasto. sfumato. chiaroscuro. Palette knife.",
|
||||
"negative_prompt": "photo. smooth. border. frame"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Environment Art",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} environment artwork, hyper-realistic digital painting style with cinematic composition, atmospheric, depth and detail, voluminous. textured dry brush 2d media",
|
||||
"negative_prompt": "photo, distorted, blurry, out of focus. sketch."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Interior Design (Visualization)",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} interior design photo, gentle shadows, light mid-tones, dimension, mix of smooth and textured surfaces, focus on negative space and clean lines, focus",
|
||||
"negative_prompt": "photo, distorted. sketch."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Product Rendering",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} high quality product photography, 3d rendering with key lighting, shallow depth of field, simple plain background, studio lighting.",
|
||||
"negative_prompt": "blurry, sketch, messy, dirty. unfinished."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Sketch",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} black and white pencil drawing, off-center composition, cross-hatching for shadows, bold strokes, textured paper. sketch+++",
|
||||
"negative_prompt": "blurry, photo, painting, color. messy, dirty. unfinished. frame, borders."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Line Art",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} Line art. bold outline. simplistic. white background. 2d",
|
||||
"negative_prompt": "photo. digital art. greyscale. solid black. painting"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Anime",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} anime++, bold outline, cel-shaded coloring, shounen, seinen",
|
||||
"negative_prompt": "(photo)+++. greyscale. solid black. painting"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Illustration",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "{prompt} illustration, bold linework, illustrative details, vector art style, flat coloring",
|
||||
"negative_prompt": "(photo)+++. greyscale. painting, black and white."
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Vehicles",
|
||||
"type": "default",
|
||||
"preset_data": {
|
||||
"positive_prompt": "A weird futuristic normal auto, {prompt} elegant design, nice color, nice wheels",
|
||||
"negative_prompt": "sketch. digital art. greyscale. painting"
|
||||
}
|
||||
}
|
||||
]
|
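Each default preset embeds a `{prompt}` placeholder in its positive prompt; applying a preset is conceptually a string substitution into that slot. A minimal sketch, assuming plain placeholder replacement (the real substitution happens client-side and may differ):

```python
# Minimal sketch of preset application; assumes plain "{prompt}" substitution.
def apply_preset(preset_data: dict[str, str], user_prompt: str) -> tuple[str, str]:
    positive = preset_data["positive_prompt"].replace("{prompt}", user_prompt)
    return positive, preset_data["negative_prompt"]


positive, negative = apply_preset(
    {
        "positive_prompt": "{prompt}. photography. f/2.8 macro photo, bokeh, photorealism",
        "negative_prompt": "painting, digital art. sketch, blurry",
    },
    "a dew-covered spider web",
)
# positive == "a dew-covered spider web. photography. f/2.8 macro photo, bokeh, photorealism"
```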
@ -0,0 +1,42 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from invokeai.app.services.style_preset_records.style_preset_records_common import (
|
||||
PresetType,
|
||||
StylePresetChanges,
|
||||
StylePresetRecordDTO,
|
||||
StylePresetWithoutId,
|
||||
)
|
||||
|
||||
|
||||
class StylePresetRecordsStorageBase(ABC):
|
||||
"""Base class for style preset storage services."""
|
||||
|
||||
@abstractmethod
|
||||
def get(self, style_preset_id: str) -> StylePresetRecordDTO:
|
||||
"""Get style preset by id."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def create(self, style_preset: StylePresetWithoutId) -> StylePresetRecordDTO:
|
||||
"""Creates a style preset."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def create_many(self, style_presets: list[StylePresetWithoutId]) -> None:
|
||||
"""Creates many style presets."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def update(self, style_preset_id: str, changes: StylePresetChanges) -> StylePresetRecordDTO:
|
||||
"""Updates a style preset."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, style_preset_id: str) -> None:
|
||||
"""Deletes a style preset."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_many(self, type: PresetType | None = None) -> list[StylePresetRecordDTO]:
|
||||
"""Gets many workflows."""
|
||||
pass
|
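A concrete backend only has to supply these six methods. As a hedged illustration (not part of this changeset), an in-memory implementation suitable for unit tests could look like:

```python
# Hypothetical in-memory backend for tests; illustrative, not part of this PR.
from invokeai.app.services.style_preset_records.style_preset_records_common import (
    PresetType,
    StylePresetChanges,
    StylePresetRecordDTO,
    StylePresetWithoutId,
)
from invokeai.app.util.misc import uuid_string


class MemoryStylePresetRecordsStorage(StylePresetRecordsStorageBase):
    def __init__(self) -> None:
        self._records: dict[str, StylePresetRecordDTO] = {}

    def get(self, style_preset_id: str) -> StylePresetRecordDTO:
        return self._records[style_preset_id]

    def create(self, style_preset: StylePresetWithoutId) -> StylePresetRecordDTO:
        record = StylePresetRecordDTO(id=uuid_string(), **style_preset.model_dump())
        self._records[record.id] = record
        return record

    def create_many(self, style_presets: list[StylePresetWithoutId]) -> None:
        for style_preset in style_presets:
            self.create(style_preset)

    def update(self, style_preset_id: str, changes: StylePresetChanges) -> StylePresetRecordDTO:
        record = self._records[style_preset_id]
        if changes.name is not None:
            record = record.model_copy(update={"name": changes.name})
        if changes.preset_data is not None:
            record = record.model_copy(update={"preset_data": changes.preset_data})
        self._records[style_preset_id] = record
        return record

    def delete(self, style_preset_id: str) -> None:
        self._records.pop(style_preset_id, None)

    def get_many(self, type: PresetType | None = None) -> list[StylePresetRecordDTO]:
        records = [r for r in self._records.values() if type is None or r.type == type]
        return sorted(records, key=lambda r: r.name.lower())
```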
@ -0,0 +1,138 @@
|
||||
import codecs
|
||||
import csv
|
||||
import json
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
import pydantic
|
||||
from fastapi import UploadFile
|
||||
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, TypeAdapter
|
||||
|
||||
from invokeai.app.util.metaenum import MetaEnum
|
||||
|
||||
|
||||
class StylePresetNotFoundError(Exception):
|
||||
"""Raised when a style preset is not found"""
|
||||
|
||||
|
||||
class PresetData(BaseModel, extra="forbid"):
|
||||
positive_prompt: str = Field(description="Positive prompt")
|
||||
negative_prompt: str = Field(description="Negative prompt")
|
||||
|
||||
|
||||
PresetDataValidator = TypeAdapter(PresetData)
|
||||
|
||||
|
||||
class PresetType(str, Enum, metaclass=MetaEnum):
|
||||
User = "user"
|
||||
Default = "default"
|
||||
Project = "project"
|
||||
|
||||
|
||||
class StylePresetChanges(BaseModel, extra="forbid"):
|
||||
name: Optional[str] = Field(default=None, description="The style preset's new name.")
|
||||
preset_data: Optional[PresetData] = Field(default=None, description="The updated data for the style preset.")
|
||||
|
||||
|
||||
class StylePresetWithoutId(BaseModel):
|
||||
name: str = Field(description="The name of the style preset.")
|
||||
preset_data: PresetData = Field(description="The preset data")
|
||||
type: PresetType = Field(description="The type of style preset")
|
||||
|
||||
|
||||
class StylePresetRecordDTO(StylePresetWithoutId):
|
||||
id: str = Field(description="The style preset ID.")
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict[str, Any]) -> "StylePresetRecordDTO":
|
||||
data["preset_data"] = PresetDataValidator.validate_json(data.get("preset_data", ""))
|
||||
return StylePresetRecordDTOValidator.validate_python(data)
|
||||
|
||||
|
||||
StylePresetRecordDTOValidator = TypeAdapter(StylePresetRecordDTO)
|
||||
|
||||
|
||||
class StylePresetRecordWithImage(StylePresetRecordDTO):
|
||||
image: Optional[str] = Field(description="The path for the image")
|
||||
|
||||
|
||||
class StylePresetImportRow(BaseModel):
|
||||
name: str = Field(min_length=1, description="The name of the preset.")
|
||||
positive_prompt: str = Field(
|
||||
default="",
|
||||
description="The positive prompt for the preset.",
|
||||
validation_alias=AliasChoices("positive_prompt", "prompt"),
|
||||
)
|
||||
negative_prompt: str = Field(default="", description="The negative prompt for the preset.")
|
||||
|
||||
model_config = ConfigDict(str_strip_whitespace=True, extra="forbid")
|
||||
|
||||
|
||||
StylePresetImportList = list[StylePresetImportRow]
|
||||
StylePresetImportListTypeAdapter = TypeAdapter(StylePresetImportList)
|
||||
|
||||
|
||||
class UnsupportedFileTypeError(ValueError):
|
||||
"""Raised when an unsupported file type is encountered"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class InvalidPresetImportDataError(ValueError):
|
||||
"""Raised when invalid preset import data is encountered"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
async def parse_presets_from_file(file: UploadFile) -> list[StylePresetWithoutId]:
|
||||
"""Parses style presets from a file. The file must be a CSV or JSON file.
|
||||
|
||||
If CSV, the file must have the following columns:
|
||||
- name
|
||||
- prompt (or positive_prompt)
|
||||
- negative_prompt
|
||||
|
||||
If JSON, the file must be a list of objects with the following keys:
|
||||
- name
|
||||
- prompt (or positive_prompt)
|
||||
- negative_prompt
|
||||
|
||||
Args:
|
||||
file (UploadFile): The file to parse.
|
||||
|
||||
Returns:
|
||||
list[StylePresetWithoutId]: The parsed style presets.
|
||||
|
||||
Raises:
|
||||
UnsupportedFileTypeError: If the file type is not supported.
|
||||
InvalidPresetImportDataError: If the data in the file is invalid.
|
||||
"""
|
||||
if file.content_type not in ["text/csv", "application/json"]:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if file.content_type == "text/csv":
|
||||
csv_reader = csv.DictReader(codecs.iterdecode(file.file, "utf-8"))
|
||||
data = list(csv_reader)
|
||||
else:  # file.content_type == "application/json"
|
||||
json_data = await file.read()
|
||||
data = json.loads(json_data)
|
||||
|
||||
try:
|
||||
imported_presets = StylePresetImportListTypeAdapter.validate_python(data)
|
||||
|
||||
style_presets: list[StylePresetWithoutId] = []
|
||||
|
||||
for imported in imported_presets:
|
||||
preset_data = PresetData(positive_prompt=imported.positive_prompt, negative_prompt=imported.negative_prompt)
|
||||
style_preset = StylePresetWithoutId(name=imported.name, preset_data=preset_data, type=PresetType.User)
|
||||
style_presets.append(style_preset)
|
||||
except pydantic.ValidationError as e:
|
||||
if file.content_type == "text/csv":
|
||||
msg = "Invalid CSV format: must include columns 'name', 'prompt', and 'negative_prompt' and name cannot be blank"
|
||||
else:  # file.content_type == "application/json"
|
||||
msg = "Invalid JSON format: must be a list of objects with keys 'name', 'prompt', and 'negative_prompt' and name cannot be blank"
|
||||
raise InvalidPresetImportDataError(msg) from e
|
||||
finally:
|
||||
file.file.close()
|
||||
|
||||
return style_presets
|
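A hedged test sketch exercising the CSV path; constructing `UploadFile` by hand like this depends on the installed starlette/fastapi version, so the constructor arguments here are assumptions:

```python
# Hypothetical test sketch; UploadFile constructor details vary by version.
import asyncio
import io

from fastapi import UploadFile
from starlette.datastructures import Headers

csv_bytes = b"name,prompt,negative_prompt\nMoody,{prompt} dramatic lighting,blurry\n"
upload = UploadFile(
    file=io.BytesIO(csv_bytes),
    filename="presets.csv",
    headers=Headers({"content-type": "text/csv"}),  # content_type is read from headers
)

presets = asyncio.run(parse_presets_from_file(upload))
assert presets[0].name == "Moody"
assert presets[0].preset_data.positive_prompt == "{prompt} dramatic lighting"
assert presets[0].type == PresetType.User  # imports are always typed as user presets
```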
@ -0,0 +1,215 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from invokeai.app.services.invoker import Invoker
|
||||
from invokeai.app.services.shared.sqlite.sqlite_database import SqliteDatabase
|
||||
from invokeai.app.services.style_preset_records.style_preset_records_base import StylePresetRecordsStorageBase
|
||||
from invokeai.app.services.style_preset_records.style_preset_records_common import (
|
||||
PresetType,
|
||||
StylePresetChanges,
|
||||
StylePresetNotFoundError,
|
||||
StylePresetRecordDTO,
|
||||
StylePresetWithoutId,
|
||||
)
|
||||
from invokeai.app.util.misc import uuid_string
|
||||
|
||||
|
||||
class SqliteStylePresetRecordsStorage(StylePresetRecordsStorageBase):
|
||||
def __init__(self, db: SqliteDatabase) -> None:
|
||||
super().__init__()
|
||||
self._lock = db.lock
|
||||
self._conn = db.conn
|
||||
self._cursor = self._conn.cursor()
|
||||
|
||||
def start(self, invoker: Invoker) -> None:
|
||||
self._invoker = invoker
|
||||
self._sync_default_style_presets()
|
||||
|
||||
def get(self, style_preset_id: str) -> StylePresetRecordDTO:
|
||||
"""Gets a style preset by ID."""
|
||||
try:
|
||||
self._lock.acquire()
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
SELECT *
|
||||
FROM style_presets
|
||||
WHERE id = ?;
|
||||
""",
|
||||
(style_preset_id,),
|
||||
)
|
||||
row = self._cursor.fetchone()
|
||||
if row is None:
|
||||
raise StylePresetNotFoundError(f"Style preset with id {style_preset_id} not found")
|
||||
return StylePresetRecordDTO.from_dict(dict(row))
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
|
||||
def create(self, style_preset: StylePresetWithoutId) -> StylePresetRecordDTO:
|
||||
style_preset_id = uuid_string()
|
||||
try:
|
||||
self._lock.acquire()
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
INSERT OR IGNORE INTO style_presets (
|
||||
id,
|
||||
name,
|
||||
preset_data,
|
||||
type
|
||||
)
|
||||
VALUES (?, ?, ?, ?);
|
||||
""",
|
||||
(
|
||||
style_preset_id,
|
||||
style_preset.name,
|
||||
style_preset.preset_data.model_dump_json(),
|
||||
style_preset.type,
|
||||
),
|
||||
)
|
||||
self._conn.commit()
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
return self.get(style_preset_id)
|
||||
|
||||
def create_many(self, style_presets: list[StylePresetWithoutId]) -> None:
|
||||
style_preset_ids = []
|
||||
try:
|
||||
self._lock.acquire()
|
||||
for style_preset in style_presets:
|
||||
style_preset_id = uuid_string()
|
||||
style_preset_ids.append(style_preset_id)
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
INSERT OR IGNORE INTO style_presets (
|
||||
id,
|
||||
name,
|
||||
preset_data,
|
||||
type
|
||||
)
|
||||
VALUES (?, ?, ?, ?);
|
||||
""",
|
||||
(
|
||||
style_preset_id,
|
||||
style_preset.name,
|
||||
style_preset.preset_data.model_dump_json(),
|
||||
style_preset.type,
|
||||
),
|
||||
)
|
||||
self._conn.commit()
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
|
||||
return None
|
||||
|
||||
def update(self, style_preset_id: str, changes: StylePresetChanges) -> StylePresetRecordDTO:
|
||||
try:
|
||||
self._lock.acquire()
|
||||
# Change the name of a style preset
|
||||
if changes.name is not None:
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
UPDATE style_presets
|
||||
SET name = ?
|
||||
WHERE id = ?;
|
||||
""",
|
||||
(changes.name, style_preset_id),
|
||||
)
|
||||
|
||||
# Change the preset data for a style preset
|
||||
if changes.preset_data is not None:
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
UPDATE style_presets
|
||||
SET preset_data = ?
|
||||
WHERE id = ?;
|
||||
""",
|
||||
(changes.preset_data.model_dump_json(), style_preset_id),
|
||||
)
|
||||
|
||||
self._conn.commit()
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
return self.get(style_preset_id)
|
||||
|
||||
def delete(self, style_preset_id: str) -> None:
|
||||
try:
|
||||
self._lock.acquire()
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
DELETE from style_presets
|
||||
WHERE id = ?;
|
||||
""",
|
||||
(style_preset_id,),
|
||||
)
|
||||
self._conn.commit()
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
return None
|
||||
|
||||
def get_many(self, type: PresetType | None = None) -> list[StylePresetRecordDTO]:
|
||||
try:
|
||||
self._lock.acquire()
|
||||
main_query = """
|
||||
SELECT
|
||||
*
|
||||
FROM style_presets
|
||||
"""
|
||||
|
||||
if type is not None:
|
||||
main_query += "WHERE type = ? "
|
||||
|
||||
main_query += "ORDER BY LOWER(name) ASC"
|
||||
|
||||
if type is not None:
|
||||
self._cursor.execute(main_query, (type,))
|
||||
else:
|
||||
self._cursor.execute(main_query)
|
||||
|
||||
rows = self._cursor.fetchall()
|
||||
style_presets = [StylePresetRecordDTO.from_dict(dict(row)) for row in rows]
|
||||
|
||||
return style_presets
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
|
||||
def _sync_default_style_presets(self) -> None:
|
||||
"""Syncs default style presets to the database. Internal use only."""
|
||||
|
||||
# First delete all existing default style presets
|
||||
try:
|
||||
self._lock.acquire()
|
||||
self._cursor.execute(
|
||||
"""--sql
|
||||
DELETE FROM style_presets
|
||||
WHERE type = 'default';
|
||||
"""
|
||||
)
|
||||
self._conn.commit()
|
||||
except Exception:
|
||||
self._conn.rollback()
|
||||
raise
|
||||
finally:
|
||||
self._lock.release()
|
||||
# Next, parse and create the default style presets
|
||||
with self._lock, open(Path(__file__).parent / Path("default_style_presets.json"), "r") as file:
|
||||
presets = json.load(file)
|
||||
for preset in presets:
|
||||
style_preset = StylePresetWithoutId.model_validate(preset)
|
||||
self.create(style_preset)
|
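A hedged usage sketch; in practice the storage is constructed by InvokeAI's service wiring, so `db` below stands in for an already-initialized `SqliteDatabase`, and the model imports are as in the modules above:

```python
# Illustrative only -- assumes `db` is an already-initialized SqliteDatabase.
storage = SqliteStylePresetRecordsStorage(db)

created = storage.create(
    StylePresetWithoutId(
        name="Moody portrait",
        type=PresetType.User,
        preset_data=PresetData(
            positive_prompt="{prompt}, cinematic, rim lighting",
            negative_prompt="blurry, low quality",
        ),
    )
)
user_presets = storage.get_many(type=PresetType.User)  # sorted case-insensitively by name
storage.delete(created.id)
```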
@ -13,3 +13,8 @@ class UrlServiceBase(ABC):
|
||||
def get_model_image_url(self, model_key: str) -> str:
|
||||
"""Gets the URL for a model image"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_style_preset_image_url(self, style_preset_id: str) -> str:
|
||||
"""Gets the URL for a style preset image"""
|
||||
pass
|
||||
|
@ -19,3 +19,6 @@ class LocalUrlService(UrlServiceBase):
|
||||
|
||||
def get_model_image_url(self, model_key: str) -> str:
|
||||
return f"{self._base_url_v2}/models/i/{model_key}/image"
|
||||
|
||||
def get_style_preset_image_url(self, style_preset_id: str) -> str:
|
||||
return f"{self._base_url}/style_presets/i/{style_preset_id}/image"
|
||||
|
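For example, a preset with id `abc-123` resolves to `<base_url>/style_presets/i/abc-123/image`; note this route hangs off the v1 base URL, while the model-image route above it uses the v2 base.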
@ -81,7 +81,7 @@ def get_openapi_func(
|
||||
# Add the output map to the schema
|
||||
openapi_schema["components"]["schemas"]["InvocationOutputMap"] = {
|
||||
"type": "object",
|
||||
"properties": invocation_output_map_properties,
|
||||
"properties": dict(sorted(invocation_output_map_properties.items())),
|
||||
"required": invocation_output_map_required,
|
||||
}
|
||||
|
||||
|
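The one-line change makes the `InvocationOutputMap` keys deterministic: Python dicts serialize in insertion order, so sorting the items before rebuilding the dict alphabetizes the emitted JSON object. A quick illustration:

```python
# dicts preserve insertion order, so sorting items first fixes the JSON key order.
props = {"zeta_output": {}, "alpha_output": {}}
assert list(dict(sorted(props.items()))) == ["alpha_output", "zeta_output"]
```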
@ -0,0 +1,42 @@
|
||||
{
|
||||
"_class_name": "ControlNetModel",
|
||||
"_diffusers_version": "0.16.0.dev0",
|
||||
"_name_or_path": "/home/patrick/controlnet_v1_1/control_v11p_sd15_canny",
|
||||
"act_fn": "silu",
|
||||
"attention_head_dim": 8,
|
||||
"block_out_channels": [
|
||||
320,
|
||||
640,
|
||||
1280,
|
||||
1280
|
||||
],
|
||||
"class_embed_type": null,
|
||||
"conditioning_embedding_out_channels": [
|
||||
16,
|
||||
32,
|
||||
96,
|
||||
256
|
||||
],
|
||||
"controlnet_conditioning_channel_order": "rgb",
|
||||
"cross_attention_dim": 768,
|
||||
"down_block_types": [
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"DownBlock2D"
|
||||
],
|
||||
"downsample_padding": 1,
|
||||
"flip_sin_to_cos": true,
|
||||
"freq_shift": 0,
|
||||
"in_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"mid_block_scale_factor": 1,
|
||||
"norm_eps": 1e-05,
|
||||
"norm_num_groups": 32,
|
||||
"num_class_embeds": null,
|
||||
"only_cross_attention": false,
|
||||
"projection_class_embeddings_input_dim": null,
|
||||
"resnet_time_scale_shift": "default",
|
||||
"upcast_attention": false,
|
||||
"use_linear_projection": false
|
||||
}
|
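The remaining files appear to be stripped-down model config fixtures (weights omitted). As a hedged sketch of how a diffusers consumer would instantiate the skeleton this ControlNet config describes (the path is a placeholder):

```python
# Hedged sketch: build the model skeleton from the config alone; no weights loaded.
from diffusers import ControlNetModel

config = ControlNetModel.load_config("path/to/config.json")  # placeholder path
controlnet = ControlNetModel.from_config(config)
assert controlnet.config.cross_attention_dim == 768  # matches the SD 1.5 config above
```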
@ -0,0 +1,56 @@
|
||||
{
|
||||
"_class_name": "ControlNetModel",
|
||||
"_diffusers_version": "0.19.3",
|
||||
"act_fn": "silu",
|
||||
"addition_embed_type": "text_time",
|
||||
"addition_embed_type_num_heads": 64,
|
||||
"addition_time_embed_dim": 256,
|
||||
"attention_head_dim": [
|
||||
5,
|
||||
10,
|
||||
20
|
||||
],
|
||||
"block_out_channels": [
|
||||
320,
|
||||
640,
|
||||
1280
|
||||
],
|
||||
"class_embed_type": null,
|
||||
"conditioning_channels": 3,
|
||||
"conditioning_embedding_out_channels": [
|
||||
16,
|
||||
32,
|
||||
96,
|
||||
256
|
||||
],
|
||||
"controlnet_conditioning_channel_order": "rgb",
|
||||
"cross_attention_dim": 2048,
|
||||
"down_block_types": [
|
||||
"DownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D"
|
||||
],
|
||||
"downsample_padding": 1,
|
||||
"encoder_hid_dim": null,
|
||||
"encoder_hid_dim_type": null,
|
||||
"flip_sin_to_cos": true,
|
||||
"freq_shift": 0,
|
||||
"global_pool_conditions": false,
|
||||
"in_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"mid_block_scale_factor": 1,
|
||||
"norm_eps": 1e-05,
|
||||
"norm_num_groups": 32,
|
||||
"num_attention_heads": null,
|
||||
"num_class_embeds": null,
|
||||
"only_cross_attention": false,
|
||||
"projection_class_embeddings_input_dim": 2816,
|
||||
"resnet_time_scale_shift": "default",
|
||||
"transformer_layers_per_block": [
|
||||
1,
|
||||
2,
|
||||
10
|
||||
],
|
||||
"upcast_attention": null,
|
||||
"use_linear_projection": true
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
{
|
||||
"crop_size": 224,
|
||||
"do_center_crop": true,
|
||||
"do_convert_rgb": true,
|
||||
"do_normalize": true,
|
||||
"do_resize": true,
|
||||
"feature_extractor_type": "CLIPFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.48145466,
|
||||
0.4578275,
|
||||
0.40821073
|
||||
],
|
||||
"image_std": [
|
||||
0.26862954,
|
||||
0.26130258,
|
||||
0.27577711
|
||||
],
|
||||
"resample": 3,
|
||||
"size": 224
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
{
|
||||
"_class_name": "StableDiffusionPipeline",
|
||||
"_diffusers_version": "0.6.0",
|
||||
"feature_extractor": [
|
||||
"transformers",
|
||||
"CLIPImageProcessor"
|
||||
],
|
||||
"safety_checker": [
|
||||
"stable_diffusion",
|
||||
"StableDiffusionSafetyChecker"
|
||||
],
|
||||
"scheduler": [
|
||||
"diffusers",
|
||||
"PNDMScheduler"
|
||||
],
|
||||
"text_encoder": [
|
||||
"transformers",
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"tokenizer": [
|
||||
"transformers",
|
||||
"CLIPTokenizer"
|
||||
],
|
||||
"unet": [
|
||||
"diffusers",
|
||||
"UNet2DConditionModel"
|
||||
],
|
||||
"vae": [
|
||||
"diffusers",
|
||||
"AutoencoderKL"
|
||||
]
|
||||
}
|
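`model_index.json` is the manifest that `DiffusionPipeline.from_pretrained` reads to pick the library/class pair for each component sub-folder. A sketch (the local path is a placeholder):

```python
# Sketch: from_pretrained resolves each (library, class) pair in model_index.json.
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("path/to/sd-pipeline")  # placeholder path
```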
@ -0,0 +1,175 @@
|
||||
{
|
||||
"_commit_hash": "4bb648a606ef040e7685bde262611766a5fdd67b",
|
||||
"_name_or_path": "CompVis/stable-diffusion-safety-checker",
|
||||
"architectures": [
|
||||
"StableDiffusionSafetyChecker"
|
||||
],
|
||||
"initializer_factor": 1.0,
|
||||
"logit_scale_init_value": 2.6592,
|
||||
"model_type": "clip",
|
||||
"projection_dim": 768,
|
||||
"text_config": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.0,
|
||||
"early_stopping": false,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 768,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 77,
|
||||
"min_length": 0,
|
||||
"model_type": "clip_text_model",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_attention_heads": 12,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"transformers_version": "4.22.0.dev0",
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"vocab_size": 49408
|
||||
},
|
||||
"text_config_dict": {
|
||||
"hidden_size": 768,
|
||||
"intermediate_size": 3072,
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12
|
||||
},
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": null,
|
||||
"vision_config": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.0,
|
||||
"early_stopping": false,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 1024,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": 224,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4096,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"model_type": "clip_vision_model",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_attention_heads": 16,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_hidden_layers": 24,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 14,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"transformers_version": "4.22.0.dev0",
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false
|
||||
},
|
||||
"vision_config_dict": {
|
||||
"hidden_size": 1024,
|
||||
"intermediate_size": 4096,
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 24,
|
||||
"patch_size": 14
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
{
|
||||
"_class_name": "PNDMScheduler",
|
||||
"_diffusers_version": "0.6.0",
|
||||
"beta_end": 0.012,
|
||||
"beta_schedule": "scaled_linear",
|
||||
"beta_start": 0.00085,
|
||||
"num_train_timesteps": 1000,
|
||||
"set_alpha_to_one": false,
|
||||
"skip_prk_steps": true,
|
||||
"steps_offset": 1,
|
||||
"trained_betas": null,
|
||||
"clip_sample": false
|
||||
}
|
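Scheduler configs carry no weights, only hyperparameters, so a scheduler can be rebuilt from the JSON alone; a hedged sketch with a placeholder path:

```python
# Hedged sketch: rebuild the scheduler purely from its config.
from diffusers import PNDMScheduler

config = PNDMScheduler.load_config("path/to/scheduler_config.json")  # placeholder path
scheduler = PNDMScheduler.from_config(config)
assert scheduler.config.num_train_timesteps == 1000
```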
@ -0,0 +1,25 @@
|
||||
{
|
||||
"_name_or_path": "openai/clip-vit-large-patch14",
|
||||
"architectures": [
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dropout": 0.0,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 768,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_position_embeddings": 77,
|
||||
"model_type": "clip_text_model",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 1,
|
||||
"projection_dim": 768,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.22.0.dev0",
|
||||
"vocab_size": 49408
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|endoftext|>",
|
||||
"unk_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"bos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"do_lower_case": true,
|
||||
"eos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"errors": "replace",
|
||||
"model_max_length": 77,
|
||||
"name_or_path": "openai/clip-vit-large-patch14",
|
||||
"pad_token": "<|endoftext|>",
|
||||
"special_tokens_map_file": "./special_tokens_map.json",
|
||||
"tokenizer_class": "CLIPTokenizer",
|
||||
"unk_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
{
|
||||
"_class_name": "UNet2DConditionModel",
|
||||
"_diffusers_version": "0.6.0",
|
||||
"act_fn": "silu",
|
||||
"attention_head_dim": 8,
|
||||
"block_out_channels": [
|
||||
320,
|
||||
640,
|
||||
1280,
|
||||
1280
|
||||
],
|
||||
"center_input_sample": false,
|
||||
"cross_attention_dim": 768,
|
||||
"down_block_types": [
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"DownBlock2D"
|
||||
],
|
||||
"downsample_padding": 1,
|
||||
"flip_sin_to_cos": true,
|
||||
"freq_shift": 0,
|
||||
"in_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"mid_block_scale_factor": 1,
|
||||
"norm_eps": 1e-05,
|
||||
"norm_num_groups": 32,
|
||||
"out_channels": 4,
|
||||
"sample_size": 64,
|
||||
"up_block_types": [
|
||||
"UpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D"
|
||||
]
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
{
|
||||
"_class_name": "AutoencoderKL",
|
||||
"_diffusers_version": "0.6.0",
|
||||
"act_fn": "silu",
|
||||
"block_out_channels": [
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
512
|
||||
],
|
||||
"down_block_types": [
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D"
|
||||
],
|
||||
"in_channels": 3,
|
||||
"latent_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"norm_num_groups": 32,
|
||||
"out_channels": 3,
|
||||
"sample_size": 512,
|
||||
"up_block_types": [
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D"
|
||||
]
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
{
|
||||
"crop_size": {
|
||||
"height": 224,
|
||||
"width": 224
|
||||
},
|
||||
"do_center_crop": true,
|
||||
"do_convert_rgb": true,
|
||||
"do_normalize": true,
|
||||
"do_rescale": true,
|
||||
"do_resize": true,
|
||||
"feature_extractor_type": "CLIPFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.48145466,
|
||||
0.4578275,
|
||||
0.40821073
|
||||
],
|
||||
"image_processor_type": "CLIPFeatureExtractor",
|
||||
"image_std": [
|
||||
0.26862954,
|
||||
0.26130258,
|
||||
0.27577711
|
||||
],
|
||||
"resample": 3,
|
||||
"rescale_factor": 0.00392156862745098,
|
||||
"size": {
|
||||
"shortest_edge": 224
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
{
|
||||
"_class_name": "StableDiffusionPipeline",
|
||||
"_diffusers_version": "0.18.0.dev0",
|
||||
"feature_extractor": [
|
||||
"transformers",
|
||||
"CLIPFeatureExtractor"
|
||||
],
|
||||
"requires_safety_checker": true,
|
||||
"safety_checker": [
|
||||
"stable_diffusion",
|
||||
"StableDiffusionSafetyChecker"
|
||||
],
|
||||
"scheduler": [
|
||||
"diffusers",
|
||||
"DPMSolverMultistepScheduler"
|
||||
],
|
||||
"text_encoder": [
|
||||
"transformers",
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"tokenizer": [
|
||||
"transformers",
|
||||
"CLIPTokenizer"
|
||||
],
|
||||
"unet": [
|
||||
"diffusers",
|
||||
"UNet2DConditionModel"
|
||||
],
|
||||
"vae": [
|
||||
"diffusers",
|
||||
"AutoencoderKL"
|
||||
]
|
||||
}
|
@ -0,0 +1,168 @@
|
||||
{
|
||||
"_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
|
||||
"_name_or_path": "CompVis/stable-diffusion-safety-checker",
|
||||
"architectures": [
|
||||
"StableDiffusionSafetyChecker"
|
||||
],
|
||||
"initializer_factor": 1.0,
|
||||
"logit_scale_init_value": 2.6592,
|
||||
"model_type": "clip",
|
||||
"projection_dim": 768,
|
||||
"text_config": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": 0,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.0,
|
||||
"early_stopping": false,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": 2,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 768,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"max_position_embeddings": 77,
|
||||
"min_length": 0,
|
||||
"model_type": "clip_text_model",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_attention_heads": 12,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_hidden_layers": 12,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": 1,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"projection_dim": 512,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"transformers_version": "4.30.2",
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false,
|
||||
"vocab_size": 49408
|
||||
},
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": null,
|
||||
"vision_config": {
|
||||
"_name_or_path": "",
|
||||
"add_cross_attention": false,
|
||||
"architectures": null,
|
||||
"attention_dropout": 0.0,
|
||||
"bad_words_ids": null,
|
||||
"begin_suppress_tokens": null,
|
||||
"bos_token_id": null,
|
||||
"chunk_size_feed_forward": 0,
|
||||
"cross_attention_hidden_size": null,
|
||||
"decoder_start_token_id": null,
|
||||
"diversity_penalty": 0.0,
|
||||
"do_sample": false,
|
||||
"dropout": 0.0,
|
||||
"early_stopping": false,
|
||||
"encoder_no_repeat_ngram_size": 0,
|
||||
"eos_token_id": null,
|
||||
"exponential_decay_length_penalty": null,
|
||||
"finetuning_task": null,
|
||||
"forced_bos_token_id": null,
|
||||
"forced_eos_token_id": null,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 1024,
|
||||
"id2label": {
|
||||
"0": "LABEL_0",
|
||||
"1": "LABEL_1"
|
||||
},
|
||||
"image_size": 224,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4096,
|
||||
"is_decoder": false,
|
||||
"is_encoder_decoder": false,
|
||||
"label2id": {
|
||||
"LABEL_0": 0,
|
||||
"LABEL_1": 1
|
||||
},
|
||||
"layer_norm_eps": 1e-05,
|
||||
"length_penalty": 1.0,
|
||||
"max_length": 20,
|
||||
"min_length": 0,
|
||||
"model_type": "clip_vision_model",
|
||||
"no_repeat_ngram_size": 0,
|
||||
"num_attention_heads": 16,
|
||||
"num_beam_groups": 1,
|
||||
"num_beams": 1,
|
||||
"num_channels": 3,
|
||||
"num_hidden_layers": 24,
|
||||
"num_return_sequences": 1,
|
||||
"output_attentions": false,
|
||||
"output_hidden_states": false,
|
||||
"output_scores": false,
|
||||
"pad_token_id": null,
|
||||
"patch_size": 14,
|
||||
"prefix": null,
|
||||
"problem_type": null,
|
||||
"projection_dim": 512,
|
||||
"pruned_heads": {},
|
||||
"remove_invalid_values": false,
|
||||
"repetition_penalty": 1.0,
|
||||
"return_dict": true,
|
||||
"return_dict_in_generate": false,
|
||||
"sep_token_id": null,
|
||||
"suppress_tokens": null,
|
||||
"task_specific_params": null,
|
||||
"temperature": 1.0,
|
||||
"tf_legacy_loss": false,
|
||||
"tie_encoder_decoder": false,
|
||||
"tie_word_embeddings": true,
|
||||
"tokenizer_class": null,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"torch_dtype": null,
|
||||
"torchscript": false,
|
||||
"transformers_version": "4.30.2",
|
||||
"typical_p": 1.0,
|
||||
"use_bfloat16": false
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
{
|
||||
"_class_name": "DPMSolverMultistepScheduler",
|
||||
"_diffusers_version": "0.18.0.dev0",
|
||||
"algorithm_type": "dpmsolver++",
|
||||
"beta_end": 0.012,
|
||||
"beta_schedule": "scaled_linear",
|
||||
"beta_start": 0.00085,
|
||||
"clip_sample": false,
|
||||
"clip_sample_range": 1.0,
|
||||
"dynamic_thresholding_ratio": 0.995,
|
||||
"lambda_min_clipped": -Infinity,
|
||||
"lower_order_final": true,
|
||||
"num_train_timesteps": 1000,
|
||||
"prediction_type": "v_prediction",
|
||||
"rescale_betas_zero_snr": false,
|
||||
"sample_max_value": 1.0,
|
||||
"set_alpha_to_one": false,
|
||||
"solver_order": 2,
|
||||
"solver_type": "midpoint",
|
||||
"steps_offset": 1,
|
||||
"thresholding": false,
|
||||
"timestep_spacing": "leading",
|
||||
"trained_betas": null,
|
||||
"use_karras_sigmas": false,
|
||||
"variance_type": null
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
{
|
||||
"_name_or_path": "openai/clip-vit-large-patch14",
|
||||
"architectures": [
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dropout": 0.0,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 768,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_position_embeddings": 77,
|
||||
"model_type": "clip_text_model",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 1,
|
||||
"projection_dim": 768,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.30.2",
|
||||
"vocab_size": 49408
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|endoftext|>",
|
||||
"unk_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"bos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"clean_up_tokenization_spaces": true,
|
||||
"do_lower_case": true,
|
||||
"eos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"errors": "replace",
|
||||
"model_max_length": 77,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"tokenizer_class": "CLIPTokenizer",
|
||||
"unk_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
{
|
||||
"_class_name": "UNet2DConditionModel",
|
||||
"_diffusers_version": "0.18.0.dev0",
|
||||
"act_fn": "silu",
|
||||
"addition_embed_type": null,
|
||||
"addition_embed_type_num_heads": 64,
|
||||
"attention_head_dim": 8,
|
||||
"block_out_channels": [
|
||||
320,
|
||||
640,
|
||||
1280,
|
||||
1280
|
||||
],
|
||||
"center_input_sample": false,
|
||||
"class_embed_type": null,
|
||||
"class_embeddings_concat": false,
|
||||
"conv_in_kernel": 3,
|
||||
"conv_out_kernel": 3,
|
||||
"cross_attention_dim": 768,
|
||||
"cross_attention_norm": null,
|
||||
"down_block_types": [
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"DownBlock2D"
|
||||
],
|
||||
"downsample_padding": 1,
|
||||
"dual_cross_attention": false,
|
||||
"encoder_hid_dim": null,
|
||||
"encoder_hid_dim_type": null,
|
||||
"flip_sin_to_cos": true,
|
||||
"freq_shift": 0,
|
||||
"in_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"mid_block_only_cross_attention": null,
|
||||
"mid_block_scale_factor": 1,
|
||||
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
||||
"norm_eps": 1e-05,
|
||||
"norm_num_groups": 32,
|
||||
"num_attention_heads": null,
|
||||
"num_class_embeds": null,
|
||||
"only_cross_attention": false,
|
||||
"out_channels": 4,
|
||||
"projection_class_embeddings_input_dim": null,
|
||||
"resnet_out_scale_factor": 1.0,
|
||||
"resnet_skip_time_act": false,
|
||||
"resnet_time_scale_shift": "default",
|
||||
"sample_size": 96,
|
||||
"time_cond_proj_dim": null,
|
||||
"time_embedding_act_fn": null,
|
||||
"time_embedding_dim": null,
|
||||
"time_embedding_type": "positional",
|
||||
"timestep_post_act": null,
|
||||
"up_block_types": [
|
||||
"UpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D"
|
||||
],
|
||||
"upcast_attention": null,
|
||||
"use_linear_projection": false
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
{
|
||||
"_class_name": "AutoencoderKL",
|
||||
"_diffusers_version": "0.18.0.dev0",
|
||||
"act_fn": "silu",
|
||||
"block_out_channels": [
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
512
|
||||
],
|
||||
"down_block_types": [
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D"
|
||||
],
|
||||
"in_channels": 3,
|
||||
"latent_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"norm_num_groups": 32,
|
||||
"out_channels": 3,
|
||||
"sample_size": 768,
|
||||
"scaling_factor": 0.18215,
|
||||
"up_block_types": [
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D"
|
||||
]
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
{
|
||||
"crop_size": 224,
|
||||
"do_center_crop": true,
|
||||
"do_convert_rgb": true,
|
||||
"do_normalize": true,
|
||||
"do_resize": true,
|
||||
"feature_extractor_type": "CLIPFeatureExtractor",
|
||||
"image_mean": [
|
||||
0.48145466,
|
||||
0.4578275,
|
||||
0.40821073
|
||||
],
|
||||
"image_std": [
|
||||
0.26862954,
|
||||
0.26130258,
|
||||
0.27577711
|
||||
],
|
||||
"resample": 3,
|
||||
"size": 224
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
{
|
||||
"_class_name": "StableDiffusionPipeline",
|
||||
"_diffusers_version": "0.8.0",
|
||||
"feature_extractor": [
|
||||
"transformers",
|
||||
"CLIPImageProcessor"
|
||||
],
|
||||
"requires_safety_checker": false,
|
||||
"safety_checker": [
|
||||
null,
|
||||
null
|
||||
],
|
||||
"scheduler": [
|
||||
"diffusers",
|
||||
"DDIMScheduler"
|
||||
],
|
||||
"text_encoder": [
|
||||
"transformers",
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"tokenizer": [
|
||||
"transformers",
|
||||
"CLIPTokenizer"
|
||||
],
|
||||
"unet": [
|
||||
"diffusers",
|
||||
"UNet2DConditionModel"
|
||||
],
|
||||
"vae": [
|
||||
"diffusers",
|
||||
"AutoencoderKL"
|
||||
]
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
{
|
||||
"_class_name": "DDIMScheduler",
|
||||
"_diffusers_version": "0.8.0",
|
||||
"beta_end": 0.012,
|
||||
"beta_schedule": "scaled_linear",
|
||||
"beta_start": 0.00085,
|
||||
"clip_sample": false,
|
||||
"num_train_timesteps": 1000,
|
||||
"prediction_type": "v_prediction",
|
||||
"set_alpha_to_one": false,
|
||||
"skip_prk_steps": true,
|
||||
"steps_offset": 1,
|
||||
"trained_betas": null
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
{
|
||||
"_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
|
||||
"architectures": [
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dropout": 0.0,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4096,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_position_embeddings": 77,
|
||||
"model_type": "clip_text_model",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 23,
|
||||
"pad_token_id": 1,
|
||||
"projection_dim": 512,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.25.0.dev0",
|
||||
"vocab_size": 49408
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "!",
|
||||
"unk_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"bos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|startoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"do_lower_case": true,
|
||||
"eos_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"errors": "replace",
|
||||
"model_max_length": 77,
|
||||
"name_or_path": "hf-models/stable-diffusion-v2-768x768/tokenizer",
|
||||
"pad_token": "<|endoftext|>",
|
||||
"special_tokens_map_file": "./special_tokens_map.json",
|
||||
"tokenizer_class": "CLIPTokenizer",
|
||||
"unk_token": {
|
||||
"__type": "AddedToken",
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
{
|
||||
"_class_name": "UNet2DConditionModel",
|
||||
"_diffusers_version": "0.10.0.dev0",
|
||||
"act_fn": "silu",
|
||||
"attention_head_dim": [
|
||||
5,
|
||||
10,
|
||||
20,
|
||||
20
|
||||
],
|
||||
"block_out_channels": [
|
||||
320,
|
||||
640,
|
||||
1280,
|
||||
1280
|
||||
],
|
||||
"center_input_sample": false,
|
||||
"cross_attention_dim": 1024,
|
||||
"down_block_types": [
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"CrossAttnDownBlock2D",
|
||||
"DownBlock2D"
|
||||
],
|
||||
"downsample_padding": 1,
|
||||
"dual_cross_attention": false,
|
||||
"flip_sin_to_cos": true,
|
||||
"freq_shift": 0,
|
||||
"in_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"mid_block_scale_factor": 1,
|
||||
"norm_eps": 1e-05,
|
||||
"norm_num_groups": 32,
|
||||
"num_class_embeds": null,
|
||||
"only_cross_attention": false,
|
||||
"out_channels": 4,
|
||||
"sample_size": 96,
|
||||
"up_block_types": [
|
||||
"UpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D",
|
||||
"CrossAttnUpBlock2D"
|
||||
],
|
||||
"use_linear_projection": true,
|
||||
"upcast_attention": true
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
{
|
||||
"_class_name": "AutoencoderKL",
|
||||
"_diffusers_version": "0.8.0",
|
||||
"_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
|
||||
"act_fn": "silu",
|
||||
"block_out_channels": [
|
||||
128,
|
||||
256,
|
||||
512,
|
||||
512
|
||||
],
|
||||
"down_block_types": [
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D",
|
||||
"DownEncoderBlock2D"
|
||||
],
|
||||
"in_channels": 3,
|
||||
"latent_channels": 4,
|
||||
"layers_per_block": 2,
|
||||
"norm_num_groups": 32,
|
||||
"out_channels": 3,
|
||||
"sample_size": 768,
|
||||
"up_block_types": [
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D",
|
||||
"UpDecoderBlock2D"
|
||||
]
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
{
|
||||
"_class_name": "StableDiffusionXLPipeline",
|
||||
"_diffusers_version": "0.19.0.dev0",
|
||||
"force_zeros_for_empty_prompt": true,
|
||||
"add_watermarker": null,
|
||||
"scheduler": [
|
||||
"diffusers",
|
||||
"EulerDiscreteScheduler"
|
||||
],
|
||||
"text_encoder": [
|
||||
"transformers",
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"text_encoder_2": [
|
||||
"transformers",
|
||||
"CLIPTextModelWithProjection"
|
||||
],
|
||||
"tokenizer": [
|
||||
"transformers",
|
||||
"CLIPTokenizer"
|
||||
],
|
||||
"tokenizer_2": [
|
||||
"transformers",
|
||||
"CLIPTokenizer"
|
||||
],
|
||||
"unet": [
|
||||
"diffusers",
|
||||
"UNet2DConditionModel"
|
||||
],
|
||||
"vae": [
|
||||
"diffusers",
|
||||
"AutoencoderKL"
|
||||
]
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
{
|
||||
"_class_name": "EulerDiscreteScheduler",
|
||||
"_diffusers_version": "0.19.0.dev0",
|
||||
"beta_end": 0.012,
|
||||
"beta_schedule": "scaled_linear",
|
||||
"beta_start": 0.00085,
|
||||
"clip_sample": false,
|
||||
"interpolation_type": "linear",
|
||||
"num_train_timesteps": 1000,
|
||||
"prediction_type": "epsilon",
|
||||
"sample_max_value": 1.0,
|
||||
"set_alpha_to_one": false,
|
||||
"skip_prk_steps": true,
|
||||
"steps_offset": 1,
|
||||
"timestep_spacing": "leading",
|
||||
"trained_betas": null,
|
||||
"use_karras_sigmas": false
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"architectures": [
|
||||
"CLIPTextModel"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dropout": 0.0,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "quick_gelu",
|
||||
"hidden_size": 768,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_position_embeddings": 77,
|
||||
"model_type": "clip_text_model",
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"pad_token_id": 1,
|
||||
"projection_dim": 768,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.32.0.dev0",
|
||||
"vocab_size": 49408
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
{
|
||||
"architectures": [
|
||||
"CLIPTextModelWithProjection"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 0,
|
||||
"dropout": 0.0,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_size": 1280,
|
||||
"initializer_factor": 1.0,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 5120,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"max_position_embeddings": 77,
|
||||
"model_type": "clip_text_model",
|
||||
"num_attention_heads": 20,
|
||||
"num_hidden_layers": 32,
|
||||
"pad_token_id": 1,
|
||||
"projection_dim": 1280,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.32.0.dev0",
|
||||
"vocab_size": 49408
|
||||
}
|