from abc import ABC, abstractmethod
from enum import Enum
import enum
import sqlite3
import threading
from typing import Optional, Type, TypeVar, Union
from PIL.Image import Image as PILImage
from pydantic import BaseModel, Field
from torch import Tensor

from invokeai.app.services.item_storage import PaginatedResults


"""
Substantial proposed changes to the management of images and tensors.

tl;dr:
With the upcoming move to latents-only nodes, we need to handle metadata differently. After
struggling with this unsuccessfully - trying to smoosh it in to the existing setup - I believe we
need to expand the scope of the refactor to include the management of images and latents - and
make `latents` a special case of `tensor`.

full story:
The consensus for tensor-only nodes' metadata was to traverse the execution graph and grab the
core parameters to write to the image. This was straightforward, and I've written functions to
find the nearest t2l/l2l, noise, and compel nodes and build the metadata from those.

But struggling to integrate this and the associated edge cases brought up a number of issues
deeper in the system (some of which I had previously implemented). The ImageStorageService is
doing way too much, and we need to be able to retrieve the session given an image/latents id,
which is not currently feasible due to SQLite's JSON parsing performance.

I made a new ResultsService and `results` table in the db to facilitate this. This first attempt
failed because it doesn't handle uploads and leaves the codebase messy.

So I've spent the day trying to figure out how to handle this in a sane way and think I've got
something decent. I've described some changes to service bases and the database below.

The gist of it is to store the core parameters for an image in its metadata when the image is
saved, but never to read from it. Instead, the same metadata is stored in the database, which
will be set up for efficient access. So when a page of images is requested, the metadata comes
from the db instead of a filesystem operation.

The URL generation responsibilities have been split off the image storage service in to a URL
service. New database services/tables for images and tensors are added. These services will
provide paginated images/tensors for the API to serve. This also paves the way for handling
tensors as first-class outputs.
"""


# TODO: Make a new model for this
class ResourceOrigin(str, Enum):
    """The origin of a resource (eg image or tensor)."""

    RESULTS = "results"
    UPLOADS = "uploads"
    INTERMEDIATES = "intermediates"


class ImageKind(str, Enum):
    """The kind of an image."""

    IMAGE = "image"
    CONTROL_IMAGE = "control_image"


class TensorKind(str, Enum):
    """The kind of a tensor."""

    IMAGE_TENSOR = "tensor"
    CONDITIONING = "conditioning"


"""
Core Generation Metadata Pydantic Model

I've already implemented the code to traverse a session to build this object.
"""


class CoreGenerationMetadata(BaseModel):
    """Core generation metadata for an image/tensor generated in InvokeAI.

    Generated by traversing the execution graph, collecting the parameters of the nearest
    ancestors of a given node.

    Full metadata may be accessed by querying for the session in the `graph_executions` table.
    """

    positive_conditioning: Optional[str] = Field(
        description="The positive conditioning."
    )
    negative_conditioning: Optional[str] = Field(
        description="The negative conditioning."
    )
    width: Optional[int] = Field(description="Width of the image/tensor in pixels.")
    height: Optional[int] = Field(description="Height of the image/tensor in pixels.")
    seed: Optional[int] = Field(description="The seed used for noise generation.")
    cfg_scale: Optional[float] = Field(
        description="The classifier-free guidance scale."
    )
    steps: Optional[int] = Field(description="The number of steps used for inference.")
    scheduler: Optional[str] = Field(description="The scheduler used for inference.")
    model: Optional[str] = Field(description="The model used for inference.")
    strength: Optional[float] = Field(
        description="The strength used for image-to-image/tensor-to-tensor."
    )
    image: Optional[str] = Field(description="The ID of the initial image.")
    tensor: Optional[str] = Field(description="The ID of the initial tensor.")
    # Pending model refactor:
    # vae: Optional[str] = Field(description="The VAE used for decoding.")
    # unet: Optional[str] = Field(description="The UNet used for inference.")
    # clip: Optional[str] = Field(description="The CLIP Encoder used for conditioning.")


"""
Minimal Uploads Metadata Model
"""


class UploadsMetadata(BaseModel):
    """Limited metadata for an uploaded image/tensor."""

    width: Optional[int] = Field(description="Width of the image/tensor in pixels.")
    height: Optional[int] = Field(description="Height of the image/tensor in pixels.")
    # The extra field will be the contents of the PNG file's tEXt chunk. It may have come
    # from another SD application or InvokeAI, so we need to make it very flexible. I think it's
    # best to just store it as a string and let the frontend parse it.
    # If the upload is a tensor type, this will be omitted.
    extra: Optional[str] = Field(
        description="Extra metadata, extracted from the PNG tEXt chunk."
    )


"""
Slimmed-down Image Storage Service Base
  - No longer lists images or generates URLs - only stores and retrieves images.
  - OSS implementation for disk storage
"""


class ImageStorageBase(ABC):
    """Responsible for storing and retrieving images."""

    @abstractmethod
    def save(
        self,
        image: PILImage,
        image_kind: ImageKind,
        origin: ResourceOrigin,
        context_id: str,
        node_id: str,
        metadata: CoreGenerationMetadata,
    ) -> str:
        """Saves an image and its thumbnail, returning its unique identifier."""
        pass

    @abstractmethod
    def get(self, id: str, thumbnail: bool = False) -> Union[PILImage, None]:
        """Retrieves an image as a PIL Image."""
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes an image."""
        pass


class TensorStorageBase(ABC):
    """Responsible for storing and retrieving tensors."""

    @abstractmethod
    def save(
        self,
        tensor: Tensor,
        tensor_kind: TensorKind,
        origin: ResourceOrigin,
        context_id: str,
        node_id: str,
        metadata: CoreGenerationMetadata,
    ) -> str:
        """Saves a tensor, returning its unique identifier."""
        pass

    @abstractmethod
    def get(self, id: str, thumbnail: bool = False) -> Union[Tensor, None]:
        """Retrieves a tensor as a torch Tensor."""
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes a tensor."""
        pass


"""
New Url Service Base
  - Abstracts the logic for generating URLs out of the storage service
  - OSS implementation for locally-hosted URLs
  - Also provides a method to get the internal path to a resource (for OSS, the FS path)
"""


class ResourceLocationServiceBase(ABC):
    """Responsible for locating resources (eg images or tensors)."""

    @abstractmethod
    def get_url(self, id: str) -> str:
        """Gets the URL for a resource."""
        pass

    @abstractmethod
    def get_path(self, id: str) -> str:
        """Gets the path for a resource."""
        pass


"""
New Images Database Service Base

This is a new service that will be responsible for the new `images` table(s):
  - Storing images in the table
  - Retrieving individual images and pages of images
  - Deleting individual images

Operations will typically use joins with the various `images` tables.
"""


class ImagesDbServiceBase(ABC):
    """Responsible for interfacing with `images` table."""

    class GeneratedImageEntity(BaseModel):
        """A row describing an image that was generated by a session node."""

        id: str = Field(description="The unique identifier for the image.")
        session_id: str = Field(description="The session ID.")
        node_id: str = Field(description="The node ID.")
        metadata: CoreGenerationMetadata = Field(
            description="The metadata for the image."
        )

    class UploadedImageEntity(BaseModel):
        """A row describing an uploaded image."""

        id: str = Field(description="The unique identifier for the image.")
        metadata: UploadsMetadata = Field(description="The metadata for the image.")

    @abstractmethod
    def get(self, id: str) -> Union[GeneratedImageEntity, UploadedImageEntity, None]:
        """Gets an image from the `images` table."""
        pass

    @abstractmethod
    def get_many(
        self, image_kind: ImageKind, page: int = 0, per_page: int = 10
    ) -> PaginatedResults[Union[GeneratedImageEntity, UploadedImageEntity]]:
        """Gets a page of images from the `images` table."""
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes an image from the `images` table."""
        pass

    @abstractmethod
    def set(
        self,
        id: str,
        image_kind: ImageKind,
        session_id: Optional[str],
        node_id: Optional[str],
        # `Union[...]` rather than `X | Y`: the annotation is evaluated when the method is
        # defined, and `|` between classes is only supported from Python 3.10 onwards.
        metadata: Union[CoreGenerationMetadata, UploadsMetadata],
    ) -> None:
        """Sets an image in the `images` table."""
        pass


"""
New Tensor Database Service Base

This is a new service that will be responsible for the new `tensors` table:
  - Storing tensors in the table
  - Retrieving individual tensors and pages of tensors
  - Deleting individual tensors

Operations will always use joins with the `tensors_metadata` table.
"""


class TensorDbServiceBase(ABC):
    """Responsible for interfacing with `tensors` table."""

    class GeneratedTensorEntity(BaseModel):
        """A row describing a tensor that was generated by a session node."""

        id: str = Field(description="The unique identifier for the tensor.")
        session_id: str = Field(description="The session ID.")
        node_id: str = Field(description="The node ID.")
        metadata: CoreGenerationMetadata = Field(
            description="The metadata for the tensor."
        )

    class UploadedTensorEntity(BaseModel):
        """A row describing an uploaded tensor."""

        id: str = Field(description="The unique identifier for the tensor.")
        metadata: UploadsMetadata = Field(description="The metadata for the tensor.")

    @abstractmethod
    def get(self, id: str) -> Union[GeneratedTensorEntity, UploadedTensorEntity, None]:
        """Gets a tensor from the `tensors` table."""
        pass

    @abstractmethod
    def get_many(
        self, tensor_kind: TensorKind, page: int = 0, per_page: int = 10
    ) -> PaginatedResults[Union[GeneratedTensorEntity, UploadedTensorEntity]]:
        """Gets a page of tensors from the `tensors` table."""
        pass

    @abstractmethod
    def delete(self, id: str) -> None:
        """Deletes a tensor from the `tensors` table."""
        pass

    @abstractmethod
    def set(
        self,
        id: str,
        tensor_kind: TensorKind,
        session_id: Optional[str],
        node_id: Optional[str],
        # `Union[...]` rather than `X | Y`: the annotation is evaluated when the method is
        # defined, and `|` between classes is only supported from Python 3.10 onwards.
        metadata: Union[CoreGenerationMetadata, UploadsMetadata],
    ) -> None:
        """Sets a tensor in the `tensors` table."""
        pass


"""
Database Changes

The existing tables will remain as-is, new tables will be added.

Tensors now also have the same types as images - `results`, `intermediates`, `uploads`. Storage,
retrieval, and operations may diverge from images in the future, so they are managed separately.

A few `images` tables are created to store all images:
  - `results` and `intermediates` images have additional data: `session_id` and `node_id`, and
    may be further differentiated in the future. For this reason, they each get their own table.
  - `uploads` do not get their own table, as they are never going to have more than an `id`,
    `image_kind` and `timestamp`.
  - `images_metadata` holds the same image metadata that is written to the image. This table,
    along with the URL service, allows us to more efficiently serve images without having to
    read the image from storage.

The same tables are made for `tensors` and for the moment, implementation is expected to be
identical.

Schemas for each table below.

Insertions and updates of ancillary tables (e.g. `image_results`, `images_metadata`, etc) will
need to be done manually in the services, but should be straightforward. Deletion via cascading
will be handled by the database.

NOTE: SQLite only enforces foreign keys (including `ON DELETE CASCADE`) on connections where
`PRAGMA foreign_keys = ON` has been executed, so the services must enable it for cascading
deletes to take effect.
"""


def create_sql_values_string_from_string_enum(enum: Type[Enum]):
    """
    Creates a string of the form "('value1'), ('value2'), ..., ('valueN')" from a StrEnum.

    Single quotes inside a value are doubled so the result stays a valid list of SQL string
    literals. An enum with no members yields an empty string.
    """

    quoted_rows = []
    for member in enum:
        escaped = str(member.value).replace("'", "''")
        quoted_rows.append(f"('{escaped}')")
    return ", ".join(quoted_rows)


def create_sql_table_from_enum(
    enum: Type[Enum],
    table_name: str,
    primary_key_name: str,
    cursor: sqlite3.Cursor,
    lock: threading.Lock,
):
    """
    Creates and populates a table to be used as a functional enum.

    The table has a single TEXT primary key column named `primary_key_name`, with one row per
    member value of `enum`. Both statements are idempotent (`IF NOT EXISTS` / `OR IGNORE`) and
    are executed while holding `lock`.

    `table_name` and `primary_key_name` are interpolated directly into the SQL (identifiers
    cannot be bound as parameters), so they must come from trusted code, never from user input.
    """

    values_string = create_sql_values_string_from_string_enum(enum)

    # `with` only releases the lock if it was actually acquired.
    with lock:
        # NOTE: `--sql` is an editor hint and a SQL line comment; it must stay on its own line
        # or it comments out the statement that follows it.
        cursor.execute(
            f"""--sql
            CREATE TABLE IF NOT EXISTS {table_name} (
                {primary_key_name} TEXT PRIMARY KEY
            );
            """
        )
        # `INSERT ... VALUES ;` is a syntax error, so skip the insert for an empty enum.
        if values_string:
            cursor.execute(
                f"""--sql
                INSERT OR IGNORE INTO {table_name} ({primary_key_name}) VALUES {values_string};
                """
            )


"""
`resource_origins` functions as an enum for the ResourceOrigin model.
"""


def create_resource_origins_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates and populates the `resource_origins` enum table from `ResourceOrigin`."""
    create_sql_table_from_enum(
        enum=ResourceOrigin,
        table_name="resource_origins",
        primary_key_name="origin_name",
        cursor=cursor,
        lock=lock,
    )


"""
`image_kinds` functions as an enum for the ImageKind model.
"""


def create_image_kinds_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates and populates the `image_kinds` enum table from `ImageKind`."""
    create_sql_table_from_enum(
        enum=ImageKind,
        table_name="image_kinds",
        primary_key_name="kind_name",
        cursor=cursor,
        lock=lock,
    )


"""
`tensor_kinds` functions as an enum for the TensorKind model.
"""


def create_tensor_kinds_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates and populates the `tensor_kinds` enum table from `TensorKind`."""
    create_sql_table_from_enum(
        enum=TensorKind,
        table_name="tensor_kinds",
        primary_key_name="kind_name",
        cursor=cursor,
        lock=lock,
    )


"""
`images` stores all images, regardless of type
"""


def create_images_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `images` table and its indexes, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS images (
                id TEXT PRIMARY KEY,
                origin TEXT,
                image_kind TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(origin) REFERENCES resource_origins(origin_name),
                FOREIGN KEY(image_kind) REFERENCES image_kinds(kind_name)
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_images_id ON images(id);
            """
        )
        cursor.execute(
            """--sql
            CREATE INDEX IF NOT EXISTS idx_images_origin ON images(origin);
            """
        )
        cursor.execute(
            """--sql
            CREATE INDEX IF NOT EXISTS idx_images_image_kind ON images(image_kind);
            """
        )


"""
`image_results` stores additional data specific to `results` images.
"""


def create_image_results_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `image_results` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS image_results (
                images_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                node_id TEXT NOT NULL,
                FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
            );
            """
        )
        # The key column of this table is `images_id`; there is no `id` column to index.
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_image_results_images_id ON image_results(images_id);
            """
        )


"""
`image_intermediates` stores additional data specific to `intermediates` images
"""


def create_image_intermediates_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `image_intermediates` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS image_intermediates (
                images_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                node_id TEXT NOT NULL,
                FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
            );
            """
        )
        # The key column of this table is `images_id`; there is no `id` column to index.
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_image_intermediates_images_id ON image_intermediates(images_id);
            """
        )


"""
`images_metadata` stores basic metadata for any image type
"""


def create_images_metadata_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `images_metadata` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS images_metadata (
                images_id TEXT PRIMARY KEY,
                metadata TEXT,
                FOREIGN KEY(images_id) REFERENCES images(id) ON DELETE CASCADE
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_images_metadata_images_id ON images_metadata(images_id);
            """
        )


# `tensors` table: stores references to tensors
def create_tensors_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `tensors` table and its indexes, if they do not already exist."""
    with lock:
        # No comma after the last table constraint: a trailing comma is a SQL syntax error.
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS tensors (
                id TEXT PRIMARY KEY,
                origin TEXT,
                tensor_kind TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(origin) REFERENCES resource_origins(origin_name),
                FOREIGN KEY(tensor_kind) REFERENCES tensor_kinds(kind_name)
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_tensors_id ON tensors(id);
            """
        )
        cursor.execute(
            """--sql
            CREATE INDEX IF NOT EXISTS idx_tensors_origin ON tensors(origin);
            """
        )
        cursor.execute(
            """--sql
            CREATE INDEX IF NOT EXISTS idx_tensors_tensor_kind ON tensors(tensor_kind);
            """
        )


# `tensor_results` stores additional data specific to `results` tensors
def create_tensor_results_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `tensor_results` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS tensor_results (
                tensor_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                node_id TEXT NOT NULL,
                FOREIGN KEY(tensor_id) REFERENCES tensors(id) ON DELETE CASCADE
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_tensor_results_tensor_id ON tensor_results(tensor_id);
            """
        )


# `tensor_intermediates` stores additional data specific to `intermediates` tensors
def create_tensor_intermediates_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `tensor_intermediates` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS tensor_intermediates (
                tensor_id TEXT PRIMARY KEY,
                session_id TEXT NOT NULL,
                node_id TEXT NOT NULL,
                FOREIGN KEY(tensor_id) REFERENCES tensors(id) ON DELETE CASCADE
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_tensor_intermediates_tensor_id ON tensor_intermediates(tensor_id);
            """
        )


# `tensors_metadata` table: stores generated/transformed metadata for tensors
def create_tensors_metadata_table(cursor: sqlite3.Cursor, lock: threading.Lock):
    """Creates the `tensors_metadata` table and its index, if they do not already exist."""
    with lock:
        cursor.execute(
            """--sql
            CREATE TABLE IF NOT EXISTS tensors_metadata (
                tensor_id TEXT PRIMARY KEY,
                metadata TEXT,
                FOREIGN KEY(tensor_id) REFERENCES tensors(id) ON DELETE CASCADE
            );
            """
        )
        cursor.execute(
            """--sql
            CREATE UNIQUE INDEX IF NOT EXISTS idx_tensors_metadata_tensor_id ON tensors_metadata(tensor_id);
            """
        )