From ff0a25bd9c800868656bf5a08b826aa68f75af48 Mon Sep 17 00:00:00 2001 From: skunkworxdark Date: Tue, 28 Nov 2023 12:07:29 +0000 Subject: [PATCH 01/30] Update communityNodes.md Added New Match Histogram node Updated XYGrid nodes and Prompt Tools nodes --- docs/nodes/communityNodes.md | 64 ++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/docs/nodes/communityNodes.md b/docs/nodes/communityNodes.md index 3879cdc3c3..6007b3338b 100644 --- a/docs/nodes/communityNodes.md +++ b/docs/nodes/communityNodes.md @@ -26,6 +26,7 @@ To use a community workflow, download the the `.json` node graph file and load i + [Image Picker](#image-picker) + [Load Video Frame](#load-video-frame) + [Make 3D](#make-3d) + + [Match Histogram](#match-histogram) + [Oobabooga](#oobabooga) + [Prompt Tools](#prompt-tools) + [Remote Image](#remote-image) @@ -208,6 +209,23 @@ This includes 15 Nodes: +-------------------------------- +### Match Histogram + +**Description:** An InvokeAI node to match a histogram from one image to another. This is a bit like the `color correct` node in the main InvokeAI but this works in the YCbCr colourspace and can handle images of different sizes. Also does not require a mask input. +- Option to only transfer luminance channel. +- Option to save output as grayscale + +A good use case for this node is to normalize the colors of an image that has been through the tiled scaling workflow of my XYGrid Nodes. + +See full docs here: https://github.com/skunkworxdark/Prompt-tools-nodes/edit/main/README.md + +**Node Link:** https://github.com/skunkworxdark/match_histogram + +**Output Examples** + + + -------------------------------- ### Oobabooga @@ -237,22 +255,30 @@ This node works best with SDXL models, especially as the style can be described -------------------------------- ### Prompt Tools -**Description:** A set of InvokeAI nodes that add general prompt manipulation tools. These were written to accompany the PromptsFromFile node and other prompt generation nodes. +**Description:** A set of InvokeAI nodes that add general prompt (string) manipulation tools. Designed to accompany the `Prompts From File` node and other prompt generation nodes. + +1. `Prompt To File` - saves a prompt or collection of prompts to a file. one per line. There is an append/overwrite option. +2. `PTFields Collect` - Converts image generation fields into a Json format string that can be passed to Prompt to file. +3. `PTFields Expand` - Takes Json string and converts it to individual generation parameters. This can be fed from the Prompts to file node. +4. `Prompt Strength` - Formats prompt with strength like the weighted format of compel +5. `Prompt Strength Combine` - Combines weighted prompts for .and()/.blend() +6. `CSV To Index String` - Gets a string from a CSV by index. Includes a Random index option + +The following Nodes are now included in v3.2 of Invoke and are nolonger in this set of tools.
+- `Prompt Join` -> `String Join` +- `Prompt Join Three` -> `String Join Three` +- `Prompt Replace` -> `String Replace` +- `Prompt Split Neg` -> `String Split Neg` -1. PromptJoin - Joins to prompts into one. -2. PromptReplace - performs a search and replace on a prompt. With the option of using regex. -3. PromptSplitNeg - splits a prompt into positive and negative using the old V2 method of [] for negative. -4. PromptToFile - saves a prompt or collection of prompts to a file. one per line. There is an append/overwrite option. -5. PTFieldsCollect - Converts image generation fields into a Json format string that can be passed to Prompt to file. -6. PTFieldsExpand - Takes Json string and converts it to individual generation parameters This can be fed from the Prompts to file node. -7. PromptJoinThree - Joins 3 prompt together. -8. PromptStrength - This take a string and float and outputs another string in the format of (string)strength like the weighted format of compel. -9. PromptStrengthCombine - This takes a collection of prompt strength strings and outputs a string in the .and() or .blend() format that can be fed into a proper prompt node. See full docs here: https://github.com/skunkworxdark/Prompt-tools-nodes/edit/main/README.md **Node Link:** https://github.com/skunkworxdark/Prompt-tools-nodes +**Workflow Examples** + + + -------------------------------- ### Remote Image @@ -339,15 +365,27 @@ Highlights/Midtones/Shadows (with LUT blur enabled): -------------------------------- ### XY Image to Grid and Images to Grids nodes -**Description:** Image to grid nodes and supporting tools. +**Description:** These nodes add the following to InvokeAI: +- Generate grids of images from multiple input images +- Create XY grid images with labels from parameters +- Split images into overlapping tiles for processing (for super-resolution workflows) +- Recombine image tiles into a single output image blending the seams -1. "Images To Grids" node - Takes a collection of images and creates a grid(s) of images. If there are more images than the size of a single grid then multiple grids will be created until it runs out of images. -2. "XYImage To Grid" node - Converts a collection of XYImages into a labeled Grid of images. The XYImages collection has to be built using the supporting nodes. See example node setups for more details. +The nodes include: +1. `Images To Grids` - Combine multiple images into a grid of images +2. `XYImage To Grid` - Take X & Y params and creates a labeled image grid. +3. `XYImage Tiles` - Super-resolution (embiggen) style tiled resizing +4. `Image Tot XYImages` - Takes an image and cuts it up into a number of columns and rows. +5. 
Multiple supporting nodes - Helper nodes for data wrangling and building `XYImage` collections See full docs here: https://github.com/skunkworxdark/XYGrid_nodes/edit/main/README.md **Node Link:** https://github.com/skunkworxdark/XYGrid_nodes +**Output Examples** + + + -------------------------------- ### Example Node Template From 09cb40786f4d82eef4458213c268d78d44d663e8 Mon Sep 17 00:00:00 2001 From: Millun Atluri Date: Wed, 29 Nov 2023 14:08:50 +1100 Subject: [PATCH 02/30] (fix) Update communityNodes.md installation instructions Update custom node instructions to be clearer --- docs/nodes/communityNodes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/nodes/communityNodes.md b/docs/nodes/communityNodes.md index 6007b3338b..f3b8af0425 100644 --- a/docs/nodes/communityNodes.md +++ b/docs/nodes/communityNodes.md @@ -8,7 +8,7 @@ To use a node, add the node to the `nodes` folder found in your InvokeAI install The suggested method is to use `git clone` to clone the repository the node is found in. This allows for easy updates of the node in the future. -If you'd prefer, you can also just download the `.py` file from the linked repository and add it to the `nodes` folder. +If you'd prefer, you can also just download the whole node folder from the linked repository and add it to the `nodes` folder. To use a community workflow, download the the `.json` node graph file and load it into Invoke AI via the **Load Workflow** button in the Workflow Editor. From 2a087bf16140efb1b3567085d3c43e2f61b8ae4b Mon Sep 17 00:00:00 2001 From: skunkworxdark Date: Wed, 29 Nov 2023 09:14:36 +0000 Subject: [PATCH 03/30] Update prompt.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use UTF-8 encoding on reading prompts from files to allow Unicode characters to load correctly. The following examples currently will not load correctly from a file: Hello, 世界! 😭🤮 💔 --- invokeai/app/invocations/prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/prompt.py b/invokeai/app/invocations/prompt.py index cb43a52447..4a7f69d5a5 100644 --- a/invokeai/app/invocations/prompt.py +++ b/invokeai/app/invocations/prompt.py @@ -82,7 +82,7 @@ class PromptsFromFileInvocation(BaseInvocation): end_line = start_line + max_prompts if max_prompts <= 0: end_line = np.iinfo(np.int32).max - with open(file_path) as f: + with open(file_path, encoding="utf-8") as f: for i, line in enumerate(f): if i >= start_line and i < end_line: prompts.append((pre_prompt or "") + line.strip() + (post_prompt or "")) From 77933a0a85b07d7298098f9c8c923a5902f7d601 Mon Sep 17 00:00:00 2001 From: skunkworxdark Date: Wed, 29 Nov 2023 10:53:22 +0000 Subject: [PATCH 04/30] Update prompt.py bumped version to 1.0.1 --- invokeai/app/invocations/prompt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/invokeai/app/invocations/prompt.py b/invokeai/app/invocations/prompt.py index 4a7f69d5a5..4778d98077 100644 --- a/invokeai/app/invocations/prompt.py +++ b/invokeai/app/invocations/prompt.py @@ -44,7 +44,7 @@ class DynamicPromptInvocation(BaseInvocation): title="Prompts from File", tags=["prompt", "file"], category="prompt", - version="1.0.0", + version="1.0.1", ) class PromptsFromFileInvocation(BaseInvocation): """Loads prompts from a text file""" From 693c6cf5e4eea760ca8b7bbece794edbcc8c41fc Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 29 Nov 2023 15:10:45 -0500 Subject: [PATCH 05/30] Add support for IPAdapterFull models. 
The changes are based on this upstream PR: https://github.com/tencent-ailab/IP-Adapter/pull/139 . --- invokeai/backend/ip_adapter/ip_adapter.py | 57 ++++++++++++++++++--- tests/backend/ip_adapter/test_ip_adapter.py | 8 +++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py index 826112156d..9176bf1f49 100644 --- a/invokeai/backend/ip_adapter/ip_adapter.py +++ b/invokeai/backend/ip_adapter/ip_adapter.py @@ -54,6 +54,44 @@ class ImageProjModel(torch.nn.Module): return clip_extra_context_tokens +class MLPProjModel(torch.nn.Module): + """SD model with image prompt""" + + def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024): + super().__init__() + + self.proj = torch.nn.Sequential( + torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim), + torch.nn.GELU(), + torch.nn.Linear(clip_embeddings_dim, cross_attention_dim), + torch.nn.LayerNorm(cross_attention_dim), + ) + + @classmethod + def from_state_dict(cls, state_dict: dict[torch.Tensor]): + """Initialize an MLPProjModel from a state_dict. + + The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict. + + Args: + state_dict (dict[torch.Tensor]): The state_dict of model weights. + + Returns: + MLPProjModel + """ + cross_attention_dim = state_dict["proj.3.weight"].shape[0] + clip_embeddings_dim = state_dict["proj.0.weight"].shape[0] + + model = cls(cross_attention_dim, clip_embeddings_dim) + + model.load_state_dict(state_dict) + return model + + def forward(self, image_embeds): + clip_extra_context_tokens = self.proj(image_embeds) + return clip_extra_context_tokens + + class IPAdapter: """IP-Adapter: https://arxiv.org/pdf/2308.06721.pdf""" @@ -130,6 +168,13 @@ class IPAdapterPlus(IPAdapter): return image_prompt_embeds, uncond_image_prompt_embeds +class IPAdapterFull(IPAdapterPlus): + """IP-Adapter Plus with full features.""" + + def _init_image_proj_model(self, state_dict: dict[torch.Tensor]): + return MLPProjModel.from_state_dict(state_dict).to(self.device, dtype=self.dtype) + + class IPAdapterPlusXL(IPAdapterPlus): """IP-Adapter Plus for SDXL.""" @@ -149,11 +194,9 @@ def build_ip_adapter( ) -> Union[IPAdapter, IPAdapterPlus]: state_dict = torch.load(ip_adapter_ckpt_path, map_location="cpu") - # Determine if the state_dict is from an IPAdapter or IPAdapterPlus based on the image_proj weights that it - # contains. - is_plus = "proj.weight" not in state_dict["image_proj"] - - if is_plus: + if "proj.weight" in state_dict["image_proj"]: # IPAdapter (with ImageProjModel). + return IPAdapter(state_dict, device=device, dtype=dtype) + elif "proj_in.weight" in state_dict["image_proj"]: # IPAdaterPlus or IPAdapterPlusXL (with Resampler). cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1] if cross_attention_dim == 768: # SD1 IP-Adapter Plus @@ -163,5 +206,7 @@ def build_ip_adapter( return IPAdapterPlusXL(state_dict, device=device, dtype=dtype) else: raise Exception(f"Unsupported IP-Adapter Plus cross-attention dimension: {cross_attention_dim}.") + elif "proj.0.weight" in state_dict["image_proj"]: # IPAdapterFull (with MLPProjModel). 
+ return IPAdapterFull(state_dict, device=device, dtype=dtype) else: - return IPAdapter(state_dict, device=device, dtype=dtype) + raise ValueError(f"'{ip_adapter_ckpt_path}' has an unrecognized IP-Adapter model architecture.") diff --git a/tests/backend/ip_adapter/test_ip_adapter.py b/tests/backend/ip_adapter/test_ip_adapter.py index 6712196778..6a3ec510a2 100644 --- a/tests/backend/ip_adapter/test_ip_adapter.py +++ b/tests/backend/ip_adapter/test_ip_adapter.py @@ -37,6 +37,14 @@ def build_dummy_sd15_unet_input(torch_device): "unet_model_id": "runwayml/stable-diffusion-v1-5", "unet_model_name": "stable-diffusion-v1-5", }, + # SD1.5, IPAdapterFull + { + "ip_adapter_model_id": "InvokeAI/ip-adapter-full-face_sd15", + "ip_adapter_model_name": "ip-adapter-full-face_sd15", + "base_model": BaseModelType.StableDiffusion1, + "unet_model_id": "runwayml/stable-diffusion-v1-5", + "unet_model_name": "stable-diffusion-v1-5", + }, ], ) @pytest.mark.slow From 0beb08686c64acb451dbda0dc4b6a257433fbac8 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 30 Nov 2023 10:55:20 +0100 Subject: [PATCH 06/30] Add CFG Rescale option for supporting zero-terminal SNR models (#4335) * add support for CFG rescale * fix typo * move rescale position and tweak docs * move input position * implement suggestions from github and discord * cleanup unused code * add back dropped FieldDescription * fix(ui): revert unrelated UI changes * chore(nodes): bump denoise_latents version 1.4.0 -> 1.5.0 * feat(nodes): add cfg_rescale_multiplier to metadata node * feat(ui): add cfg rescale multiplier to linear UI - add param to state - update graph builders - add UI under advanced - add metadata handling & recall - regen types * chore: black * fix(backend): make `StableDiffusionGeneratorPipeline._rescale_cfg()` staticmethod This doesn't need access to class. 
* feat(backend): add docstring for `_rescale_cfg()` method * feat(ui): update cfg rescale mult translation string --------- Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> --- invokeai/app/invocations/latent.py | 6 +- invokeai/app/invocations/metadata.py | 3 + invokeai/app/shared/fields.py | 1 + .../stable_diffusion/diffusers_pipeline.py | 23 +++++-- .../diffusion/conditioning_data.py | 6 +- invokeai/frontend/web/public/locales/en.json | 9 +++ .../IAIInformationalPopover/constants.ts | 1 + .../ImageMetadataActions.tsx | 13 ++++ .../web/src/features/nodes/types/metadata.ts | 1 + .../graph/buildCanvasImageToImageGraph.ts | 2 + .../graph/buildCanvasSDXLImageToImageGraph.ts | 2 + .../graph/buildCanvasSDXLTextToImageGraph.ts | 2 + .../util/graph/buildCanvasTextToImageGraph.ts | 2 + .../graph/buildLinearImageToImageGraph.ts | 2 + .../graph/buildLinearSDXLImageToImageGraph.ts | 2 + .../graph/buildLinearSDXLTextToImageGraph.ts | 2 + .../util/graph/buildLinearTextToImageGraph.ts | 3 + .../Advanced/ParamAdvancedCollapse.tsx | 46 ++++++++++++-- .../Advanced/ParamCFGRescaleMultiplier.tsx | 60 +++++++++++++++++++ .../parameters/hooks/useRecallParameters.ts | 23 +++++++ .../parameters/store/generationSlice.ts | 12 +++- .../parameters/types/parameterSchemas.ts | 11 ++++ .../frontend/web/src/services/api/schema.d.ts | 51 +++++++++------- 23 files changed, 249 insertions(+), 34 deletions(-) create mode 100644 invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index d438bcae02..ab59b41865 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -215,7 +215,7 @@ def get_scheduler( title="Denoise Latents", tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"], category="latents", - version="1.4.0", + version="1.5.0", ) class DenoiseLatentsInvocation(BaseInvocation): """Denoises noisy latents to decodable images""" @@ -273,6 +273,9 @@ class DenoiseLatentsInvocation(BaseInvocation): input=Input.Connection, ui_order=7, ) + cfg_rescale_multiplier: float = InputField( + default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier + ) latents: Optional[LatentsField] = InputField( default=None, description=FieldDescriptions.latents, @@ -332,6 +335,7 @@ class DenoiseLatentsInvocation(BaseInvocation): unconditioned_embeddings=uc, text_embeddings=c, guidance_scale=self.cfg_scale, + guidance_rescale_multiplier=self.cfg_rescale_multiplier, extra=extra_conditioning_info, postprocessing_settings=PostprocessingSettings( threshold=0.0, # threshold, diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index d837e6297f..14d66f8ef6 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -127,6 +127,9 @@ class CoreMetadataInvocation(BaseInvocation): seed: Optional[int] = InputField(default=None, description="The seed used for noise generation") rand_device: Optional[str] = InputField(default=None, description="The device used for random number generation") cfg_scale: Optional[float] = InputField(default=None, description="The classifier-free guidance scale parameter") + cfg_rescale_multiplier: Optional[float] = InputField( + default=None, description=FieldDescriptions.cfg_rescale_multiplier + ) steps: Optional[int] = InputField(default=None, description="The number of steps used for inference") scheduler: 
Optional[str] = InputField(default=None, description="The scheduler used for inference") seamless_x: Optional[bool] = InputField(default=None, description="Whether seamless tiling was used on the X axis") diff --git a/invokeai/app/shared/fields.py b/invokeai/app/shared/fields.py index dd9cbb7b82..3e841ffbf2 100644 --- a/invokeai/app/shared/fields.py +++ b/invokeai/app/shared/fields.py @@ -2,6 +2,7 @@ class FieldDescriptions: denoising_start = "When to start denoising, expressed a percentage of total steps" denoising_end = "When to stop denoising, expressed a percentage of total steps" cfg_scale = "Classifier-Free Guidance scale" + cfg_rescale_multiplier = "Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR" scheduler = "Scheduler to use during inference" positive_cond = "Positive conditioning tensor" negative_cond = "Negative conditioning tensor" diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 1353e804a7..ae0cc17203 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -607,11 +607,14 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): if isinstance(guidance_scale, list): guidance_scale = guidance_scale[step_index] - noise_pred = self.invokeai_diffuser._combine( - uc_noise_pred, - c_noise_pred, - guidance_scale, - ) + noise_pred = self.invokeai_diffuser._combine(uc_noise_pred, c_noise_pred, guidance_scale) + guidance_rescale_multiplier = conditioning_data.guidance_rescale_multiplier + if guidance_rescale_multiplier > 0: + noise_pred = self._rescale_cfg( + noise_pred, + c_noise_pred, + guidance_rescale_multiplier, + ) # compute the previous noisy sample x_t -> x_t-1 step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args) @@ -634,6 +637,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): return step_output + @staticmethod + def _rescale_cfg(total_noise_pred, pos_noise_pred, multiplier=0.7): + """Implementation of Algorithm 2 from https://arxiv.org/pdf/2305.08891.pdf.""" + ro_pos = torch.std(pos_noise_pred, dim=(1, 2, 3), keepdim=True) + ro_cfg = torch.std(total_noise_pred, dim=(1, 2, 3), keepdim=True) + + x_rescaled = total_noise_pred * (ro_pos / ro_cfg) + x_final = multiplier * x_rescaled + (1.0 - multiplier) * total_noise_pred + return x_final + def _unet_forward( self, latents, diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 6a63c225fc..3e38f9f78d 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -67,13 +67,17 @@ class IPAdapterConditioningInfo: class ConditioningData: unconditioned_embeddings: BasicConditioningInfo text_embeddings: BasicConditioningInfo - guidance_scale: Union[float, List[float]] """ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality. 
""" + guidance_scale: Union[float, List[float]] + """ for models trained using zero-terminal SNR ("ztsnr"), it's suggested to use guidance_rescale_multiplier of 0.7 . + ref [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf) + """ + guidance_rescale_multiplier: float = 0 extra: Optional[ExtraConditioningInfo] = None scheduler_args: dict[str, Any] = field(default_factory=dict) """ diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 8b5afce4a7..52bf4ff8f9 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -599,6 +599,7 @@ }, "metadata": { "cfgScale": "CFG scale", + "cfgRescaleMultiplier": "$t(parameters.cfgRescaleMultiplier)", "createdBy": "Created By", "fit": "Image to image fit", "generationMode": "Generation Mode", @@ -1032,6 +1033,8 @@ "setType": "Set cancel type" }, "cfgScale": "CFG Scale", + "cfgRescaleMultiplier": "CFG Rescale Multiplier", + "cfgRescale": "CFG Rescale", "clipSkip": "CLIP Skip", "clipSkipWithLayerCount": "CLIP Skip {{layerCount}}", "closeViewer": "Close Viewer", @@ -1470,6 +1473,12 @@ "Controls how much your prompt influences the generation process." ] }, + "paramCFGRescaleMultiplier": { + "heading": "CFG Rescale Multiplier", + "paragraphs": [ + "Rescale multiplier for CFG guidance, used for models trained using zero-terminal SNR (ztsnr). Suggested value 0.7." + ] + }, "paramDenoisingStrength": { "heading": "Denoising Strength", "paragraphs": [ diff --git a/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts b/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts index 197f5f4068..8960399b48 100644 --- a/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts +++ b/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts @@ -25,6 +25,7 @@ export type Feature = | 'lora' | 'noiseUseCPU' | 'paramCFGScale' + | 'paramCFGRescaleMultiplier' | 'paramDenoisingStrength' | 'paramIterations' | 'paramModel' diff --git a/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx b/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx index 8c2c053846..890b5f7330 100644 --- a/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx @@ -29,6 +29,7 @@ const ImageMetadataActions = (props: Props) => { recallNegativePrompt, recallSeed, recallCfgScale, + recallCfgRescaleMultiplier, recallModel, recallScheduler, recallVaeModel, @@ -85,6 +86,10 @@ const ImageMetadataActions = (props: Props) => { recallCfgScale(metadata?.cfg_scale); }, [metadata?.cfg_scale, recallCfgScale]); + const handleRecallCfgRescaleMultiplier = useCallback(() => { + recallCfgRescaleMultiplier(metadata?.cfg_rescale_multiplier); + }, [metadata?.cfg_rescale_multiplier, recallCfgRescaleMultiplier]); + const handleRecallStrength = useCallback(() => { recallStrength(metadata?.strength); }, [metadata?.strength, recallStrength]); @@ -243,6 +248,14 @@ const ImageMetadataActions = (props: Props) => { onClick={handleRecallCfgScale} /> )} + {metadata.cfg_rescale_multiplier !== undefined && + metadata.cfg_rescale_multiplier !== null && ( + + )} {metadata.strength && ( { - const { clipSkip, model, seamlessXAxis, seamlessYAxis, 
shouldUseCpuNoise } = - state.generation; + const { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + } = state.generation; - return { clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise }; + return { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + }; }, defaultSelectorOptions ); export default function ParamAdvancedCollapse() { - const { clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise } = - useAppSelector(selector); + const { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + } = useAppSelector(selector); const { t } = useTranslation(); const activeLabel = useMemo(() => { const activeLabel: string[] = []; @@ -46,8 +66,20 @@ export default function ParamAdvancedCollapse() { activeLabel.push(t('parameters.seamlessY')); } + if (cfgRescaleMultiplier) { + activeLabel.push(t('parameters.cfgRescale')); + } + return activeLabel.join(', '); - }, [clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise, t]); + }, [ + cfgRescaleMultiplier, + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + t, + ]); return ( @@ -61,6 +93,8 @@ export default function ParamAdvancedCollapse() { )} + + ); diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx new file mode 100644 index 0000000000..2a65b32028 --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx @@ -0,0 +1,60 @@ +import { createSelector } from '@reduxjs/toolkit'; +import { stateSelector } from 'app/store/store'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; +import IAIInformationalPopover from 'common/components/IAIInformationalPopover/IAIInformationalPopover'; +import IAISlider from 'common/components/IAISlider'; +import { setCfgRescaleMultiplier } from 'features/parameters/store/generationSlice'; +import { memo, useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; + +const selector = createSelector( + [stateSelector], + ({ generation, hotkeys }) => { + const { cfgRescaleMultiplier } = generation; + const { shift } = hotkeys; + + return { + cfgRescaleMultiplier, + shift, + }; + }, + defaultSelectorOptions +); + +const ParamCFGRescaleMultiplier = () => { + const { cfgRescaleMultiplier, shift } = useAppSelector(selector); + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const handleChange = useCallback( + (v: number) => dispatch(setCfgRescaleMultiplier(v)), + [dispatch] + ); + + const handleReset = useCallback( + () => dispatch(setCfgRescaleMultiplier(0)), + [dispatch] + ); + + return ( + + + + ); +}; + +export default memo(ParamCFGRescaleMultiplier); diff --git a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts index c1b7dbabd6..3217ab7be7 100644 --- a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts +++ b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts @@ -57,6 +57,7 @@ import { modelSelected, } from 'features/parameters/store/actions'; import { + setCfgRescaleMultiplier, setCfgScale, 
setHeight, setHrfEnabled, @@ -94,6 +95,7 @@ import { isParameterStrength, isParameterVAEModel, isParameterWidth, + isParameterCFGRescaleMultiplier, } from 'features/parameters/types/parameterSchemas'; const selector = createSelector( @@ -282,6 +284,21 @@ export const useRecallParameters = () => { [dispatch, parameterSetToast, parameterNotSetToast] ); + /** + * Recall CFG rescale multiplier with toast + */ + const recallCfgRescaleMultiplier = useCallback( + (cfgRescaleMultiplier: unknown) => { + if (!isParameterCFGRescaleMultiplier(cfgRescaleMultiplier)) { + parameterNotSetToast(); + return; + } + dispatch(setCfgRescaleMultiplier(cfgRescaleMultiplier)); + parameterSetToast(); + }, + [dispatch, parameterSetToast, parameterNotSetToast] + ); + /** * Recall model with toast */ @@ -799,6 +816,7 @@ export const useRecallParameters = () => { const { cfg_scale, + cfg_rescale_multiplier, height, model, positive_prompt, @@ -831,6 +849,10 @@ export const useRecallParameters = () => { dispatch(setCfgScale(cfg_scale)); } + if (isParameterCFGRescaleMultiplier(cfg_rescale_multiplier)) { + dispatch(setCfgRescaleMultiplier(cfg_rescale_multiplier)); + } + if (isParameterModel(model)) { dispatch(modelSelected(model)); } @@ -985,6 +1007,7 @@ export const useRecallParameters = () => { recallSDXLNegativeStylePrompt, recallSeed, recallCfgScale, + recallCfgRescaleMultiplier, recallModel, recallScheduler, recallVaeModel, diff --git a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts index 8b7b8cb487..49835601d2 100644 --- a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts +++ b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts @@ -24,6 +24,7 @@ import { ParameterVAEModel, ParameterWidth, zParameterModel, + ParameterCFGRescaleMultiplier, } from 'features/parameters/types/parameterSchemas'; export interface GenerationState { @@ -31,6 +32,7 @@ export interface GenerationState { hrfStrength: ParameterStrength; hrfMethod: ParameterHRFMethod; cfgScale: ParameterCFGScale; + cfgRescaleMultiplier: ParameterCFGRescaleMultiplier; height: ParameterHeight; img2imgStrength: ParameterStrength; infillMethod: string; @@ -76,6 +78,7 @@ export const initialGenerationState: GenerationState = { hrfEnabled: false, hrfMethod: 'ESRGAN', cfgScale: 7.5, + cfgRescaleMultiplier: 0, height: 512, img2imgStrength: 0.75, infillMethod: 'patchmatch', @@ -145,9 +148,15 @@ export const generationSlice = createSlice({ state.steps ); }, - setCfgScale: (state, action: PayloadAction) => { + setCfgScale: (state, action: PayloadAction) => { state.cfgScale = action.payload; }, + setCfgRescaleMultiplier: ( + state, + action: PayloadAction + ) => { + state.cfgRescaleMultiplier = action.payload; + }, setThreshold: (state, action: PayloadAction) => { state.threshold = action.payload; }, @@ -336,6 +345,7 @@ export const { resetParametersState, resetSeed, setCfgScale, + setCfgRescaleMultiplier, setWidth, setHeight, toggleSize, diff --git a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts index 99f58f721c..73e7d7d2c3 100644 --- a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts +++ b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts @@ -77,6 +77,17 @@ export const isParameterCFGScale = (val: unknown): val is ParameterCFGScale => zParameterCFGScale.safeParse(val).success; // #endregion +// 
#region CFG Rescale Multiplier +export const zParameterCFGRescaleMultiplier = z.number().gte(0).lt(1); +export type ParameterCFGRescaleMultiplier = z.infer< + typeof zParameterCFGRescaleMultiplier +>; +export const isParameterCFGRescaleMultiplier = ( + val: unknown +): val is ParameterCFGRescaleMultiplier => + zParameterCFGRescaleMultiplier.safeParse(val).success; +// #endregion + // #region Scheduler export const zParameterScheduler = zSchedulerField; export type ParameterScheduler = z.infer; diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts index b4f9db1370..8204e50650 100644 --- a/invokeai/frontend/web/src/services/api/schema.d.ts +++ b/invokeai/frontend/web/src/services/api/schema.d.ts @@ -2067,6 +2067,11 @@ export type components = { * @description The classifier-free guidance scale parameter */ cfg_scale?: number | null; + /** + * Cfg Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + */ + cfg_rescale_multiplier?: number | null; /** * Steps * @description The number of steps used for inference @@ -2392,6 +2397,12 @@ export type components = { * @description T2I-Adapter(s) to apply */ t2i_adapter?: components["schemas"]["T2IAdapterField"] | components["schemas"]["T2IAdapterField"][] | null; + /** + * Cfg Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + * @default 0 + */ + cfg_rescale_multiplier?: number; /** @description Latents tensor */ latents?: components["schemas"]["LatentsField"] | null; /** @description The mask to use for the operation */ @@ -3220,7 +3231,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | 
components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | 
components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["RangeInvocation"]; + [key: string]: components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] 
| components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["BooleanCollectionInvocation"]; }; /** * Edges @@ -3257,7 +3268,7 @@ export type 
components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["String2Output"] | components["schemas"]["VAEOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"]; + [key: string]: components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["LatentsOutput"] | 
components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ConditioningOutput"]; }; /** * Errors @@ -9692,23 +9703,11 @@ export type components = { */ UIType: "SDXLMainModelField" | "SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_MainModel" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; /** - * StableDiffusionXLModelFormat + * ControlNetModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusionXLModelFormat: "checkpoint" | "diffusers"; - /** - * StableDiffusion1ModelFormat - * @description An enumeration. - * @enum {string} - */ - StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; - /** - * CLIPVisionModelFormat - * @description An enumeration. - * @enum {string} - */ - CLIPVisionModelFormat: "diffusers"; + ControlNetModelFormat: "checkpoint" | "diffusers"; /** * T2IAdapterModelFormat * @description An enumeration. @@ -9716,11 +9715,23 @@ export type components = { */ T2IAdapterModelFormat: "diffusers"; /** - * ControlNetModelFormat + * StableDiffusionXLModelFormat * @description An enumeration. * @enum {string} */ - ControlNetModelFormat: "checkpoint" | "diffusers"; + StableDiffusionXLModelFormat: "checkpoint" | "diffusers"; + /** + * StableDiffusion2ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; + /** + * StableDiffusion1ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; /** * IPAdapterModelFormat * @description An enumeration. @@ -9734,11 +9745,11 @@ export type components = { */ StableDiffusionOnnxModelFormat: "olive" | "onnx"; /** - * StableDiffusion2ModelFormat + * CLIPVisionModelFormat * @description An enumeration. 
* @enum {string} */ - StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; + CLIPVisionModelFormat: "diffusers"; }; responses: never; parameters: never; From 3e01c396e1f0b4ebeb427ebceafc8c379801ac88 Mon Sep 17 00:00:00 2001 From: ymgenesis Date: Thu, 30 Nov 2023 11:15:59 +0100 Subject: [PATCH 07/30] CenterPadCrop node (#3861) * add centerpadcrop node - Allows users to add padding to or crop images from the center - Also outputs a white mask with the dimensions of the output image for use with outpainting * add CenterPadCrop to NODES.md Updates NODES.md with CenterPadCrop entry. * remove mask & output class - Remove "ImageMaskOutput" where both image and mask are output - Remove ability to output mask from node --------- Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> --- docs/nodes/defaultNodes.md | 204 +++++++++--------- invokeai/app/invocations/image.py | 55 +++++ .../frontend/web/src/services/api/schema.d.ts | 87 ++++++-- 3 files changed, 229 insertions(+), 117 deletions(-) diff --git a/docs/nodes/defaultNodes.md b/docs/nodes/defaultNodes.md index ace51163ef..1f490dfe81 100644 --- a/docs/nodes/defaultNodes.md +++ b/docs/nodes/defaultNodes.md @@ -1,104 +1,106 @@ # List of Default Nodes -The table below contains a list of the default nodes shipped with InvokeAI and their descriptions. +The table below contains a list of the default nodes shipped with InvokeAI and +their descriptions. -| Node | Function | -|: ---------------------------------- | :--------------------------------------------------------------------------------------| -|Add Integers | Adds two numbers| -|Boolean Primitive Collection | A collection of boolean primitive values| -|Boolean Primitive | A boolean primitive value| -|Canny Processor | Canny edge detection for ControlNet| -|CLIP Skip | Skip layers in clip text_encoder model.| -|Collect | Collects values into a collection| -|Color Correct | Shifts the colors of a target image to match the reference image, optionally using a mask to only color-correct certain regions of the target image.| -|Color Primitive | A color primitive value| -|Compel Prompt | Parse prompt using compel package to conditioning.| -|Conditioning Primitive Collection | A collection of conditioning tensor primitive values| -|Conditioning Primitive | A conditioning tensor primitive value| -|Content Shuffle Processor | Applies content shuffle processing to image| -|ControlNet | Collects ControlNet info to pass to other nodes| -|Denoise Latents | Denoises noisy latents to decodable images| -|Divide Integers | Divides two numbers| -|Dynamic Prompt | Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator| -|[FaceMask](./detailedNodes/faceTools.md#facemask) | Generates masks for faces in an image to use with Inpainting| -|[FaceIdentifier](./detailedNodes/faceTools.md#faceidentifier) | Identifies and labels faces in an image| -|[FaceOff](./detailedNodes/faceTools.md#faceoff) | Creates a new image that is a scaled bounding box with a mask on the face for Inpainting| -|Float Math | Perform basic math operations on two floats| -|Float Primitive Collection | A collection of float primitive values| -|Float Primitive | A float primitive value| -|Float Range | Creates a range| -|HED (softedge) Processor | Applies HED edge detection to image| -|Blur Image | Blurs an image| -|Extract Image Channel | Gets a channel from an image.| -|Image Primitive Collection | A collection of image primitive values| -|Integer Math | Perform basic math operations on two 
integers| -|Convert Image Mode | Converts an image to a different mode.| -|Crop Image | Crops an image to a specified box. The box can be outside of the image.| -|Image Hue Adjustment | Adjusts the Hue of an image.| -|Inverse Lerp Image | Inverse linear interpolation of all pixels of an image| -|Image Primitive | An image primitive value| -|Lerp Image | Linear interpolation of all pixels of an image| -|Offset Image Channel | Add to or subtract from an image color channel by a uniform value.| -|Multiply Image Channel | Multiply or Invert an image color channel by a scalar value.| -|Multiply Images | Multiplies two images together using `PIL.ImageChops.multiply()`.| -|Blur NSFW Image | Add blur to NSFW-flagged images| -|Paste Image | Pastes an image into another image.| -|ImageProcessor | Base class for invocations that preprocess images for ControlNet| -|Resize Image | Resizes an image to specific dimensions| -|Round Float | Rounds a float to a specified number of decimal places| -|Float to Integer | Converts a float to an integer. Optionally rounds to an even multiple of a input number.| -|Scale Image | Scales an image by a factor| -|Image to Latents | Encodes an image into latents.| -|Add Invisible Watermark | Add an invisible watermark to an image| -|Solid Color Infill | Infills transparent areas of an image with a solid color| -|PatchMatch Infill | Infills transparent areas of an image using the PatchMatch algorithm| -|Tile Infill | Infills transparent areas of an image with tiles of the image| -|Integer Primitive Collection | A collection of integer primitive values| -|Integer Primitive | An integer primitive value| -|Iterate | Iterates over a list of items| -|Latents Primitive Collection | A collection of latents tensor primitive values| -|Latents Primitive | A latents tensor primitive value| -|Latents to Image | Generates an image from latents.| -|Leres (Depth) Processor | Applies leres processing to image| -|Lineart Anime Processor | Applies line art anime processing to image| -|Lineart Processor | Applies line art processing to image| -|LoRA Loader | Apply selected lora to unet and text_encoder.| -|Main Model Loader | Loads a main model, outputting its submodels.| -|Combine Mask | Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`.| -|Mask Edge | Applies an edge mask to an image| -|Mask from Alpha | Extracts the alpha channel of an image as a mask.| -|Mediapipe Face Processor | Applies mediapipe face processing to image| -|Midas (Depth) Processor | Applies Midas depth processing to image| -|MLSD Processor | Applies MLSD processing to image| -|Multiply Integers | Multiplies two numbers| -|Noise | Generates latent noise.| -|Normal BAE Processor | Applies NormalBae processing to image| -|ONNX Latents to Image | Generates an image from latents.| -|ONNX Prompt (Raw) | A node to process inputs and produce outputs. 
May use dependency injection in __init__ to receive providers.| -|ONNX Text to Latents | Generates latents from conditionings.| -|ONNX Model Loader | Loads a main model, outputting its submodels.| -|OpenCV Inpaint | Simple inpaint using opencv.| -|Openpose Processor | Applies Openpose processing to image| -|PIDI Processor | Applies PIDI processing to image| -|Prompts from File | Loads prompts from a text file| -|Random Integer | Outputs a single random integer.| -|Random Range | Creates a collection of random numbers| -|Integer Range | Creates a range of numbers from start to stop with step| -|Integer Range of Size | Creates a range from start to start + size with step| -|Resize Latents | Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8.| -|SDXL Compel Prompt | Parse prompt using compel package to conditioning.| -|SDXL LoRA Loader | Apply selected lora to unet and text_encoder.| -|SDXL Main Model Loader | Loads an sdxl base model, outputting its submodels.| -|SDXL Refiner Compel Prompt | Parse prompt using compel package to conditioning.| -|SDXL Refiner Model Loader | Loads an sdxl refiner model, outputting its submodels.| -|Scale Latents | Scales latents by a given factor.| -|Segment Anything Processor | Applies segment anything processing to image| -|Show Image | Displays a provided image, and passes it forward in the pipeline.| -|Step Param Easing | Experimental per-step parameter easing for denoising steps| -|String Primitive Collection | A collection of string primitive values| -|String Primitive | A string primitive value| -|Subtract Integers | Subtracts two numbers| -|Tile Resample Processor | Tile resampler processor| -|Upscale (RealESRGAN) | Upscales an image using RealESRGAN.| -|VAE Loader | Loads a VAE model, outputting a VaeLoaderOutput| -|Zoe (Depth) Processor | Applies Zoe depth processing to image| \ No newline at end of file +| Node | Function | +| :------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------- | +| Add Integers | Adds two numbers | +| Boolean Primitive Collection | A collection of boolean primitive values | +| Boolean Primitive | A boolean primitive value | +| Canny Processor | Canny edge detection for ControlNet | +| CenterPadCrop | Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. | +| CLIP Skip | Skip layers in clip text_encoder model. | +| Collect | Collects values into a collection | +| Color Correct | Shifts the colors of a target image to match the reference image, optionally using a mask to only color-correct certain regions of the target image. | +| Color Primitive | A color primitive value | +| Compel Prompt | Parse prompt using compel package to conditioning. 
| +| Conditioning Primitive Collection | A collection of conditioning tensor primitive values | +| Conditioning Primitive | A conditioning tensor primitive value | +| Content Shuffle Processor | Applies content shuffle processing to image | +| ControlNet | Collects ControlNet info to pass to other nodes | +| Denoise Latents | Denoises noisy latents to decodable images | +| Divide Integers | Divides two numbers | +| Dynamic Prompt | Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator | +| [FaceMask](./detailedNodes/faceTools.md#facemask) | Generates masks for faces in an image to use with Inpainting | +| [FaceIdentifier](./detailedNodes/faceTools.md#faceidentifier) | Identifies and labels faces in an image | +| [FaceOff](./detailedNodes/faceTools.md#faceoff) | Creates a new image that is a scaled bounding box with a mask on the face for Inpainting | +| Float Math | Perform basic math operations on two floats | +| Float Primitive Collection | A collection of float primitive values | +| Float Primitive | A float primitive value | +| Float Range | Creates a range | +| HED (softedge) Processor | Applies HED edge detection to image | +| Blur Image | Blurs an image | +| Extract Image Channel | Gets a channel from an image. | +| Image Primitive Collection | A collection of image primitive values | +| Integer Math | Perform basic math operations on two integers | +| Convert Image Mode | Converts an image to a different mode. | +| Crop Image | Crops an image to a specified box. The box can be outside of the image. | +| Image Hue Adjustment | Adjusts the Hue of an image. | +| Inverse Lerp Image | Inverse linear interpolation of all pixels of an image | +| Image Primitive | An image primitive value | +| Lerp Image | Linear interpolation of all pixels of an image | +| Offset Image Channel | Add to or subtract from an image color channel by a uniform value. | +| Multiply Image Channel | Multiply or Invert an image color channel by a scalar value. | +| Multiply Images | Multiplies two images together using `PIL.ImageChops.multiply()`. | +| Blur NSFW Image | Add blur to NSFW-flagged images | +| Paste Image | Pastes an image into another image. | +| ImageProcessor | Base class for invocations that preprocess images for ControlNet | +| Resize Image | Resizes an image to specific dimensions | +| Round Float | Rounds a float to a specified number of decimal places | +| Float to Integer | Converts a float to an integer. Optionally rounds to an even multiple of a input number. | +| Scale Image | Scales an image by a factor | +| Image to Latents | Encodes an image into latents. | +| Add Invisible Watermark | Add an invisible watermark to an image | +| Solid Color Infill | Infills transparent areas of an image with a solid color | +| PatchMatch Infill | Infills transparent areas of an image using the PatchMatch algorithm | +| Tile Infill | Infills transparent areas of an image with tiles of the image | +| Integer Primitive Collection | A collection of integer primitive values | +| Integer Primitive | An integer primitive value | +| Iterate | Iterates over a list of items | +| Latents Primitive Collection | A collection of latents tensor primitive values | +| Latents Primitive | A latents tensor primitive value | +| Latents to Image | Generates an image from latents. 
| +| Leres (Depth) Processor | Applies leres processing to image | +| Lineart Anime Processor | Applies line art anime processing to image | +| Lineart Processor | Applies line art processing to image | +| LoRA Loader | Apply selected lora to unet and text_encoder. | +| Main Model Loader | Loads a main model, outputting its submodels. | +| Combine Mask | Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`. | +| Mask Edge | Applies an edge mask to an image | +| Mask from Alpha | Extracts the alpha channel of an image as a mask. | +| Mediapipe Face Processor | Applies mediapipe face processing to image | +| Midas (Depth) Processor | Applies Midas depth processing to image | +| MLSD Processor | Applies MLSD processing to image | +| Multiply Integers | Multiplies two numbers | +| Noise | Generates latent noise. | +| Normal BAE Processor | Applies NormalBae processing to image | +| ONNX Latents to Image | Generates an image from latents. | +| ONNX Prompt (Raw) | A node to process inputs and produce outputs. May use dependency injection in **init** to receive providers. | +| ONNX Text to Latents | Generates latents from conditionings. | +| ONNX Model Loader | Loads a main model, outputting its submodels. | +| OpenCV Inpaint | Simple inpaint using opencv. | +| Openpose Processor | Applies Openpose processing to image | +| PIDI Processor | Applies PIDI processing to image | +| Prompts from File | Loads prompts from a text file | +| Random Integer | Outputs a single random integer. | +| Random Range | Creates a collection of random numbers | +| Integer Range | Creates a range of numbers from start to stop with step | +| Integer Range of Size | Creates a range from start to start + size with step | +| Resize Latents | Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8. | +| SDXL Compel Prompt | Parse prompt using compel package to conditioning. | +| SDXL LoRA Loader | Apply selected lora to unet and text_encoder. | +| SDXL Main Model Loader | Loads an sdxl base model, outputting its submodels. | +| SDXL Refiner Compel Prompt | Parse prompt using compel package to conditioning. | +| SDXL Refiner Model Loader | Loads an sdxl refiner model, outputting its submodels. | +| Scale Latents | Scales latents by a given factor. | +| Segment Anything Processor | Applies segment anything processing to image | +| Show Image | Displays a provided image, and passes it forward in the pipeline. | +| Step Param Easing | Experimental per-step parameter easing for denoising steps | +| String Primitive Collection | A collection of string primitive values | +| String Primitive | A string primitive value | +| Subtract Integers | Subtracts two numbers | +| Tile Resample Processor | Tile resampler processor | +| Upscale (RealESRGAN) | Upscales an image using RealESRGAN. 
| +| VAE Loader | Loads a VAE model, outputting a VaeLoaderOutput | +| Zoe (Depth) Processor | Applies Zoe depth processing to image | diff --git a/invokeai/app/invocations/image.py b/invokeai/app/invocations/image.py index 87e8392402..ad3b3aec71 100644 --- a/invokeai/app/invocations/image.py +++ b/invokeai/app/invocations/image.py @@ -100,6 +100,61 @@ class ImageCropInvocation(BaseInvocation, WithWorkflow, WithMetadata): ) +@invocation( + invocation_type="img_pad_crop", + title="Center Pad or Crop Image", + category="image", + tags=["image", "pad", "crop"], + version="1.0.0", +) +class CenterPadCropInvocation(BaseInvocation): + """Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image.""" + + image: ImageField = InputField(description="The image to crop") + left: int = InputField( + default=0, + description="Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards)", + ) + right: int = InputField( + default=0, + description="Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards)", + ) + top: int = InputField( + default=0, + description="Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards)", + ) + bottom: int = InputField( + default=0, + description="Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards)", + ) + + def invoke(self, context: InvocationContext) -> ImageOutput: + image = context.services.images.get_pil_image(self.image.image_name) + + # Calculate and create new image dimensions + new_width = image.width + self.right + self.left + new_height = image.height + self.top + self.bottom + image_crop = Image.new(mode="RGBA", size=(new_width, new_height), color=(0, 0, 0, 0)) + + # Paste new image onto input + image_crop.paste(image, (self.left, self.top)) + + image_dto = context.services.images.create( + image=image_crop, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + ) + + return ImageOutput( + image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) + + @invocation("img_paste", title="Paste Image", tags=["image", "paste"], category="image", version="1.1.0") class ImagePasteInvocation(BaseInvocation, WithWorkflow, WithMetadata): """Pastes an image into another image.""" diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts index 8204e50650..b6b6a0e8be 100644 --- a/invokeai/frontend/web/src/services/api/schema.d.ts +++ b/invokeai/frontend/web/src/services/api/schema.d.ts @@ -1196,6 +1196,61 @@ export type components = { */ type: "canny_image_processor"; }; + /** + * Center Pad or Crop Image + * @description Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. + */ + CenterPadCropInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** @description The image to crop */ + image?: components["schemas"]["ImageField"]; + /** + * Left + * @description Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + left?: number; + /** + * Right + * @description Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + right?: number; + /** + * Top + * @description Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + top?: number; + /** + * Bottom + * @description Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + bottom?: number; + /** + * type + * @default img_pad_crop + * @constant + */ + type: "img_pad_crop"; + }; /** * ClearResult * @description Result of clearing the session queue @@ -3231,7 +3286,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | 
components["schemas"]["FloatInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | 
components["schemas"]["ImageBlurInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["BooleanCollectionInvocation"]; + [key: string]: components["schemas"]["BlankImageInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | 
components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["LineartImageProcessorInvocation"]; }; /** * Edges @@ -3268,7 +3323,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["String2Output"] | 
components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ConditioningOutput"]; + [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["T2IAdapterOutput"]; }; /** * Errors @@ -9703,17 +9758,23 @@ export type components = { */ UIType: "SDXLMainModelField" | 
"SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_MainModel" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; /** - * ControlNetModelFormat + * StableDiffusionOnnxModelFormat * @description An enumeration. * @enum {string} */ - ControlNetModelFormat: "checkpoint" | "diffusers"; + StableDiffusionOnnxModelFormat: "olive" | "onnx"; /** - * T2IAdapterModelFormat + * CLIPVisionModelFormat * @description An enumeration. * @enum {string} */ - T2IAdapterModelFormat: "diffusers"; + CLIPVisionModelFormat: "diffusers"; + /** + * StableDiffusion1ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; /** * StableDiffusionXLModelFormat * @description An enumeration. @@ -9727,11 +9788,11 @@ export type components = { */ StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; /** - * StableDiffusion1ModelFormat + * T2IAdapterModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; + T2IAdapterModelFormat: "diffusers"; /** * IPAdapterModelFormat * @description An enumeration. @@ -9739,17 +9800,11 @@ export type components = { */ IPAdapterModelFormat: "invokeai"; /** - * StableDiffusionOnnxModelFormat + * ControlNetModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusionOnnxModelFormat: "olive" | "onnx"; - /** - * CLIPVisionModelFormat - * @description An enumeration. 
- * @enum {string} - */ - CLIPVisionModelFormat: "diffusers"; + ControlNetModelFormat: "checkpoint" | "diffusers"; }; responses: never; parameters: never; From a6d4e4ed5774ec752666291c95035cc6c6b7a5eb Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 22:01:33 +1100 Subject: [PATCH 08/30] fix(ui): fix enum parsing for optional enums Closes #5121 - Parse `anyOf` for enums (present when they are optional) - Consolidate `FieldTypeParseError` and `UnsupportedFieldTypeError` into `FieldParseError` (there was no difference in handling and it simplifies things a bit) --- invokeai/frontend/web/public/locales/en.json | 1 + .../web/src/features/nodes/types/error.ts | 19 ++--------- .../util/schema/buildFieldInputTemplate.ts | 30 +++++++++++++++-- .../nodes/util/schema/parseFieldType.ts | 33 ++++++++----------- .../features/nodes/util/schema/parseSchema.ts | 21 ++++-------- 5 files changed, 51 insertions(+), 53 deletions(-) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 52bf4ff8f9..8f93f32d90 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -978,6 +978,7 @@ "unsupportedAnyOfLength": "too many union members ({{count}})", "unsupportedMismatchedUnion": "mismatched CollectionOrScalar type with base types {{firstType}} and {{secondType}}", "unableToParseFieldType": "unable to parse field type", + "unableToExtractEnumOptions": "unable to extract enum options", "uNetField": "UNet", "uNetFieldDescription": "UNet submodel.", "unhandledInputProperty": "Unhandled input property", diff --git a/invokeai/frontend/web/src/features/nodes/types/error.ts b/invokeai/frontend/web/src/features/nodes/types/error.ts index e520b7710d..905b487fb0 100644 --- a/invokeai/frontend/web/src/features/nodes/types/error.ts +++ b/invokeai/frontend/web/src/features/nodes/types/error.ts @@ -43,10 +43,10 @@ export class NodeUpdateError extends Error { } /** - * FieldTypeParseError + * FieldParseError * Raised when a field cannot be parsed from a field schema. */ -export class FieldTypeParseError extends Error { +export class FieldParseError extends Error { /** * Create FieldTypeParseError * @param {String} message @@ -56,18 +56,3 @@ export class FieldTypeParseError extends Error { this.name = this.constructor.name; } } - -/** - * UnsupportedFieldTypeError - * Raised when an unsupported field type is parsed. 
- */ -export class UnsupportedFieldTypeError extends Error { - /** - * Create UnsupportedFieldTypeError - * @param {String} message - */ - constructor(message: string) { - super(message); - this.name = this.constructor.name; - } -} diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts index 94095bbc08..001e1c9cf4 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts @@ -23,7 +23,12 @@ import { VAEModelFieldInputTemplate, isStatefulFieldType, } from 'features/nodes/types/field'; -import { InvocationFieldSchema } from 'features/nodes/types/openapi'; +import { + InvocationFieldSchema, + isSchemaObject, +} from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { FieldParseError } from 'features/nodes/types/error'; // eslint-disable-next-line @typescript-eslint/no-explicit-any type FieldInputTemplateBuilder = // valid `any`! @@ -321,7 +326,28 @@ const buildImageFieldInputTemplate: FieldInputTemplateBuilder< const buildEnumFieldInputTemplate: FieldInputTemplateBuilder< EnumFieldInputTemplate > = ({ schemaObject, baseField, isCollection, isCollectionOrScalar }) => { - const options = schemaObject.enum ?? []; + let options: EnumFieldInputTemplate['options'] = []; + if (schemaObject.anyOf) { + const filteredAnyOf = schemaObject.anyOf.filter((i) => { + if (isSchemaObject(i)) { + if (i.type === 'null') { + return false; + } + } + return true; + }); + const firstAnyOf = filteredAnyOf[0]; + if (filteredAnyOf.length !== 1 || !isSchemaObject(firstAnyOf)) { + options = []; + } else { + options = firstAnyOf.enum ?? []; + } + } else { + options = schemaObject.enum ?? []; + } + if (options.length === 0) { + throw new FieldParseError(t('nodes.unableToExtractEnumOptions')); + } const template: EnumFieldInputTemplate = { ...baseField, type: { diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts b/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts index 3b6fadd8a1..4ee4edce1b 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts @@ -1,10 +1,4 @@ -import { t } from 'i18next'; -import { isArray } from 'lodash-es'; -import { OpenAPIV3_1 } from 'openapi-types'; -import { - FieldTypeParseError, - UnsupportedFieldTypeError, -} from 'features/nodes/types/error'; +import { FieldParseError } from 'features/nodes/types/error'; import { FieldType } from 'features/nodes/types/field'; import { OpenAPIV3_1SchemaOrRef, @@ -14,6 +8,9 @@ import { isRefObject, isSchemaObject, } from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { isArray } from 'lodash-es'; +import { OpenAPIV3_1 } from 'openapi-types'; /** * Transforms an invocation output ref object to field type. 
@@ -70,7 +67,7 @@ export const parseFieldType = ( // This is a single ref type const name = refObjectToSchemaName(allOf[0]); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -95,7 +92,7 @@ export const parseFieldType = ( if (isRefObject(filteredAnyOf[0])) { const name = refObjectToSchemaName(filteredAnyOf[0]); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -120,7 +117,7 @@ export const parseFieldType = ( if (filteredAnyOf.length !== 2) { // This is a union of more than 2 types, which we don't support - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedAnyOfLength', { count: filteredAnyOf.length, }) @@ -167,7 +164,7 @@ export const parseFieldType = ( }; } - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedMismatchedUnion', { firstType, secondType, @@ -186,7 +183,7 @@ export const parseFieldType = ( if (isSchemaObject(schemaObject.items)) { const itemType = schemaObject.items.type; if (!itemType || isArray(itemType)) { - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: itemType, }) @@ -196,7 +193,7 @@ export const parseFieldType = ( const name = OPENAPI_TO_FIELD_TYPE_MAP[itemType]; if (!name) { // it's 'null', 'object', or 'array' - skip - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: itemType, }) @@ -212,7 +209,7 @@ export const parseFieldType = ( // This is a ref object, extract the type name const name = refObjectToSchemaName(schemaObject.items); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -226,7 +223,7 @@ export const parseFieldType = ( const name = OPENAPI_TO_FIELD_TYPE_MAP[schemaObject.type]; if (!name) { // it's 'null', 'object', or 'array' - skip - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: schemaObject.type, }) @@ -242,9 +239,7 @@ export const parseFieldType = ( } else if (isRefObject(schemaObject)) { const name = refObjectToSchemaName(schemaObject); if (!name) { - throw new FieldTypeParseError( - t('nodes.unableToExtractSchemaNameFromRef') - ); + throw new FieldParseError(t('nodes.unableToExtractSchemaNameFromRef')); } return { name, @@ -252,5 +247,5 @@ export const parseFieldType = ( isCollectionOrScalar: false, }; } - throw new FieldTypeParseError(t('nodes.unableToParseFieldType')); + throw new FieldParseError(t('nodes.unableToParseFieldType')); }; diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts b/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts index b425b11663..c5a7cd9f3d 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts @@ -1,12 +1,6 @@ import { logger } from 'app/logging/logger'; import { parseify } from 'common/util/serialize'; -import { t } from 'i18next'; -import { reduce } from 'lodash-es'; -import { OpenAPIV3_1 } from 'openapi-types'; -import { - FieldTypeParseError, - UnsupportedFieldTypeError, -} from 'features/nodes/types/error'; +import { FieldParseError } from 'features/nodes/types/error'; import { FieldInputTemplate, FieldOutputTemplate, @@ -18,6 +12,9 @@ import { isInvocationOutputSchemaObject, 
isInvocationSchemaObject, } from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { reduce } from 'lodash-es'; +import { OpenAPIV3_1 } from 'openapi-types'; import { buildFieldInputTemplate } from './buildFieldInputTemplate'; import { buildFieldOutputTemplate } from './buildFieldOutputTemplate'; import { parseFieldType } from './parseFieldType'; @@ -133,10 +130,7 @@ export const parseSchema = ( inputsAccumulator[propertyName] = fieldInputTemplate; } catch (e) { - if ( - e instanceof FieldTypeParseError || - e instanceof UnsupportedFieldTypeError - ) { + if (e instanceof FieldParseError) { logger('nodes').warn( { node: type, @@ -225,10 +219,7 @@ export const parseSchema = ( outputsAccumulator[propertyName] = fieldOutputTemplate; } catch (e) { - if ( - e instanceof FieldTypeParseError || - e instanceof UnsupportedFieldTypeError - ) { + if (e instanceof FieldParseError) { logger('nodes').warn( { node: type, From 2f81f9fb223b2a3c7c9aa581b41f82b4ad93fe1e Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 21:06:50 +1100 Subject: [PATCH 09/30] fix(ui): add missing star image translation key --- invokeai/frontend/web/public/locales/en.json | 3 ++- .../components/ImageContextMenu/SingleSelectionMenuItems.tsx | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 8f93f32d90..8663815adb 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -243,7 +243,6 @@ "setControlImageDimensions": "Set Control Image Dimensions To W/H", "showAdvanced": "Show Advanced", "toggleControlNet": "Toggle this ControlNet", - "unstarImage": "Unstar Image", "w": "W", "weight": "Weight", "enableIPAdapter": "Enable IP Adapter", @@ -378,6 +377,8 @@ "showGenerations": "Show Generations", "showUploads": "Show Uploads", "singleColumnLayout": "Single Column Layout", + "starImage": "Star Image", + "unstarImage": "Unstar Image", "unableToLoad": "Unable to load Gallery", "uploads": "Uploads", "deleteSelection": "Delete Selection", diff --git a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx index fdb0809364..1b4e642e64 100644 --- a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx @@ -234,14 +234,14 @@ const SingleSelectionMenuItems = (props: SingleSelectionMenuItemsProps) => { icon={customStarUi ? customStarUi.off.icon : } onClickCapture={handleUnstarImage} > - {customStarUi ? customStarUi.off.text : t('controlnet.unstarImage')} + {customStarUi ? customStarUi.off.text : t('gallery.unstarImage')} ) : ( } onClickCapture={handleStarImage} > - {customStarUi ? customStarUi.on.text : `Star Image`} + {customStarUi ? 
customStarUi.on.text : t('gallery.starImage')} )} Date: Fri, 1 Dec 2023 00:06:56 +1100 Subject: [PATCH 10/30] fix(ui): fix missing images not handled - Reset init image, control adapter images, and node image fields when their selected image fails to load - Only do this if the app is connected via socket (this indicates that the image is "really" gone, and there isn't just a transient network issue) It's possible for image parameters/nodes/states to have reference a deleted image. For example, a resize image node might have an image set on it, and the workflow saved. The workflow contains a hard reference to that image. The image is deleted and the workflow loaded again later. The deleted image is still in that workflow, but the app doesn't detect that. The result is that the workflow/graph appears to be valid, but will fail on invoke. This creates a really confusing user experience, where when somebody shares a workflow with an image baked into it, and another person opens it, everything *looks* ok, but the workflow fails with a mysterious error about a missing image. The problem affects node images, control adapter images and the img2img init image. Resetting the image when it fails to load *and* socket is connected resolves this in a simple way. The problem also affects canvas images, but we have handle that by displaying an error fallback image, so no change is made there. --- .../components/ControlAdapterImagePreview.tsx | 42 ++++++++++++------- .../inputs/ImageFieldInputComponent.tsx | 14 +++++-- .../Parameters/ImageToImage/InitialImage.tsx | 20 +++++++-- 3 files changed, 54 insertions(+), 22 deletions(-) diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx index e12abf4830..b3b584d07e 100644 --- a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx +++ b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx @@ -5,14 +5,19 @@ import { stateSelector } from 'app/store/store'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; import IAIDndImage from 'common/components/IAIDndImage'; +import IAIDndImageIcon from 'common/components/IAIDndImageIcon'; import { setBoundingBoxDimensions } from 'features/canvas/store/canvasSlice'; +import { useControlAdapterControlImage } from 'features/controlAdapters/hooks/useControlAdapterControlImage'; +import { useControlAdapterProcessedControlImage } from 'features/controlAdapters/hooks/useControlAdapterProcessedControlImage'; +import { useControlAdapterProcessorType } from 'features/controlAdapters/hooks/useControlAdapterProcessorType'; +import { controlAdapterImageChanged } from 'features/controlAdapters/store/controlAdaptersSlice'; import { TypesafeDraggableData, TypesafeDroppableData, } from 'features/dnd/types'; import { setHeight, setWidth } from 'features/parameters/store/generationSlice'; import { activeTabNameSelector } from 'features/ui/store/uiSelectors'; -import { memo, useCallback, useMemo, useState } from 'react'; +import { memo, useCallback, useEffect, useMemo, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { FaRulerVertical, FaSave, FaUndo } from 'react-icons/fa'; import { @@ -22,11 +27,6 @@ import { useRemoveImageFromBoardMutation, } from 'services/api/endpoints/images'; import { 
PostUploadAction } from 'services/api/types'; -import IAIDndImageIcon from 'common/components/IAIDndImageIcon'; -import { controlAdapterImageChanged } from 'features/controlAdapters/store/controlAdaptersSlice'; -import { useControlAdapterControlImage } from 'features/controlAdapters/hooks/useControlAdapterControlImage'; -import { useControlAdapterProcessedControlImage } from 'features/controlAdapters/hooks/useControlAdapterProcessedControlImage'; -import { useControlAdapterProcessorType } from 'features/controlAdapters/hooks/useControlAdapterProcessorType'; type Props = { id: string; @@ -35,13 +35,15 @@ type Props = { const selector = createSelector( stateSelector, - ({ controlAdapters, gallery }) => { + ({ controlAdapters, gallery, system }) => { const { pendingControlImages } = controlAdapters; const { autoAddBoardId } = gallery; + const { isConnected } = system; return { pendingControlImages, autoAddBoardId, + isConnected, }; }, defaultSelectorOptions @@ -55,18 +57,19 @@ const ControlAdapterImagePreview = ({ isSmall, id }: Props) => { const dispatch = useAppDispatch(); const { t } = useTranslation(); - const { pendingControlImages, autoAddBoardId } = useAppSelector(selector); + const { pendingControlImages, autoAddBoardId, isConnected } = + useAppSelector(selector); const activeTabName = useAppSelector(activeTabNameSelector); const [isMouseOverImage, setIsMouseOverImage] = useState(false); - const { currentData: controlImage } = useGetImageDTOQuery( - controlImageName ?? skipToken - ); + const { currentData: controlImage, isError: isErrorControlImage } = + useGetImageDTOQuery(controlImageName ?? skipToken); - const { currentData: processedControlImage } = useGetImageDTOQuery( - processedControlImageName ?? skipToken - ); + const { + currentData: processedControlImage, + isError: isErrorProcessedControlImage, + } = useGetImageDTOQuery(processedControlImageName ?? skipToken); const [changeIsIntermediate] = useChangeImageIsIntermediateMutation(); const [addToBoard] = useAddImageToBoardMutation(); @@ -158,6 +161,17 @@ const ControlAdapterImagePreview = ({ isSmall, id }: Props) => { !pendingControlImages.includes(id) && processorType !== 'none'; + useEffect(() => { + if (isConnected && (isErrorControlImage || isErrorProcessedControlImage)) { + handleResetControlImage(); + } + }, [ + handleResetControlImage, + isConnected, + isErrorControlImage, + isErrorProcessedControlImage, + ]); + return ( { const { nodeId, field } = props; const dispatch = useAppDispatch(); - - const { currentData: imageDTO } = useGetImageDTOQuery( + const isConnected = useAppSelector((state) => state.system.isConnected); + const { currentData: imageDTO, isError } = useGetImageDTOQuery( field.value?.image_name ?? skipToken ); @@ -67,6 +67,12 @@ const ImageFieldInputComponent = ( [nodeId, field.name] ); + useEffect(() => { + if (isConnected && isError) { + handleReset(); + } + }, [handleReset, isConnected, isError]); + return ( { const { initialImage } = state.generation; + const { isConnected } = state.system; + return { initialImage, isResetButtonDisabled: !initialImage, + isConnected, }; }, defaultSelectorOptions ); const InitialImage = () => { - const { initialImage } = useAppSelector(selector); + const dispatch = useAppDispatch(); + const { initialImage, isConnected } = useAppSelector(selector); - const { currentData: imageDTO } = useGetImageDTOQuery( + const { currentData: imageDTO, isError } = useGetImageDTOQuery( initialImage?.imageName ?? 
skipToken ); @@ -49,6 +54,13 @@ const InitialImage = () => { [] ); + useEffect(() => { + if (isError && isConnected) { + // The image doesn't exist, reset init image + dispatch(clearInitialImage()); + } + }, [dispatch, isConnected, isError]); + return ( Date: Thu, 30 Nov 2023 14:33:10 +0100 Subject: [PATCH 11/30] translationBot(ui): update translation files Updated by "Cleanup translation files" hook in Weblate. Co-authored-by: Hosted Weblate Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ Translation: InvokeAI/Web UI --- invokeai/frontend/web/public/locales/de.json | 3 +-- invokeai/frontend/web/public/locales/zh_CN.json | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/invokeai/frontend/web/public/locales/de.json b/invokeai/frontend/web/public/locales/de.json index 72809cc19d..b67663d6d2 100644 --- a/invokeai/frontend/web/public/locales/de.json +++ b/invokeai/frontend/web/public/locales/de.json @@ -803,8 +803,7 @@ "canny": "Canny", "hedDescription": "Ganzheitlich verschachtelte Kantenerkennung", "scribble": "Scribble", - "maxFaces": "Maximal Anzahl Gesichter", - "unstarImage": "Markierung aufheben" + "maxFaces": "Maximal Anzahl Gesichter" }, "queue": { "status": "Status", diff --git a/invokeai/frontend/web/public/locales/zh_CN.json b/invokeai/frontend/web/public/locales/zh_CN.json index 03838520d3..24105f2b40 100644 --- a/invokeai/frontend/web/public/locales/zh_CN.json +++ b/invokeai/frontend/web/public/locales/zh_CN.json @@ -1137,8 +1137,7 @@ "openPose": "Openpose", "controlAdapter_other": "Control Adapters", "lineartAnime": "Lineart Anime", - "canny": "Canny", - "unstarImage": "取消收藏图像" + "canny": "Canny" }, "queue": { "status": "状态", From 29eade48801e4b60773ed556ec9412f8c7af52ea Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 17 Nov 2023 18:36:28 -0500 Subject: [PATCH 12/30] Add nodes for tile splitting and merging. The main motivation for these nodes is for use in tiled upscaling workflows. --- invokeai/app/invocations/tiles.py | 162 +++++++++++++++++++++++++++++ invokeai/backend/tiles/__init__.py | 0 invokeai/backend/tiles/tiles.py | 155 +++++++++++++++++++++++++++ invokeai/backend/tiles/utils.py | 36 +++++++ 4 files changed, 353 insertions(+) create mode 100644 invokeai/app/invocations/tiles.py create mode 100644 invokeai/backend/tiles/__init__.py create mode 100644 invokeai/backend/tiles/tiles.py create mode 100644 invokeai/backend/tiles/utils.py diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py new file mode 100644 index 0000000000..acc87a7864 --- /dev/null +++ b/invokeai/app/invocations/tiles.py @@ -0,0 +1,162 @@ +import numpy as np +from PIL import Image +from pydantic import BaseModel + +from invokeai.app.invocations.baseinvocation import ( + BaseInvocation, + BaseInvocationOutput, + InputField, + InvocationContext, + OutputField, + WithMetadata, + WithWorkflow, + invocation, + invocation_output, +) +from invokeai.app.invocations.primitives import ImageField, ImageOutput +from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin +from invokeai.backend.tiles.tiles import calc_tiles, merge_tiles_with_linear_blending +from invokeai.backend.tiles.utils import Tile + +# TODO(ryand): Is this important? +_DIMENSION_MULTIPLE_OF = 8 + + +class TileWithImage(BaseModel): + tile: Tile + image: ImageField + + +@invocation_output("calc_tiles_output") +class CalcTilesOutput(BaseInvocationOutput): + # TODO(ryand): Add description from FieldDescriptions. 
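+    # The computed tiles (coordinates and per-edge overlaps) covering the requested image dimensions.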
+ tiles: list[Tile] = OutputField(description="") + + +@invocation("calculate_tiles", title="Calculate Tiles", tags=["tiles"], category="tiles", version="1.0.0") +class CalcTiles(BaseInvocation): + """TODO(ryand)""" + + # Inputs + image_height: int = InputField(ge=1) + image_width: int = InputField(ge=1) + tile_height: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) + tile_width: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) + overlap: int = InputField(ge=0, multiple_of=_DIMENSION_MULTIPLE_OF, default=64) + + def invoke(self, context: InvocationContext) -> CalcTilesOutput: + tiles = calc_tiles( + image_height=self.image_height, + image_width=self.image_width, + tile_height=self.tile_height, + tile_width=self.tile_width, + overlap=self.overlap, + ) + return CalcTilesOutput(tiles=tiles) + + +@invocation_output("tile_to_properties_output") +class TileToPropertiesOutput(BaseInvocationOutput): + # TODO(ryand): Add descriptions. + coords_top: int = OutputField(description="") + coords_bottom: int = OutputField(description="") + coords_left: int = OutputField(description="") + coords_right: int = OutputField(description="") + + overlap_top: int = OutputField(description="") + overlap_bottom: int = OutputField(description="") + overlap_left: int = OutputField(description="") + overlap_right: int = OutputField(description="") + + +@invocation("tile_to_properties") +class TileToProperties(BaseInvocation): + """Split a Tile into its individual properties.""" + + tile: Tile = InputField() + + def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: + return TileToPropertiesOutput( + coords_top=self.tile.coords.top, + coords_bottom=self.tile.coords.bottom, + coords_left=self.tile.coords.left, + coords_right=self.tile.coords.right, + overlap_top=self.tile.overlap.top, + overlap_bottom=self.tile.overlap.bottom, + overlap_left=self.tile.overlap.left, + overlap_right=self.tile.overlap.right, + ) + + +# HACK(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve order. +# Can this be fixed? + + +@invocation_output("pair_tile_image_output") +class PairTileImageOutput(BaseInvocationOutput): + tile_with_image: TileWithImage = OutputField(description="") + + +@invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") +class PairTileImage(BaseInvocation): + image: ImageField = InputField() + tile: Tile = InputField() + + def invoke(self, context: InvocationContext) -> PairTileImageOutput: + return PairTileImageOutput( + tile_with_image=TileWithImage( + tile=self.tile, + image=self.image, + ) + ) + + +@invocation("merge_tiles_to_image", title="Merge Tiles To Image", tags=["tiles"], category="tiles", version="1.0.0") +class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): + """TODO(ryand)""" + + # Inputs + image_height: int = InputField(ge=1) + image_width: int = InputField(ge=1) + tiles_with_images: list[TileWithImage] = InputField() + blend_amount: int = InputField(ge=0) + + def invoke(self, context: InvocationContext) -> ImageOutput: + images = [twi.image for twi in self.tiles_with_images] + tiles = [twi.tile for twi in self.tiles_with_images] + + # Get all tile images for processing. + # TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly + # existed in memory at an earlier point in the graph. 
+ tile_np_images: list[np.ndarray] = [] + for image in images: + pil_image = context.services.images.get_pil_image(image.image_name) + pil_image = pil_image.convert("RGB") + tile_np_images.append(np.array(pil_image)) + + # Prepare the output image buffer. + # Check the first tile to determine how many image channels are expected in the output. + channels = tile_np_images[0].shape[-1] + dtype = tile_np_images[0].dtype + np_image = np.zeros(shape=(self.image_height, self.image_width, channels), dtype=dtype) + + merge_tiles_with_linear_blending( + dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount + ) + pil_image = Image.fromarray(np_image) + + image_dto = context.services.images.create( + image=pil_image, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + metadata=self.metadata, + workflow=self.workflow, + ) + return ImageOutput( + image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) diff --git a/invokeai/backend/tiles/__init__.py b/invokeai/backend/tiles/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py new file mode 100644 index 0000000000..566381d1ff --- /dev/null +++ b/invokeai/backend/tiles/tiles.py @@ -0,0 +1,155 @@ +import math + +import numpy as np + +from invokeai.backend.tiles.utils import TBLR, Tile, paste + +# TODO(ryand) +# Test the following: +# - Tile too big in x, y +# - Overlap too big in x, y +# - Single tile fits +# - Multiple tiles fit perfectly +# - Not evenly divisible by tile size(with overlap) + + +def calc_tiles_with_overlap( + image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int = 0 +) -> list[Tile]: + """Calculate the tile coordinates for a given image shape under a simple tiling scheme with overlaps. + + Args: + image_height (int): The image height in px. + image_width (int): The image width in px. + tile_height (int): The tile height in px. All tiles will have this height. + tile_width (int): The tile width in px. All tiles will have this width. + overlap (int, optional): The target overlap between adjacent tiles. If the tiles do not evenly cover the image + shape, then the last row/column of tiles will overlap more than this. Defaults to 0. + + Returns: + list[Tile]: A list of tiles that cover the image shape. Ordered from left-to-right, top-to-bottom. + """ + assert image_height >= tile_height + assert image_width >= tile_width + assert overlap < tile_height + assert overlap < tile_width + + non_overlap_per_tile_height = tile_height - overlap + non_overlap_per_tile_width = tile_width - overlap + + num_tiles_y = math.ceil((image_height - overlap) / non_overlap_per_tile_height) + num_tiles_x = math.ceil((image_width - overlap) / non_overlap_per_tile_width) + + # Calculate tile coordinates and overlaps. 
+ tiles: list[Tile] = [] + for tile_idx_y in range(num_tiles_y): + for tile_idx_x in range(num_tiles_x): + tile = Tile( + coords=TBLR( + top=tile_idx_y * non_overlap_per_tile_height, + bottom=tile_idx_y * non_overlap_per_tile_height + tile_height, + left=tile_idx_x * non_overlap_per_tile_width, + right=tile_idx_x * non_overlap_per_tile_width + tile_width, + ), + overlap=TBLR( + top=0 if tile_idx_y == 0 else overlap, + bottom=overlap, + left=0 if tile_idx_x == 0 else overlap, + right=overlap, + ), + ) + + if tile.coords.bottom > image_height: + # If this tile would go off the bottom of the image, shift it so that it is aligned with the bottom + # of the image. + tile.coords.bottom = image_height + tile.coords.top = image_height - tile_height + tile.overlap.bottom = 0 + # Note that this could result in a large overlap between this tile and the one above it. + top_neighbor_bottom = (tile_idx_y - 1) * non_overlap_per_tile_height + tile_height + tile.overlap.top = top_neighbor_bottom - tile.coords.top + + if tile.coords.right > image_width: + # If this tile would go off the right edge of the image, shift it so that it is aligned with the + # right edge of the image. + tile.coords.right = image_width + tile.coords.left = image_width - tile_width + tile.overlap.right = 0 + # Note that this could result in a large overlap between this tile and the one to its left. + left_neighbor_right = (tile_idx_x - 1) * non_overlap_per_tile_width + tile_width + tile.overlap.left = left_neighbor_right - tile.coords.left + + tiles.append(tile) + + return tiles + + +# TODO(ryand): +# - Test with blend_amount=0 +# - Test tiles that go off of the dst_image. +# - Test mismatched tiles and tile_images lengths. +# - Test mismatched + + +def merge_tiles_with_linear_blending( + dst_image: np.ndarray, tiles: list[Tile], tile_images: list[np.ndarray], blend_amount: int +): + """Merge a set of image tiles into `dst_image` with linear blending between the tiles. + + We expect every tile edge to either: + 1) have an overlap of 0, because it is aligned with the image edge, or + 2) have an overlap >= blend_amount. + If neither of these conditions are satisfied, we raise an exception. + + The linear blending is centered at the halfway point of the overlap between adjacent tiles. + + Args: + dst_image (np.ndarray): The destination image. Shape: (H, W, C). + tiles (list[Tile]): The list of tiles describing the locations of the respective `tile_images`. + tile_images (list[np.ndarray]): The tile images to merge into `dst_image`. + blend_amount (int): The amount of blending (in px) between adjacent overlapping tiles. + """ + # Sort tiles and images first by left x coordinate, then by top y coordinate. During tile processing, we want to + # iterate over tiles left-to-right, top-to-bottom. + tiles_and_images = list(zip(tiles, tile_images, strict=True)) + tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.left) + tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.top) + + # Prepare 1D linear gradients for blending. + gradient_left_x = np.linspace(start=0.0, stop=1.0, num=blend_amount) + gradient_top_y = np.linspace(start=0.0, stop=1.0, num=blend_amount) + # Convert shape: (blend_amount, ) -> (blend_amount, 1). The extra dimension enables the gradient to be applied + # to a 2D image via broadcasting. Note that no additional dimension is needed on gradient_left_x for + # broadcasting to work correctly. 
+ gradient_top_y = np.expand_dims(gradient_top_y, axis=1) + + for tile, tile_image in tiles_and_images: + # We expect tiles to be written left-to-right, top-to-bottom. We construct a mask that applies linear blending + # to the top and to the left of the current tile. The inverse linear blending is automatically applied to the + # bottom/right of the tiles that have already been pasted by the paste(...) operation. + tile_height, tile_width, _ = tile_image.shape + mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64) + # Top blending: + if tile.overlap.top > 0: + assert tile.overlap.top >= blend_amount + # Center the blending gradient in the middle of the overlap. + blend_start_top = tile.overlap.top // 2 - blend_amount // 2 + # The region above the blending region is masked completely. + mask[:blend_start_top, :] = 0.0 + # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural + # behavior on the corners where vertical and horizontal blending gradients overlap. + mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y + # HACK(ryand): For debugging + # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0 + + # Left blending: + # (See comments under 'top blending' for an explanation of the logic.) + if tile.overlap.left > 0: + assert tile.overlap.left >= blend_amount + blend_start_left = tile.overlap.left // 2 - blend_amount // 2 + mask[:, :blend_start_left] = 0.0 + mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x + # HACK(ryand): For debugging + # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 + + paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask) diff --git a/invokeai/backend/tiles/utils.py b/invokeai/backend/tiles/utils.py new file mode 100644 index 0000000000..cf8e926aa5 --- /dev/null +++ b/invokeai/backend/tiles/utils.py @@ -0,0 +1,36 @@ +from typing import Optional + +import numpy as np +from pydantic import BaseModel, Field + + +class TBLR(BaseModel): + top: int + bottom: int + left: int + right: int + + +class Tile(BaseModel): + coords: TBLR = Field(description="The coordinates of this tile relative to its parent image.") + overlap: TBLR = Field(description="The amount of overlap with adjacent tiles on each side of this tile.") + + +def paste(dst_image: np.ndarray, src_image: np.ndarray, box: TBLR, mask: Optional[np.ndarray] = None): + """Paste a source image into a destination image. + + Args: + dst_image (torch.Tensor): The destination image to paste into. Shape: (H, W, C). + src_image (torch.Tensor): The source image to paste. Shape: (H, W, C). H and W must be compatible with 'box'. + box (TBLR): Box defining the region in the 'dst_image' where 'src_image' will be pasted. + mask (Optional[torch.Tensor]): A mask that defines the blending between 'src_image' and 'dst_image'. + Range: [0.0, 1.0], Shape: (H, W). The output is calculate per-pixel according to + `src * mask + dst * (1 - mask)`. + """ + + if mask is None: + dst_image[box.top : box.bottom, box.left : box.right] = src_image + else: + mask = np.expand_dims(mask, -1) + dst_image_box = dst_image[box.top : box.bottom, box.left : box.right] + dst_image[box.top : box.bottom, box.left : box.right] = src_image * mask + dst_image_box * (1.0 - mask) From 1c8ff0ae669281fce7321482e9e2c534ddeb78e3 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 11:53:40 -0500 Subject: [PATCH 13/30] Add unit tests for tile paste(...) util function. 
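As a quick illustration of the paste(...) blending formula introduced in the previous commit (src * mask + dst * (1 - mask)), here is a minimal sketch. The paste() helper and TBLR box come from invokeai/backend/tiles/utils.py above; the array sizes and pixel values are illustrative only:

    import numpy as np
    from invokeai.backend.tiles.utils import TBLR, paste

    dst = np.zeros((4, 4, 3), dtype=np.float64)
    src = np.full((2, 2, 3), 100.0)

    # Paste the 2x2 src into the top-left corner of dst at 25% opacity.
    mask = np.full((2, 2), 0.25)
    paste(dst_image=dst, src_image=src, box=TBLR(top=0, bottom=2, left=0, right=2), mask=mask)

    # Each covered pixel is now 100 * 0.25 + 0 * 0.75 = 25.0.
    assert np.allclose(dst[:2, :2], 25.0)
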
--- tests/backend/tiles/test_utils.py | 101 ++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tests/backend/tiles/test_utils.py diff --git a/tests/backend/tiles/test_utils.py b/tests/backend/tiles/test_utils.py new file mode 100644 index 0000000000..bbef233ca5 --- /dev/null +++ b/tests/backend/tiles/test_utils.py @@ -0,0 +1,101 @@ +import numpy as np +import pytest + +from invokeai.backend.tiles.utils import TBLR, paste + + +def test_paste_no_mask_success(): + """Test successful paste with mask=None.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + + # Create src_image with a pattern that can be used to validate that it was pasted correctly. + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + src_image[0, :, 0] = 1 # Row of 1s in channel 0. + src_image[:, 0, 1] = 2 # Column of 2s in channel 1. + + # Paste in bottom-center of dst_image. + box = TBLR(top=2, bottom=5, left=1, right=4) + + # Construct expected output image. + expected_output = np.zeros((5, 5, 3), dtype=np.uint8) + expected_output[2, 1:4, 0] = 1 + expected_output[2:5, 1, 1] = 2 + + paste(dst_image=dst_image, src_image=src_image, box=box) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +def test_paste_with_mask_success(): + """Test successful paste with a mask.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + + # Create src_image with a pattern that can be used to validate that it was pasted correctly. + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + src_image[0, :, 0] = 64 # Row of 64s in channel 0. + src_image[:, 0, 1] = 128 # Column of 128s in channel 1. + + # Paste in bottom-center of dst_image. + box = TBLR(top=2, bottom=5, left=1, right=4) + + # Create a mask that blends the top-left corner of 'src_image' at 50%, and ignores the rest of src_image. + mask = np.zeros((3, 3)) + mask[0, 0] = 0.5 + + # Construct expected output image. + expected_output = np.zeros((5, 5, 3), dtype=np.uint8) + expected_output[2, 1, 0] = 32 + expected_output[2, 1, 1] = 64 + + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +@pytest.mark.parametrize("use_mask", [True, False]) +def test_paste_box_overflows_dst_image(use_mask: bool): + """Test that an exception is raised if 'box' overflows the 'dst_image'.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + mask = None + if use_mask: + mask = np.zeros((3, 3)) + + # Construct box that overflows bottom of dst_image. + top = 3 + left = 0 + box = TBLR(top=top, bottom=top + src_image.shape[0], left=left, right=left + src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + +@pytest.mark.parametrize("use_mask", [True, False]) +def test_paste_src_image_does_not_match_box(use_mask: bool): + """Test that an exception is raised if the 'src_image' shape does not match the 'box' dimensions.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + mask = None + if use_mask: + mask = np.zeros((3, 3)) + + # Construct box that is smaller than src_image. 
+ box = TBLR(top=0, bottom=src_image.shape[0] - 1, left=0, right=src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + +def test_paste_mask_does_not_match_src_image(): + """Test that an exception is raised if the 'mask' shape is different than the 'src_image' shape.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + + # Construct mask that is smaller than the src_image. + mask = np.zeros((src_image.shape[0] - 1, src_image.shape[1])) + + # Construct box that matches src_image shape. + box = TBLR(top=0, bottom=src_image.shape[0], left=0, right=src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) From 65a16be299857109aa3c009c386083827b8b5c19 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 14:23:49 -0500 Subject: [PATCH 14/30] Add unit tests for calc_tiles_with_overlap(...) and fix a bug in its implementation. --- invokeai/backend/tiles/tiles.py | 52 ++++++++++--------- invokeai/backend/tiles/utils.py | 11 ++++ tests/backend/tiles/test_tiles.py | 84 +++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 23 deletions(-) create mode 100644 tests/backend/tiles/test_tiles.py diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 566381d1ff..5e5c4b7050 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -1,17 +1,10 @@ import math +from typing import Union import numpy as np from invokeai.backend.tiles.utils import TBLR, Tile, paste -# TODO(ryand) -# Test the following: -# - Tile too big in x, y -# - Overlap too big in x, y -# - Single tile fits -# - Multiple tiles fit perfectly -# - Not evenly divisible by tile size(with overlap) - def calc_tiles_with_overlap( image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int = 0 @@ -40,8 +33,10 @@ def calc_tiles_with_overlap( num_tiles_y = math.ceil((image_height - overlap) / non_overlap_per_tile_height) num_tiles_x = math.ceil((image_width - overlap) / non_overlap_per_tile_width) - # Calculate tile coordinates and overlaps. + # tiles[y * num_tiles_x + x] is the tile for the y'th row, x'th column. tiles: list[Tile] = [] + + # Calculate tile coordinates. (Ignore overlap values for now.) for tile_idx_y in range(num_tiles_y): for tile_idx_x in range(num_tiles_x): tile = Tile( @@ -51,12 +46,7 @@ def calc_tiles_with_overlap( left=tile_idx_x * non_overlap_per_tile_width, right=tile_idx_x * non_overlap_per_tile_width + tile_width, ), - overlap=TBLR( - top=0 if tile_idx_y == 0 else overlap, - bottom=overlap, - left=0 if tile_idx_x == 0 else overlap, - right=overlap, - ), + overlap=TBLR(top=0, bottom=0, left=0, right=0), ) if tile.coords.bottom > image_height: @@ -64,23 +54,39 @@ def calc_tiles_with_overlap( # of the image. tile.coords.bottom = image_height tile.coords.top = image_height - tile_height - tile.overlap.bottom = 0 - # Note that this could result in a large overlap between this tile and the one above it. - top_neighbor_bottom = (tile_idx_y - 1) * non_overlap_per_tile_height + tile_height - tile.overlap.top = top_neighbor_bottom - tile.coords.top if tile.coords.right > image_width: # If this tile would go off the right edge of the image, shift it so that it is aligned with the # right edge of the image. 
tile.coords.right = image_width tile.coords.left = image_width - tile_width - tile.overlap.right = 0 - # Note that this could result in a large overlap between this tile and the one to its left. - left_neighbor_right = (tile_idx_x - 1) * non_overlap_per_tile_width + tile_width - tile.overlap.left = left_neighbor_right - tile.coords.left tiles.append(tile) + def get_tile_or_none(idx_y: int, idx_x: int) -> Union[Tile, None]: + if idx_y < 0 or idx_y > num_tiles_y or idx_x < 0 or idx_x > num_tiles_x: + return None + return tiles[idx_y * num_tiles_x + idx_x] + + # Iterate over tiles again and calculate overlaps. + for tile_idx_y in range(num_tiles_y): + for tile_idx_x in range(num_tiles_x): + cur_tile = get_tile_or_none(tile_idx_y, tile_idx_x) + top_neighbor_tile = get_tile_or_none(tile_idx_y - 1, tile_idx_x) + left_neighbor_tile = get_tile_or_none(tile_idx_y, tile_idx_x - 1) + + assert cur_tile is not None + + # Update cur_tile top-overlap and corresponding top-neighbor bottom-overlap. + if top_neighbor_tile is not None: + cur_tile.overlap.top = max(0, top_neighbor_tile.coords.bottom - cur_tile.coords.top) + top_neighbor_tile.overlap.bottom = cur_tile.overlap.top + + # Update cur_tile left-overlap and corresponding left-neighbor right-overlap. + if left_neighbor_tile is not None: + cur_tile.overlap.left = max(0, left_neighbor_tile.coords.right - cur_tile.coords.left) + left_neighbor_tile.overlap.right = cur_tile.overlap.left + return tiles diff --git a/invokeai/backend/tiles/utils.py b/invokeai/backend/tiles/utils.py index cf8e926aa5..4ad40ffa35 100644 --- a/invokeai/backend/tiles/utils.py +++ b/invokeai/backend/tiles/utils.py @@ -10,11 +10,22 @@ class TBLR(BaseModel): left: int right: int + def __eq__(self, other): + return ( + self.top == other.top + and self.bottom == other.bottom + and self.left == other.left + and self.right == other.right + ) + class Tile(BaseModel): coords: TBLR = Field(description="The coordinates of this tile relative to its parent image.") overlap: TBLR = Field(description="The amount of overlap with adjacent tiles on each side of this tile.") + def __eq__(self, other): + return self.coords == other.coords and self.overlap == other.overlap + def paste(dst_image: np.ndarray, src_image: np.ndarray, box: TBLR, mask: Optional[np.ndarray] = None): """Paste a source image into a destination image. diff --git a/tests/backend/tiles/test_tiles.py b/tests/backend/tiles/test_tiles.py new file mode 100644 index 0000000000..332ab15005 --- /dev/null +++ b/tests/backend/tiles/test_tiles.py @@ -0,0 +1,84 @@ +import pytest + +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap +from invokeai.backend.tiles.utils import TBLR, Tile + +#################################### +# Test calc_tiles_with_overlap(...) +#################################### + + +def test_calc_tiles_with_overlap_single_tile(): + """Test calc_tiles_with_overlap() behavior when a single tile covers the image.""" + tiles = calc_tiles_with_overlap(image_height=512, image_width=1024, tile_height=512, tile_width=1024, overlap=64) + + expected_tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=1024), overlap=TBLR(top=0, bottom=0, left=0, right=0)) + ] + + assert tiles == expected_tiles + + +def test_calc_tiles_with_overlap_evenly_divisible(): + """Test calc_tiles_with_overlap() behavior when the image is evenly covered by multiple tiles.""" + # Parameters chosen so that image is evenly covered by 2 rows, 3 columns of tiles. 
+ tiles = calc_tiles_with_overlap(image_height=576, image_width=1600, tile_height=320, tile_width=576, overlap=64) + + expected_tiles = [ + # Row 0 + Tile(coords=TBLR(top=0, bottom=320, left=0, right=576), overlap=TBLR(top=0, bottom=64, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=320, left=512, right=1088), overlap=TBLR(top=0, bottom=64, left=64, right=64)), + Tile(coords=TBLR(top=0, bottom=320, left=1024, right=1600), overlap=TBLR(top=0, bottom=64, left=64, right=0)), + # Row 1 + Tile(coords=TBLR(top=256, bottom=576, left=0, right=576), overlap=TBLR(top=64, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=256, bottom=576, left=512, right=1088), overlap=TBLR(top=64, bottom=0, left=64, right=64)), + Tile(coords=TBLR(top=256, bottom=576, left=1024, right=1600), overlap=TBLR(top=64, bottom=0, left=64, right=0)), + ] + + assert tiles == expected_tiles + + +def test_calc_tiles_with_overlap_not_evenly_divisible(): + """Test calc_tiles_with_overlap() behavior when the image requires 'uneven' overlaps to achieve proper coverage.""" + # Parameters chosen so that image is covered by 2 rows and 3 columns of tiles, with uneven overlaps. + tiles = calc_tiles_with_overlap(image_height=400, image_width=1200, tile_height=256, tile_width=512, overlap=64) + + expected_tiles = [ + # Row 0 + Tile(coords=TBLR(top=0, bottom=256, left=0, right=512), overlap=TBLR(top=0, bottom=112, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=256, left=448, right=960), overlap=TBLR(top=0, bottom=112, left=64, right=272)), + Tile(coords=TBLR(top=0, bottom=256, left=688, right=1200), overlap=TBLR(top=0, bottom=112, left=272, right=0)), + # Row 1 + Tile(coords=TBLR(top=144, bottom=400, left=0, right=512), overlap=TBLR(top=112, bottom=0, left=0, right=64)), + Tile( + coords=TBLR(top=144, bottom=400, left=448, right=960), overlap=TBLR(top=112, bottom=0, left=64, right=272) + ), + Tile( + coords=TBLR(top=144, bottom=400, left=688, right=1200), overlap=TBLR(top=112, bottom=0, left=272, right=0) + ), + ] + + assert tiles == expected_tiles + + +@pytest.mark.parametrize( + ["image_height", "image_width", "tile_height", "tile_width", "overlap", "raises"], + [ + (128, 128, 128, 128, 127, False), # OK + (128, 128, 128, 128, 0, False), # OK + (128, 128, 64, 64, 0, False), # OK + (128, 128, 129, 128, 0, True), # tile_height exceeds image_height. + (128, 128, 128, 129, 0, True), # tile_width exceeds image_width. + (128, 128, 64, 128, 64, True), # overlap equals tile_height. + (128, 128, 128, 64, 64, True), # overlap equals tile_width. + ], +) +def test_calc_tiles_with_overlap_input_validation( + image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int, raises: bool +): + """Test that calc_tiles_with_overlap() raises an exception if the inputs are invalid.""" + if raises: + with pytest.raises(AssertionError): + calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) + else: + calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) From 76b888de1769239ef072cc830a06c9d9bacfbc52 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 15:42:23 -0500 Subject: [PATCH 15/30] Add unit tests for merge_tiles_with_linear_blending(...). 
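For reference, a minimal sketch of how calc_tiles_with_overlap(...) and merge_tiles_with_linear_blending(...) are intended to compose in a tiled workflow. The image size, random content and blend amount below are illustrative only; in a real upscaling graph each tile crop would be processed (e.g. denoised) before being merged back:

    import numpy as np
    from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending

    image = np.random.randint(0, 256, size=(400, 1200, 3), dtype=np.uint8)

    tiles = calc_tiles_with_overlap(
        image_height=400, image_width=1200, tile_height=256, tile_width=512, overlap=64
    )

    # Crop the source image into tiles. Here the crops are passed through unchanged.
    tile_images = [
        image[t.coords.top : t.coords.bottom, t.coords.left : t.coords.right] for t in tiles
    ]

    # Blend the (unmodified) tiles back together; the result approximates the original image.
    merged = np.zeros_like(image)
    merge_tiles_with_linear_blending(
        dst_image=merged, tiles=tiles, tile_images=tile_images, blend_amount=32
    )
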
--- invokeai/backend/tiles/tiles.py | 11 +-- tests/backend/tiles/test_tiles.py | 142 +++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 10 deletions(-) diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 5e5c4b7050..3d64e3e145 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -90,13 +90,6 @@ def calc_tiles_with_overlap( return tiles -# TODO(ryand): -# - Test with blend_amount=0 -# - Test tiles that go off of the dst_image. -# - Test mismatched tiles and tile_images lengths. -# - Test mismatched - - def merge_tiles_with_linear_blending( dst_image: np.ndarray, tiles: list[Tile], tile_images: list[np.ndarray], blend_amount: int ): @@ -145,7 +138,7 @@ def merge_tiles_with_linear_blending( # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural # behavior on the corners where vertical and horizontal blending gradients overlap. mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y - # HACK(ryand): For debugging + # For visual debugging: # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0 # Left blending: @@ -155,7 +148,7 @@ def merge_tiles_with_linear_blending( blend_start_left = tile.overlap.left // 2 - blend_amount // 2 mask[:, :blend_start_left] = 0.0 mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x - # HACK(ryand): For debugging + # For visual debugging: # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask) diff --git a/tests/backend/tiles/test_tiles.py b/tests/backend/tiles/test_tiles.py index 332ab15005..353e65d336 100644 --- a/tests/backend/tiles/test_tiles.py +++ b/tests/backend/tiles/test_tiles.py @@ -1,6 +1,7 @@ +import numpy as np import pytest -from invokeai.backend.tiles.tiles import calc_tiles_with_overlap +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending from invokeai.backend.tiles.utils import TBLR, Tile #################################### @@ -82,3 +83,142 @@ def test_calc_tiles_with_overlap_input_validation( calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) else: calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) + + +############################################# +# Test merge_tiles_with_linear_blending(...) +############################################# + + +@pytest.mark.parametrize("blend_amount", [0, 32]) +def test_merge_tiles_with_linear_blending_horizontal(blend_amount: int): + """Test merge_tiles_with_linear_blending(...) behavior when merging horizontally.""" + # Initialize 2 tiles side-by-side. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=512, left=448, right=960), overlap=TBLR(top=0, bottom=0, left=64, right=0)), + ] + + dst_image = np.zeros((512, 960, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. Pixel values are set based on the tile index. + tile_images = [ + np.zeros((512, 512, 3)) + 64, + np.zeros((512, 512, 3)) + 128, + ] + + # Calculate expected output. 
+ expected_output = np.zeros((512, 960, 3), dtype=np.uint8) + expected_output[:, : 480 - (blend_amount // 2), :] = 64 + if blend_amount > 0: + gradient = np.linspace(start=64, stop=128, num=blend_amount, dtype=np.uint8).reshape((1, blend_amount, 1)) + expected_output[:, 480 - (blend_amount // 2) : 480 + (blend_amount // 2), :] = gradient + expected_output[:, 480 + (blend_amount // 2) :, :] = 128 + + merge_tiles_with_linear_blending( + dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=blend_amount + ) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +@pytest.mark.parametrize("blend_amount", [0, 32]) +def test_merge_tiles_with_linear_blending_vertical(blend_amount: int): + """Test merge_tiles_with_linear_blending(...) behavior when merging vertically.""" + # Initialize 2 tiles stacked vertically. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=64, left=0, right=0)), + Tile(coords=TBLR(top=448, bottom=960, left=0, right=512), overlap=TBLR(top=64, bottom=0, left=0, right=0)), + ] + + dst_image = np.zeros((960, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. Pixel values are set based on the tile index. + tile_images = [ + np.zeros((512, 512, 3)) + 64, + np.zeros((512, 512, 3)) + 128, + ] + + # Calculate expected output. + expected_output = np.zeros((960, 512, 3), dtype=np.uint8) + expected_output[: 480 - (blend_amount // 2), :, :] = 64 + if blend_amount > 0: + gradient = np.linspace(start=64, stop=128, num=blend_amount, dtype=np.uint8).reshape((blend_amount, 1, 1)) + expected_output[480 - (blend_amount // 2) : 480 + (blend_amount // 2), :, :] = gradient + expected_output[480 + (blend_amount // 2) :, :, :] = 128 + + merge_tiles_with_linear_blending( + dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=blend_amount + ) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +def test_merge_tiles_with_linear_blending_blend_amount_exceeds_vertical_overlap(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if 'blend_amount' exceeds the overlap between + any vertically adjacent tiles. + """ + # Initialize 2 tiles stacked vertically. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=64, left=0, right=0)), + Tile(coords=TBLR(top=448, bottom=960, left=0, right=512), overlap=TBLR(top=64, bottom=0, left=0, right=0)), + ] + + dst_image = np.zeros((960, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. + tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + # blend_amount=128 exceeds overlap of 64, so should raise exception. + with pytest.raises(AssertionError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=128) + + +def test_merge_tiles_with_linear_blending_blend_amount_exceeds_horizontal_overlap(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if 'blend_amount' exceeds the overlap between + any horizontally adjacent tiles. + """ + # Initialize 2 tiles side-by-side. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=512, left=448, right=960), overlap=TBLR(top=0, bottom=0, left=64, right=0)), + ] + + dst_image = np.zeros((512, 960, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. 
+ tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + # blend_amount=128 exceeds overlap of 64, so should raise exception. + with pytest.raises(AssertionError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=128) + + +def test_merge_tiles_with_linear_blending_tiles_overflow_dst_image(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if any of the tiles overflows the + dst_image. + """ + tiles = [Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=0))] + + dst_image = np.zeros((256, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. + tile_images = [np.zeros((512, 512, 3))] + + with pytest.raises(ValueError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=0) + + +def test_merge_tiles_with_linear_blending_mismatched_list_lengths(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if the lengths of 'tiles' and 'tile_images' + do not match. + """ + tiles = [Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=0))] + + dst_image = np.zeros((256, 512, 3), dtype=np.uint8) + + # tile_images is longer than tiles, so should cause an exception. + tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + with pytest.raises(ValueError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=0) From 7f816c9243d5522173d5ee9de9c2dc0e87844991 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 17:18:13 -0500 Subject: [PATCH 16/30] Tidy up tiles invocations, add documentation. --- invokeai/app/invocations/tiles.py | 92 ++++++++++++++++--------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index acc87a7864..c6499c45d6 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -15,65 +15,65 @@ from invokeai.app.invocations.baseinvocation import ( ) from invokeai.app.invocations.primitives import ImageField, ImageOutput from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin -from invokeai.backend.tiles.tiles import calc_tiles, merge_tiles_with_linear_blending +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending from invokeai.backend.tiles.utils import Tile -# TODO(ryand): Is this important? -_DIMENSION_MULTIPLE_OF = 8 - class TileWithImage(BaseModel): tile: Tile image: ImageField -@invocation_output("calc_tiles_output") -class CalcTilesOutput(BaseInvocationOutput): - # TODO(ryand): Add description from FieldDescriptions. 
- tiles: list[Tile] = OutputField(description="") +@invocation_output("calculate_image_tiles_output") +class CalculateImageTilesOutput(BaseInvocationOutput): + tiles: list[Tile] = OutputField(description="The tiles coordinates that cover a particular image shape.") -@invocation("calculate_tiles", title="Calculate Tiles", tags=["tiles"], category="tiles", version="1.0.0") -class CalcTiles(BaseInvocation): - """TODO(ryand)""" +@invocation("calculate_image_tiles", title="Calculate Image Tiles", tags=["tiles"], category="tiles", version="1.0.0") +class CalculateImageTiles(BaseInvocation): + """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" - # Inputs - image_height: int = InputField(ge=1) - image_width: int = InputField(ge=1) - tile_height: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) - tile_width: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) - overlap: int = InputField(ge=0, multiple_of=_DIMENSION_MULTIPLE_OF, default=64) + image_height: int = InputField( + ge=1, default=1024, description="The image height, in pixels, to calculate tiles for." + ) + image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") + tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") + tile_width: int = InputField(ge=1, default=576, description="The tile width, in pixels.") + overlap: int = InputField( + ge=0, + default=128, + description="The target overlap, in pixels, between adjacent tiles. Adjacent tiles will overlap by at least this amount", + ) - def invoke(self, context: InvocationContext) -> CalcTilesOutput: - tiles = calc_tiles( + def invoke(self, context: InvocationContext) -> CalculateImageTilesOutput: + tiles = calc_tiles_with_overlap( image_height=self.image_height, image_width=self.image_width, tile_height=self.tile_height, tile_width=self.tile_width, overlap=self.overlap, ) - return CalcTilesOutput(tiles=tiles) + return CalculateImageTilesOutput(tiles=tiles) @invocation_output("tile_to_properties_output") class TileToPropertiesOutput(BaseInvocationOutput): - # TODO(ryand): Add descriptions. 
- coords_top: int = OutputField(description="") - coords_bottom: int = OutputField(description="") - coords_left: int = OutputField(description="") - coords_right: int = OutputField(description="") + coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") + coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") + coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") + coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") - overlap_top: int = OutputField(description="") - overlap_bottom: int = OutputField(description="") - overlap_left: int = OutputField(description="") - overlap_right: int = OutputField(description="") + overlap_top: int = OutputField(description="Overlap between this tile and its top neighbor.") + overlap_bottom: int = OutputField(description="Overlap between this tile and its bottom neighbor.") + overlap_left: int = OutputField(description="Overlap between this tile and its left neighbor.") + overlap_right: int = OutputField(description="Overlap between this tile and its right neighbor.") -@invocation("tile_to_properties") +@invocation("tile_to_properties", title="Tile to Properties", tags=["tiles"], category="tiles", version="1.0.0") class TileToProperties(BaseInvocation): """Split a Tile into its individual properties.""" - tile: Tile = InputField() + tile: Tile = InputField(description="The tile to split into properties.") def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: return TileToPropertiesOutput( @@ -88,19 +88,20 @@ class TileToProperties(BaseInvocation): ) -# HACK(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve order. -# Can this be fixed? - - @invocation_output("pair_tile_image_output") class PairTileImageOutput(BaseInvocationOutput): - tile_with_image: TileWithImage = OutputField(description="") + tile_with_image: TileWithImage = OutputField(description="A tile description with its corresponding image.") @invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") class PairTileImage(BaseInvocation): - image: ImageField = InputField() - tile: Tile = InputField() + """Pair an image with its tile properties.""" + + # TODO(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve + # order. Can this be fixed? 
+ + image: ImageField = InputField(description="The tile image.") + tile: Tile = InputField(description="The tile properties.") def invoke(self, context: InvocationContext) -> PairTileImageOutput: return PairTileImageOutput( @@ -111,15 +112,18 @@ class PairTileImage(BaseInvocation): ) -@invocation("merge_tiles_to_image", title="Merge Tiles To Image", tags=["tiles"], category="tiles", version="1.0.0") +@invocation("merge_tiles_to_image", title="Merge Tiles to Image", tags=["tiles"], category="tiles", version="1.0.0") class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): - """TODO(ryand)""" + """Merge multiple tile images into a single image.""" # Inputs - image_height: int = InputField(ge=1) - image_width: int = InputField(ge=1) - tiles_with_images: list[TileWithImage] = InputField() - blend_amount: int = InputField(ge=0) + image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") + image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") + tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") + blend_amount: int = InputField( + ge=0, + description="The amount to blend adjacent tiles in pixels. Must be <= the amount of overlap between adjacent tiles.", + ) def invoke(self, context: InvocationContext) -> ImageOutput: images = [twi.image for twi in self.tiles_with_images] From 67540c9ee064b1c4f5e9aa1c71df09be6300bbbd Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 23 Nov 2023 10:52:03 -0500 Subject: [PATCH 17/30] (minor) Add 'Invocation' suffix to all tiling node classes. --- invokeai/app/invocations/tiles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index c6499c45d6..927e99be64 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -30,7 +30,7 @@ class CalculateImageTilesOutput(BaseInvocationOutput): @invocation("calculate_image_tiles", title="Calculate Image Tiles", tags=["tiles"], category="tiles", version="1.0.0") -class CalculateImageTiles(BaseInvocation): +class CalculateImageTilesInvocation(BaseInvocation): """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" image_height: int = InputField( @@ -70,7 +70,7 @@ class TileToPropertiesOutput(BaseInvocationOutput): @invocation("tile_to_properties", title="Tile to Properties", tags=["tiles"], category="tiles", version="1.0.0") -class TileToProperties(BaseInvocation): +class TileToPropertiesInvocation(BaseInvocation): """Split a Tile into its individual properties.""" tile: Tile = InputField(description="The tile to split into properties.") @@ -94,7 +94,7 @@ class PairTileImageOutput(BaseInvocationOutput): @invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") -class PairTileImage(BaseInvocation): +class PairTileImageInvocation(BaseInvocation): """Pair an image with its tile properties.""" # TODO(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve @@ -113,7 +113,7 @@ class PairTileImage(BaseInvocation): @invocation("merge_tiles_to_image", title="Merge Tiles to Image", tags=["tiles"], category="tiles", version="1.0.0") -class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): +class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # 
Inputs From 843f2d71d663ac95f970645489fbc2f7f74be9fb Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:02:10 -0500 Subject: [PATCH 18/30] Copy CropLatentsInvocation from https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80. --- invokeai/app/invocations/latent.py | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ab59b41865..26294ed7f7 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1166,3 +1166,56 @@ class BlendLatentsInvocation(BaseInvocation): # context.services.latents.set(name, resized_latents) context.services.latents.save(name, blended_latents) return build_latents_output(latents_name=name, latents=blended_latents) + + +@invocation( + "lcrop", + title="Crop Latents", + tags=["latents", "crop"], + category="latents", + version="1.0.0", +) +class CropLatentsInvocation(BaseInvocation): + """Crops latents""" + + latents: LatentsField = InputField( + description=FieldDescriptions.latents, + input=Input.Connection, + ) + width: int = InputField( + ge=64, + multiple_of=_downsampling_factor, + description=FieldDescriptions.width, + ) + height: int = InputField( + ge=64, + multiple_of=_downsampling_factor, + description=FieldDescriptions.width, + ) + x_offset: int = InputField( + ge=0, + multiple_of=_downsampling_factor, + description="x-coordinate", + ) + y_offset: int = InputField( + ge=0, + multiple_of=_downsampling_factor, + description="y-coordinate", + ) + + def invoke(self, context: InvocationContext) -> LatentsOutput: + latents = context.services.latents.get(self.latents.latents_name) + + x1 = self.x_offset // _downsampling_factor + y1 = self.y_offset // _downsampling_factor + x2 = x1 + (self.width // _downsampling_factor) + y2 = y1 + (self.height // _downsampling_factor) + + cropped_latents = latents[:, :, y1:y2, x1:x2] + + # resized_latents = resized_latents.to("cpu") + + name = f"{context.graph_execution_state_id}__{self.id}" + context.services.latents.save(name, cropped_latents) + + return build_latents_output(latents_name=name, latents=cropped_latents) From 18c6ff427ec2eabf7721ad078cd4179edf795d00 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:12:15 -0500 Subject: [PATCH 19/30] Use LATENT_SCALE_FACTOR = 8 constant in CropLatentsInvocation. --- invokeai/app/invocations/latent.py | 34 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 26294ed7f7..49ffa1f7e9 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -79,6 +79,12 @@ DEFAULT_PRECISION = choose_precision(choose_torch_device()) SAMPLER_NAME_VALUES = Literal[tuple(SCHEDULER_MAP.keys())] +# HACK: Many nodes are currently hard-coded to use a fixed latent scale factor of 8. This is fragile, and will need to +# be addressed if future models use a different latent scale factor. Also, note that there may be places where the scale +# factor is hard-coded to a literal '8' rather than using this constant. +# The ratio of image:latent dimensions is LATENT_SCALE_FACTOR:1, or 8:1. 
+LATENT_SCALE_FACTOR = 8 + @invocation_output("scheduler_output") class SchedulerOutput(BaseInvocationOutput): @@ -394,9 +400,9 @@ class DenoiseLatentsInvocation(BaseInvocation): exit_stack: ExitStack, do_classifier_free_guidance: bool = True, ) -> List[ControlNetData]: - # assuming fixed dimensional scaling of 8:1 for image:latents - control_height_resize = latents_shape[2] * 8 - control_width_resize = latents_shape[3] * 8 + # Assuming fixed dimensional scaling of LATENT_SCALE_FACTOR. + control_height_resize = latents_shape[2] * LATENT_SCALE_FACTOR + control_width_resize = latents_shape[3] * LATENT_SCALE_FACTOR if control_input is None: control_list = None elif isinstance(control_input, list) and len(control_input) == 0: @@ -909,12 +915,12 @@ class ResizeLatentsInvocation(BaseInvocation): ) width: int = InputField( ge=64, - multiple_of=8, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) height: int = InputField( ge=64, - multiple_of=8, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode) @@ -928,7 +934,7 @@ class ResizeLatentsInvocation(BaseInvocation): resized_latents = torch.nn.functional.interpolate( latents.to(device), - size=(self.height // 8, self.width // 8), + size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR), mode=self.mode, antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False, ) @@ -1184,32 +1190,32 @@ class CropLatentsInvocation(BaseInvocation): ) width: int = InputField( ge=64, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) height: int = InputField( ge=64, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) x_offset: int = InputField( ge=0, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description="x-coordinate", ) y_offset: int = InputField( ge=0, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description="y-coordinate", ) def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) - x1 = self.x_offset // _downsampling_factor - y1 = self.y_offset // _downsampling_factor - x2 = x1 + (self.width // _downsampling_factor) - y2 = y1 + (self.height // _downsampling_factor) + x1 = self.x_offset // LATENT_SCALE_FACTOR + y1 = self.y_offset // LATENT_SCALE_FACTOR + x2 = x1 + (self.width // LATENT_SCALE_FACTOR) + y2 = y1 + (self.height // LATENT_SCALE_FACTOR) cropped_latents = latents[:, :, y1:y2, x1:x2] From 7cab51745b8684e9737ccda334b8191c27272419 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:30:00 -0500 Subject: [PATCH 20/30] Improve documentation of CropLatentsInvocation. --- invokeai/app/invocations/latent.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 49ffa1f7e9..ad2de0d9ae 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1182,31 +1182,33 @@ class BlendLatentsInvocation(BaseInvocation): version="1.0.0", ) class CropLatentsInvocation(BaseInvocation): - """Crops latents""" + """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be + divisible by the latent scale factor of 8. 
+ """ latents: LatentsField = InputField( description=FieldDescriptions.latents, input=Input.Connection, ) width: int = InputField( - ge=64, + ge=1, multiple_of=LATENT_SCALE_FACTOR, - description=FieldDescriptions.width, + description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) height: int = InputField( - ge=64, + ge=1, multiple_of=LATENT_SCALE_FACTOR, - description=FieldDescriptions.width, + description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) x_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, - description="x-coordinate", + description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) y_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, - description="y-coordinate", + description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) def invoke(self, context: InvocationContext) -> LatentsOutput: From 9b863fb9bcd5038324ae40ac0afdab5f8926570e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 12:05:55 -0500 Subject: [PATCH 21/30] Rename CropLatentsInvocation -> CropLatentsCoreInvocation to prevent conflict with custom node. And other minor tidying. --- invokeai/app/invocations/latent.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ad2de0d9ae..e48d7458d4 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1174,14 +1174,18 @@ class BlendLatentsInvocation(BaseInvocation): return build_latents_output(latents_name=name, latents=blended_latents) +# The Crop Latents node was copied from @skunkworxdark's implementation here: +# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80 @invocation( - "lcrop", + "crop_latents", title="Crop Latents", tags=["latents", "crop"], category="latents", version="1.0.0", ) -class CropLatentsInvocation(BaseInvocation): +# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`. +# Currently, if the class names conflict then 'GET /openapi.json' fails. +class CropLatentsCoreInvocation(BaseInvocation): """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be divisible by the latent scale factor of 8. """ @@ -1219,9 +1223,7 @@ class CropLatentsInvocation(BaseInvocation): x2 = x1 + (self.width // LATENT_SCALE_FACTOR) y2 = y1 + (self.height // LATENT_SCALE_FACTOR) - cropped_latents = latents[:, :, y1:y2, x1:x2] - - # resized_latents = resized_latents.to("cpu") + cropped_latents = latents[..., y1:y2, x1:x2] name = f"{context.graph_execution_state_id}__{self.id}" context.services.latents.save(name, cropped_latents) From e5a212b5c877691cf04cbb1af2f98f3b8e1f477f Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 12:30:10 -0500 Subject: [PATCH 22/30] Update tiling nodes to use width-before-height field ordering convention. 
--- invokeai/app/invocations/tiles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 927e99be64..350141a2f3 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -33,12 +33,12 @@ class CalculateImageTilesOutput(BaseInvocationOutput): class CalculateImageTilesInvocation(BaseInvocation): """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" + image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") image_height: int = InputField( ge=1, default=1024, description="The image height, in pixels, to calculate tiles for." ) - image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") - tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") tile_width: int = InputField(ge=1, default=576, description="The tile width, in pixels.") + tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") overlap: int = InputField( ge=0, default=128, @@ -117,8 +117,8 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # Inputs - image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") + image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") blend_amount: int = InputField( ge=0, From b19ed36b43b9a261b77defc0589f796725a458f5 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 13:49:33 -0500 Subject: [PATCH 23/30] Add width and height fields to TileToPropertiesInvocation output to avoid having to calculate them with math nodes. --- invokeai/app/invocations/tiles.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 350141a2f3..934861f008 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -63,6 +63,14 @@ class TileToPropertiesOutput(BaseInvocationOutput): coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") + # HACK: The width and height fields are 'meta' fields that can easily be calculated from the other fields on this + # object. Including redundant fields that can cheaply/easily be re-calculated goes against conventional API design + # principles. These fields are included, because 1) they are often useful in tiled workflows, and 2) they are + # difficult to calculate in a workflow (even though it's just a couple of subtraction nodes the graph gets + # surprisingly complicated). + width: int = OutputField(description="The width of the tile. Equal to coords_right - coords_left.") + height: int = OutputField(description="The height of the tile. 
Equal to coords_bottom - coords_top.") + overlap_top: int = OutputField(description="Overlap between this tile and its top neighbor.") overlap_bottom: int = OutputField(description="Overlap between this tile and its bottom neighbor.") overlap_left: int = OutputField(description="Overlap between this tile and its left neighbor.") @@ -81,6 +89,8 @@ class TileToPropertiesInvocation(BaseInvocation): coords_bottom=self.tile.coords.bottom, coords_left=self.tile.coords.left, coords_right=self.tile.coords.right, + width=self.tile.coords.right - self.tile.coords.left, + height=self.tile.coords.bottom - self.tile.coords.top, overlap_top=self.tile.overlap.top, overlap_bottom=self.tile.overlap.bottom, overlap_left=self.tile.overlap.left, From 32da359ba5e6ca86a398bf896dfa54f528c583aa Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 14:07:38 -0500 Subject: [PATCH 24/30] Infer a tight-fitting output image size from the passed tiles in MergeTilesToImageInvocation. --- invokeai/app/invocations/tiles.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 934861f008..d1b51a43f0 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -127,8 +127,6 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # Inputs - image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") - image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") blend_amount: int = InputField( ge=0, @@ -139,6 +137,13 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): images = [twi.image for twi in self.tiles_with_images] tiles = [twi.tile for twi in self.tiles_with_images] + # Infer the output image dimensions from the max/min tile limits. + height = 0 + width = 0 + for tile in tiles: + height = max(height, tile.coords.bottom) + width = max(width, tile.coords.right) + # Get all tile images for processing. # TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly # existed in memory at an earlier point in the graph. @@ -152,7 +157,7 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): # Check the first tile to determine how many image channels are expected in the output. channels = tile_np_images[0].shape[-1] dtype = tile_np_images[0].dtype - np_image = np.zeros(shape=(self.image_height, self.image_width, channels), dtype=dtype) + np_image = np.zeros(shape=(height, width, channels), dtype=dtype) merge_tiles_with_linear_blending( dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount From bfdef120d1ef253448ddc7a2a99a99967332f26a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 23:34:45 -0500 Subject: [PATCH 25/30] Re-organize merge_tiles_with_linear_blending(...) to merge rows horizontally first and then vertically. This change achieves slightly more natural blending on the corners where 4 tiles overlap. 
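To make the blending concrete, here is a small worked example of the per-edge blend mask that merge_tiles_with_linear_blending(...) builds. It mirrors the left-edge logic in the diff below; the overlap, blend amount and tile size are illustrative only:

    import numpy as np

    blend_amount = 4
    overlap_left = 8
    tile_h, tile_w = 6, 12

    # Mask for a tile being pasted to the right of an already-pasted neighbour.
    mask = np.ones((tile_h, tile_w), dtype=np.float64)
    blend_start = overlap_left // 2 - blend_amount // 2  # gradient centred in the overlap -> 2
    mask[:, :blend_start] = 0.0                          # keep the neighbour's pixels here
    mask[:, blend_start : blend_start + blend_amount] = np.linspace(0.0, 1.0, blend_amount)
    # Columns 0-1 take the neighbour, columns 2-5 ramp towards this tile, columns 6+ take this tile.

With this change the same mask logic is applied twice: first horizontally while assembling each row of tiles into a row buffer, then vertically (with a top-edge mask) while pasting each row buffer into the destination image.
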
--- invokeai/backend/tiles/tiles.py | 101 +++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 3d64e3e145..3a678d825e 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -114,6 +114,24 @@ def merge_tiles_with_linear_blending( tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.left) tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.top) + # Organize tiles into rows. + tile_and_image_rows: list[list[tuple[Tile, np.ndarray]]] = [] + cur_tile_and_image_row: list[tuple[Tile, np.ndarray]] = [] + first_tile_in_cur_row, _ = tiles_and_images[0] + for tile_and_image in tiles_and_images: + tile, _ = tile_and_image + if not ( + tile.coords.top == first_tile_in_cur_row.coords.top + and tile.coords.bottom == first_tile_in_cur_row.coords.bottom + ): + # Store the previous row, and start a new one. + tile_and_image_rows.append(cur_tile_and_image_row) + cur_tile_and_image_row = [] + first_tile_in_cur_row, _ = tile_and_image + + cur_tile_and_image_row.append(tile_and_image) + tile_and_image_rows.append(cur_tile_and_image_row) + # Prepare 1D linear gradients for blending. gradient_left_x = np.linspace(start=0.0, stop=1.0, num=blend_amount) gradient_top_y = np.linspace(start=0.0, stop=1.0, num=blend_amount) @@ -122,33 +140,62 @@ def merge_tiles_with_linear_blending( # broadcasting to work correctly. gradient_top_y = np.expand_dims(gradient_top_y, axis=1) - for tile, tile_image in tiles_and_images: - # We expect tiles to be written left-to-right, top-to-bottom. We construct a mask that applies linear blending - # to the top and to the left of the current tile. The inverse linear blending is automatically applied to the - # bottom/right of the tiles that have already been pasted by the paste(...) operation. - tile_height, tile_width, _ = tile_image.shape - mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64) + for tile_and_image_row in tile_and_image_rows: + first_tile_in_row, _ = tile_and_image_row[0] + row_height = first_tile_in_row.coords.bottom - first_tile_in_row.coords.top + row_image = np.zeros((row_height, dst_image.shape[1], dst_image.shape[2]), dtype=dst_image.dtype) + + # Blend the tiles in the row horizontally. + for tile, tile_image in tile_and_image_row: + # We expect the tiles to be ordered left-to-right. For each tile, we construct a mask that applies linear + # blending to the left of the current tile. The inverse linear blending is automatically applied to the + # right of the tiles that have already been pasted by the paste(...) operation. + tile_height, tile_width, _ = tile_image.shape + mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64) + + # Left blending: + if tile.overlap.left > 0: + assert tile.overlap.left >= blend_amount + # Center the blending gradient in the middle of the overlap. + blend_start_left = tile.overlap.left // 2 - blend_amount // 2 + # The region left of the blending region is masked completely. + mask[:, :blend_start_left] = 0.0 + # Apply the blend gradient to the mask. 
+ mask[:, blend_start_left : blend_start_left + blend_amount] = gradient_left_x + # For visual debugging: + # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 + + paste( + dst_image=row_image, + src_image=tile_image, + box=TBLR( + top=0, bottom=tile.coords.bottom - tile.coords.top, left=tile.coords.left, right=tile.coords.right + ), + mask=mask, + ) + + # Blend the row into the dst_image vertically. + # We construct a mask that applies linear blending to the top of the current row. The inverse linear blending is + # automatically applied to the bottom of the tiles that have already been pasted by the paste(...) operation. + mask = np.ones(shape=(row_image.shape[0], row_image.shape[1]), dtype=np.float64) # Top blending: - if tile.overlap.top > 0: - assert tile.overlap.top >= blend_amount - # Center the blending gradient in the middle of the overlap. - blend_start_top = tile.overlap.top // 2 - blend_amount // 2 - # The region above the blending region is masked completely. + # (See comments under 'Left blending' for an explanation of the logic.) + # We assume that the entire row has the same vertical overlaps as the first_tile_in_row. + if first_tile_in_row.overlap.top > 0: + assert first_tile_in_row.overlap.top >= blend_amount + blend_start_top = first_tile_in_row.overlap.top // 2 - blend_amount // 2 mask[:blend_start_top, :] = 0.0 - # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural - # behavior on the corners where vertical and horizontal blending gradients overlap. - mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y + mask[blend_start_top : blend_start_top + blend_amount, :] = gradient_top_y # For visual debugging: - # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0 - - # Left blending: - # (See comments under 'top blending' for an explanation of the logic.) - if tile.overlap.left > 0: - assert tile.overlap.left >= blend_amount - blend_start_left = tile.overlap.left // 2 - blend_amount // 2 - mask[:, :blend_start_left] = 0.0 - mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x - # For visual debugging: - # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 - - paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask) + # row_image[blend_start_top : blend_start_top + blend_amount, :] = 0 + paste( + dst_image=dst_image, + src_image=row_image, + box=TBLR( + top=first_tile_in_row.coords.top, + bottom=first_tile_in_row.coords.bottom, + left=0, + right=row_image.shape[1], + ), + mask=mask, + ) From 57e70aaf5006316cda784dd5d32ba9180b32c6da Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 29 Nov 2023 10:23:55 -0500 Subject: [PATCH 26/30] Change input field ordering of CropLatentsCoreInvocation to match ImageCropInvocation. --- invokeai/app/invocations/latent.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index e48d7458d4..34ef3421f8 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1194,16 +1194,6 @@ class CropLatentsCoreInvocation(BaseInvocation): description=FieldDescriptions.latents, input=Input.Connection, ) - width: int = InputField( - ge=1, - multiple_of=LATENT_SCALE_FACTOR, - description="The width (in px) of the crop rectangle in image space. 
This value will be converted to a dimension in latent space.", - ) - height: int = InputField( - ge=1, - multiple_of=LATENT_SCALE_FACTOR, - description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", - ) x_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, @@ -1214,6 +1204,16 @@ class CropLatentsCoreInvocation(BaseInvocation): multiple_of=LATENT_SCALE_FACTOR, description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) + width: int = InputField( + ge=1, + multiple_of=LATENT_SCALE_FACTOR, + description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", + ) + height: int = InputField( + ge=1, + multiple_of=LATENT_SCALE_FACTOR, + description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", + ) def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) From 984e609c61525fae4ff2cc205ac19aa38286d542 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 30 Nov 2023 10:44:21 -0500 Subject: [PATCH 27/30] (minor) Tweak field ordering and field names for tiling nodes. --- invokeai/app/invocations/latent.py | 8 ++++---- invokeai/app/invocations/tiles.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 34ef3421f8..218e05a986 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1194,12 +1194,12 @@ class CropLatentsCoreInvocation(BaseInvocation): description=FieldDescriptions.latents, input=Input.Connection, ) - x_offset: int = InputField( + x: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) - y_offset: int = InputField( + y: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, description="The top y coordinate (in px) of the crop rectangle in image space. 
This value will be converted to a dimension in latent space.", @@ -1218,8 +1218,8 @@ class CropLatentsCoreInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) - x1 = self.x_offset // LATENT_SCALE_FACTOR - y1 = self.y_offset // LATENT_SCALE_FACTOR + x1 = self.x // LATENT_SCALE_FACTOR + y1 = self.y // LATENT_SCALE_FACTOR x2 = x1 + (self.width // LATENT_SCALE_FACTOR) y2 = y1 + (self.height // LATENT_SCALE_FACTOR) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index d1b51a43f0..3055c1baae 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -58,10 +58,10 @@ class CalculateImageTilesInvocation(BaseInvocation): @invocation_output("tile_to_properties_output") class TileToPropertiesOutput(BaseInvocationOutput): - coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") - coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") + coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") + coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") # HACK: The width and height fields are 'meta' fields that can easily be calculated from the other fields on this # object. Including redundant fields that can cheaply/easily be re-calculated goes against conventional API design @@ -85,10 +85,10 @@ class TileToPropertiesInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: return TileToPropertiesOutput( - coords_top=self.tile.coords.top, - coords_bottom=self.tile.coords.bottom, coords_left=self.tile.coords.left, coords_right=self.tile.coords.right, + coords_top=self.tile.coords.top, + coords_bottom=self.tile.coords.bottom, width=self.tile.coords.right - self.tile.coords.left, height=self.tile.coords.bottom - self.tile.coords.top, overlap_top=self.tile.overlap.top, From aadcde3edd12b8b81d22d9b6cc8205bf1e460a44 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Mon, 13 Nov 2023 18:08:17 +1100 Subject: [PATCH 28/30] feat(ui): use IndexedDB for persistence IndexedDB has a much larger storage limit than LocalStorage, and is widely supported. Implemented as a custom storage driver for `redux-remember` via `idb-keyval`. `idb-keyval` is a simple wrapper for IndexedDB that allows it to be used easily as a key-value store. The logic to clear persisted storage has been updated throughout the app. 
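
For context, the amount of glue required is small. A hedged sketch of the driver shape follows; the database and store names here are placeholders, while the actual driver added below uses an 'invoke'/'invoke-store' idb-keyval store.

import { createStore as createIDBKeyValStore, get, set, clear } from 'idb-keyval';
import type { Driver } from 'redux-remember';

// idb-keyval exposes promise-based get/set/clear against an IndexedDB object store,
// which matches the shape redux-remember expects from a Driver.
const exampleStore = createIDBKeyValStore('example-db', 'example-store');

export const exampleDriver: Driver = {
  getItem: (key) => get(key, exampleStore),
  setItem: (key, value) => set(key, value, exampleStore),
};

// Clearing persisted state becomes a single call; the new useClearStorage hook below
// does this alongside localStorage.clear().
export const resetPersistedState = () => clear(exampleStore);
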
--- invokeai/frontend/web/package.json | 1 + .../frontend/web/src/app/components/App.tsx | 6 ++++-- .../web/src/app/components/InvokeAIUI.tsx | 6 +++--- .../src/app/components/ThemeLocaleProvider.tsx | 2 +- .../frontend/web/src/app/store/constants.ts | 9 +-------- invokeai/frontend/web/src/app/store/store.ts | 18 ++++++++++++++---- .../web/src/common/hooks/useClearStorage.ts | 12 ++++++++++++ .../components/SettingsModal/SettingsModal.tsx | 16 +++++----------- invokeai/frontend/web/yarn.lock | 5 +++++ 9 files changed, 46 insertions(+), 29 deletions(-) create mode 100644 invokeai/frontend/web/src/common/hooks/useClearStorage.ts diff --git a/invokeai/frontend/web/package.json b/invokeai/frontend/web/package.json index 6f160bae46..6a6b79c3b7 100644 --- a/invokeai/frontend/web/package.json +++ b/invokeai/frontend/web/package.json @@ -75,6 +75,7 @@ "framer-motion": "^10.16.4", "i18next": "^23.6.0", "i18next-http-backend": "^2.3.1", + "idb-keyval": "^6.2.1", "konva": "^9.2.3", "lodash-es": "^4.17.21", "nanostores": "^0.9.4", diff --git a/invokeai/frontend/web/src/app/components/App.tsx b/invokeai/frontend/web/src/app/components/App.tsx index 63533aee0d..73bd92ffab 100644 --- a/invokeai/frontend/web/src/app/components/App.tsx +++ b/invokeai/frontend/web/src/app/components/App.tsx @@ -21,6 +21,7 @@ import GlobalHotkeys from './GlobalHotkeys'; import PreselectedImage from './PreselectedImage'; import Toaster from './Toaster'; import { useSocketIO } from 'app/hooks/useSocketIO'; +import { useClearStorage } from 'common/hooks/useClearStorage'; const DEFAULT_CONFIG = {}; @@ -36,15 +37,16 @@ const App = ({ config = DEFAULT_CONFIG, selectedImage }: Props) => { const language = useAppSelector(languageSelector); const logger = useLogger('system'); const dispatch = useAppDispatch(); + const clearStorage = useClearStorage(); // singleton! 
useSocketIO(); const handleReset = useCallback(() => { - localStorage.clear(); + clearStorage(); location.reload(); return false; - }, []); + }, [clearStorage]); useEffect(() => { i18n.changeLanguage(language); diff --git a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx index 459ac65635..64d0d8d3ab 100644 --- a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx +++ b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx @@ -9,6 +9,9 @@ import { $projectId } from 'app/store/nanostores/projectId'; import { $queueId, DEFAULT_QUEUE_ID } from 'app/store/nanostores/queueId'; import { store } from 'app/store/store'; import { PartialAppConfig } from 'app/types/invokeai'; +import Loading from 'common/components/Loading/Loading'; +import AppDndContext from 'features/dnd/components/AppDndContext'; +import 'i18n'; import React, { PropsWithChildren, ReactNode, @@ -19,9 +22,6 @@ import React, { import { Provider } from 'react-redux'; import { addMiddleware, resetMiddlewares } from 'redux-dynamic-middlewares'; import { ManagerOptions, SocketOptions } from 'socket.io-client'; -import Loading from 'common/components/Loading/Loading'; -import AppDndContext from 'features/dnd/components/AppDndContext'; -import 'i18n'; const App = lazy(() => import('./App')); const ThemeLocaleProvider = lazy(() => import('./ThemeLocaleProvider')); diff --git a/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx b/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx index a9d56a7f16..ba0aaa5823 100644 --- a/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx +++ b/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx @@ -9,9 +9,9 @@ import { TOAST_OPTIONS, theme as invokeAITheme } from 'theme/theme'; import '@fontsource-variable/inter'; import { MantineProvider } from '@mantine/core'; +import { useMantineTheme } from 'mantine-theme/theme'; import 'overlayscrollbars/overlayscrollbars.css'; import 'theme/css/overlayscrollbars.css'; -import { useMantineTheme } from 'mantine-theme/theme'; type ThemeLocaleProviderProps = { children: ReactNode; diff --git a/invokeai/frontend/web/src/app/store/constants.ts b/invokeai/frontend/web/src/app/store/constants.ts index 6d48762bef..c2f3a5e10b 100644 --- a/invokeai/frontend/web/src/app/store/constants.ts +++ b/invokeai/frontend/web/src/app/store/constants.ts @@ -1,8 +1 @@ -export const LOCALSTORAGE_KEYS = [ - 'chakra-ui-color-mode', - 'i18nextLng', - 'ROARR_FILTER', - 'ROARR_LOG', -]; - -export const LOCALSTORAGE_PREFIX = '@@invokeai-'; +export const STORAGE_PREFIX = '@@invokeai-'; diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index d9bc7b085d..a0230c2807 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -23,9 +23,9 @@ import systemReducer from 'features/system/store/systemSlice'; import hotkeysReducer from 'features/ui/store/hotkeysSlice'; import uiReducer from 'features/ui/store/uiSlice'; import dynamicMiddlewares from 'redux-dynamic-middlewares'; -import { rememberEnhancer, rememberReducer } from 'redux-remember'; +import { Driver, rememberEnhancer, rememberReducer } from 'redux-remember'; import { api } from 'services/api'; -import { LOCALSTORAGE_PREFIX } from './constants'; +import { STORAGE_PREFIX } from './constants'; import { serialize } from './enhancers/reduxRemember/serialize'; import { unserialize } from 
'./enhancers/reduxRemember/unserialize'; import { actionSanitizer } from './middleware/devtools/actionSanitizer'; @@ -33,6 +33,7 @@ import { actionsDenylist } from './middleware/devtools/actionsDenylist'; import { stateSanitizer } from './middleware/devtools/stateSanitizer'; import { listenerMiddleware } from './middleware/listenerMiddleware'; import { $store } from './nanostores/store'; +import { createStore as createIDBKeyValStore, get, set } from 'idb-keyval'; const allReducers = { canvas: canvasReducer, @@ -74,16 +75,25 @@ const rememberedKeys: (keyof typeof allReducers)[] = [ 'modelmanager', ]; +// Create a custom idb-keyval store (just needed to customize the name) +export const idbKeyValStore = createIDBKeyValStore('invoke', 'invoke-store'); + +// Create redux-remember driver, wrapping idb-keyval +const idbKeyValDriver: Driver = { + getItem: (key) => get(key, idbKeyValStore), + setItem: (key, value) => set(key, value, idbKeyValStore), +}; + export const store = configureStore({ reducer: rememberedRootReducer, enhancers: (existingEnhancers) => { return existingEnhancers .concat( - rememberEnhancer(window.localStorage, rememberedKeys, { + rememberEnhancer(idbKeyValDriver, rememberedKeys, { persistDebounce: 300, serialize, unserialize, - prefix: LOCALSTORAGE_PREFIX, + prefix: STORAGE_PREFIX, }) ) .concat(autoBatchEnhancer()); diff --git a/invokeai/frontend/web/src/common/hooks/useClearStorage.ts b/invokeai/frontend/web/src/common/hooks/useClearStorage.ts new file mode 100644 index 0000000000..0ab4936d72 --- /dev/null +++ b/invokeai/frontend/web/src/common/hooks/useClearStorage.ts @@ -0,0 +1,12 @@ +import { idbKeyValStore } from 'app/store/store'; +import { clear } from 'idb-keyval'; +import { useCallback } from 'react'; + +export const useClearStorage = () => { + const clearStorage = useCallback(() => { + clear(idbKeyValStore); + localStorage.clear(); + }, []); + + return clearStorage; +}; diff --git a/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx b/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx index e1eeb19df3..7841a94d3f 100644 --- a/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx +++ b/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx @@ -14,11 +14,11 @@ import { } from '@chakra-ui/react'; import { createSelector } from '@reduxjs/toolkit'; import { VALID_LOG_LEVELS } from 'app/logging/logger'; -import { LOCALSTORAGE_KEYS, LOCALSTORAGE_PREFIX } from 'app/store/constants'; import { stateSelector } from 'app/store/store'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import IAIButton from 'common/components/IAIButton'; import IAIMantineSelect from 'common/components/IAIMantineSelect'; +import { useClearStorage } from 'common/hooks/useClearStorage'; import { consoleLogLevelChanged, setEnableImageDebugging, @@ -164,20 +164,14 @@ const SettingsModal = ({ children, config }: SettingsModalProps) => { shouldEnableInformationalPopovers, } = useAppSelector(selector); + const clearStorage = useClearStorage(); + const handleClickResetWebUI = useCallback(() => { - // Only remove our keys - Object.keys(window.localStorage).forEach((key) => { - if ( - LOCALSTORAGE_KEYS.includes(key) || - key.startsWith(LOCALSTORAGE_PREFIX) - ) { - localStorage.removeItem(key); - } - }); + clearStorage(); onSettingsModalClose(); onRefreshModalOpen(); setInterval(() => setCountdown((prev) => prev - 1), 1000); - }, [onSettingsModalClose, 
onRefreshModalOpen]); + }, [clearStorage, onSettingsModalClose, onRefreshModalOpen]); useEffect(() => { if (countdown <= 0) { diff --git a/invokeai/frontend/web/yarn.lock b/invokeai/frontend/web/yarn.lock index e0a9db1c5e..6c661af24b 100644 --- a/invokeai/frontend/web/yarn.lock +++ b/invokeai/frontend/web/yarn.lock @@ -4158,6 +4158,11 @@ i18next@^23.6.0: dependencies: "@babel/runtime" "^7.22.5" +idb-keyval@^6.2.1: + version "6.2.1" + resolved "https://registry.yarnpkg.com/idb-keyval/-/idb-keyval-6.2.1.tgz#94516d625346d16f56f3b33855da11bfded2db33" + integrity sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg== + ieee754@^1.1.13: version "1.2.1" resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" From e6fe2540b81cf5be4d403d6adc53d0c75b3922d1 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Thu, 30 Nov 2023 11:47:27 -0500 Subject: [PATCH 29/30] dynamically create indexedDB store using unique store key if available --- .../web/src/app/components/InvokeAIUI.tsx | 12 ++- invokeai/frontend/web/src/app/store/store.ts | 100 +++++++++--------- 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx index 64d0d8d3ab..b190a36f06 100644 --- a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx +++ b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx @@ -7,7 +7,8 @@ import { $headerComponent } from 'app/store/nanostores/headerComponent'; import { $isDebugging } from 'app/store/nanostores/isDebugging'; import { $projectId } from 'app/store/nanostores/projectId'; import { $queueId, DEFAULT_QUEUE_ID } from 'app/store/nanostores/queueId'; -import { store } from 'app/store/store'; +import { $store } from 'app/store/nanostores/store'; +import { createStore } from 'app/store/store'; import { PartialAppConfig } from 'app/types/invokeai'; import Loading from 'common/components/Loading/Loading'; import AppDndContext from 'features/dnd/components/AppDndContext'; @@ -18,6 +19,7 @@ import React, { lazy, memo, useEffect, + useMemo, } from 'react'; import { Provider } from 'react-redux'; import { addMiddleware, resetMiddlewares } from 'redux-dynamic-middlewares'; @@ -137,6 +139,14 @@ const InvokeAIUI = ({ }; }, [isDebugging]); + const store = useMemo(() => { + return createStore(projectId); + }, [projectId]); + + useEffect(() => { + $store.set(store); + }, [store]); + return ( diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index a0230c2807..87edba56e0 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -32,7 +32,6 @@ import { actionSanitizer } from './middleware/devtools/actionSanitizer'; import { actionsDenylist } from './middleware/devtools/actionsDenylist'; import { stateSanitizer } from './middleware/devtools/stateSanitizer'; import { listenerMiddleware } from './middleware/listenerMiddleware'; -import { $store } from './nanostores/store'; import { createStore as createIDBKeyValStore, get, set } from 'idb-keyval'; const allReducers = { @@ -84,57 +83,58 @@ const idbKeyValDriver: Driver = { setItem: (key, value) => set(key, value, idbKeyValStore), }; -export const store = configureStore({ - reducer: rememberedRootReducer, - enhancers: (existingEnhancers) => { - return existingEnhancers - .concat( - rememberEnhancer(idbKeyValDriver, rememberedKeys, { - persistDebounce: 300, - 
serialize, - unserialize, - prefix: STORAGE_PREFIX, - }) - ) - .concat(autoBatchEnhancer()); - }, - middleware: (getDefaultMiddleware) => - getDefaultMiddleware({ - serializableCheck: false, - immutableCheck: false, - }) - .concat(api.middleware) - .concat(dynamicMiddlewares) - .prepend(listenerMiddleware.middleware), - devTools: { - actionSanitizer, - stateSanitizer, - trace: true, - predicate: (state, action) => { - // TODO: hook up to the log level param in system slice - // manually type state, cannot type the arg - // const typedState = state as ReturnType; - - // TODO: doing this breaks the rtk query devtools, commenting out for now - // if (action.type.startsWith('api/')) { - // // don't log api actions, with manual cache updates they are extremely noisy - // return false; - // } - - if (actionsDenylist.includes(action.type)) { - // don't log other noisy actions - return false; - } - - return true; +export const createStore = (uniqueStoreKey?: string) => + configureStore({ + reducer: rememberedRootReducer, + enhancers: (existingEnhancers) => { + return existingEnhancers + .concat( + rememberEnhancer(idbKeyValDriver, rememberedKeys, { + persistDebounce: 300, + serialize, + unserialize, + prefix: uniqueStoreKey + ? `${STORAGE_PREFIX}-${uniqueStoreKey}-` + : STORAGE_PREFIX, + }) + ) + .concat(autoBatchEnhancer()); }, - }, -}); + middleware: (getDefaultMiddleware) => + getDefaultMiddleware({ + serializableCheck: false, + immutableCheck: false, + }) + .concat(api.middleware) + .concat(dynamicMiddlewares) + .prepend(listenerMiddleware.middleware), + devTools: { + actionSanitizer, + stateSanitizer, + trace: true, + predicate: (state, action) => { + // TODO: hook up to the log level param in system slice + // manually type state, cannot type the arg + // const typedState = state as ReturnType; -export type AppGetState = typeof store.getState; -export type RootState = ReturnType; + // TODO: doing this breaks the rtk query devtools, commenting out for now + // if (action.type.startsWith('api/')) { + // // don't log api actions, with manual cache updates they are extremely noisy + // return false; + // } + + if (actionsDenylist.includes(action.type)) { + // don't log other noisy actions + return false; + } + + return true; + }, + }, + }); + +export type RootState = ReturnType; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type AppThunkDispatch = ThunkDispatch; -export type AppDispatch = typeof store.dispatch; +export type AppDispatch = ReturnType['dispatch']; export const stateSelector = (state: RootState) => state; -$store.set(store); From a8ef4e5be847cb16528b3f1d5759dd086d1d00e3 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:02:02 +1100 Subject: [PATCH 30/30] fix(ui): fix types and storage prefix --- invokeai/frontend/web/src/app/store/store.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index 87edba56e0..0e3634468b 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -94,7 +94,7 @@ export const createStore = (uniqueStoreKey?: string) => serialize, unserialize, prefix: uniqueStoreKey - ? `${STORAGE_PREFIX}-${uniqueStoreKey}-` + ? 
`${STORAGE_PREFIX}${uniqueStoreKey}-` : STORAGE_PREFIX, }) ) @@ -133,8 +133,11 @@ export const createStore = (uniqueStoreKey?: string) => }, }); -export type RootState = ReturnType; +export type AppGetState = ReturnType< + ReturnType['getState'] +>; +export type RootState = ReturnType['getState']>; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type AppThunkDispatch = ThunkDispatch; -export type AppDispatch = ReturnType['dispatch']; +export type AppDispatch = ReturnType['dispatch']; export const stateSelector = (state: RootState) => state;
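
A closing usage sketch (hedged; the repo's existing app/store/storeHooks presumably already does the equivalent): because the store is now produced by a factory, consumer-facing types have to be derived from the factory's return type rather than from a singleton instance, e.g.

import { useDispatch, useSelector } from 'react-redux';
import type { TypedUseSelectorHook } from 'react-redux';
import type { AppDispatch, RootState } from 'app/store/store';

// Typed hooks keep components agnostic of which concrete store instance was created.
export const useAppDispatch: () => AppDispatch = useDispatch;
export const useAppSelector: TypedUseSelectorHook<RootState> = useSelector;

With a uniqueStoreKey of, say, 'my-project', persisted slices now land under keys prefixed '@@invokeai-my-project-' in the shared IndexedDB store, keeping per-project state isolated.
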