From 693c6cf5e4eea760ca8b7bbece794edbcc8c41fc Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 29 Nov 2023 15:10:45 -0500 Subject: [PATCH 01/45] Add support for IPAdapterFull models. The changes are based on this upstream PR: https://github.com/tencent-ailab/IP-Adapter/pull/139 . --- invokeai/backend/ip_adapter/ip_adapter.py | 57 ++++++++++++++++++--- tests/backend/ip_adapter/test_ip_adapter.py | 8 +++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py index 826112156d..9176bf1f49 100644 --- a/invokeai/backend/ip_adapter/ip_adapter.py +++ b/invokeai/backend/ip_adapter/ip_adapter.py @@ -54,6 +54,44 @@ class ImageProjModel(torch.nn.Module): return clip_extra_context_tokens +class MLPProjModel(torch.nn.Module): + """SD model with image prompt""" + + def __init__(self, cross_attention_dim=1024, clip_embeddings_dim=1024): + super().__init__() + + self.proj = torch.nn.Sequential( + torch.nn.Linear(clip_embeddings_dim, clip_embeddings_dim), + torch.nn.GELU(), + torch.nn.Linear(clip_embeddings_dim, cross_attention_dim), + torch.nn.LayerNorm(cross_attention_dim), + ) + + @classmethod + def from_state_dict(cls, state_dict: dict[torch.Tensor]): + """Initialize an MLPProjModel from a state_dict. + + The cross_attention_dim and clip_embeddings_dim are inferred from the shape of the tensors in the state_dict. + + Args: + state_dict (dict[torch.Tensor]): The state_dict of model weights. + + Returns: + MLPProjModel + """ + cross_attention_dim = state_dict["proj.3.weight"].shape[0] + clip_embeddings_dim = state_dict["proj.0.weight"].shape[0] + + model = cls(cross_attention_dim, clip_embeddings_dim) + + model.load_state_dict(state_dict) + return model + + def forward(self, image_embeds): + clip_extra_context_tokens = self.proj(image_embeds) + return clip_extra_context_tokens + + class IPAdapter: """IP-Adapter: https://arxiv.org/pdf/2308.06721.pdf""" @@ -130,6 +168,13 @@ class IPAdapterPlus(IPAdapter): return image_prompt_embeds, uncond_image_prompt_embeds +class IPAdapterFull(IPAdapterPlus): + """IP-Adapter Plus with full features.""" + + def _init_image_proj_model(self, state_dict: dict[torch.Tensor]): + return MLPProjModel.from_state_dict(state_dict).to(self.device, dtype=self.dtype) + + class IPAdapterPlusXL(IPAdapterPlus): """IP-Adapter Plus for SDXL.""" @@ -149,11 +194,9 @@ def build_ip_adapter( ) -> Union[IPAdapter, IPAdapterPlus]: state_dict = torch.load(ip_adapter_ckpt_path, map_location="cpu") - # Determine if the state_dict is from an IPAdapter or IPAdapterPlus based on the image_proj weights that it - # contains. - is_plus = "proj.weight" not in state_dict["image_proj"] - - if is_plus: + if "proj.weight" in state_dict["image_proj"]: # IPAdapter (with ImageProjModel). + return IPAdapter(state_dict, device=device, dtype=dtype) + elif "proj_in.weight" in state_dict["image_proj"]: # IPAdaterPlus or IPAdapterPlusXL (with Resampler). cross_attention_dim = state_dict["ip_adapter"]["1.to_k_ip.weight"].shape[-1] if cross_attention_dim == 768: # SD1 IP-Adapter Plus @@ -163,5 +206,7 @@ def build_ip_adapter( return IPAdapterPlusXL(state_dict, device=device, dtype=dtype) else: raise Exception(f"Unsupported IP-Adapter Plus cross-attention dimension: {cross_attention_dim}.") + elif "proj.0.weight" in state_dict["image_proj"]: # IPAdapterFull (with MLPProjModel). 
+ return IPAdapterFull(state_dict, device=device, dtype=dtype) else: - return IPAdapter(state_dict, device=device, dtype=dtype) + raise ValueError(f"'{ip_adapter_ckpt_path}' has an unrecognized IP-Adapter model architecture.") diff --git a/tests/backend/ip_adapter/test_ip_adapter.py b/tests/backend/ip_adapter/test_ip_adapter.py index 6712196778..6a3ec510a2 100644 --- a/tests/backend/ip_adapter/test_ip_adapter.py +++ b/tests/backend/ip_adapter/test_ip_adapter.py @@ -37,6 +37,14 @@ def build_dummy_sd15_unet_input(torch_device): "unet_model_id": "runwayml/stable-diffusion-v1-5", "unet_model_name": "stable-diffusion-v1-5", }, + # SD1.5, IPAdapterFull + { + "ip_adapter_model_id": "InvokeAI/ip-adapter-full-face_sd15", + "ip_adapter_model_name": "ip-adapter-full-face_sd15", + "base_model": BaseModelType.StableDiffusion1, + "unet_model_id": "runwayml/stable-diffusion-v1-5", + "unet_model_name": "stable-diffusion-v1-5", + }, ], ) @pytest.mark.slow From 0beb08686c64acb451dbda0dc4b6a257433fbac8 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Thu, 30 Nov 2023 10:55:20 +0100 Subject: [PATCH 02/45] Add CFG Rescale option for supporting zero-terminal SNR models (#4335) * add support for CFG rescale * fix typo * move rescale position and tweak docs * move input position * implement suggestions from github and discord * cleanup unused code * add back dropped FieldDescription * fix(ui): revert unrelated UI changes * chore(nodes): bump denoise_latents version 1.4.0 -> 1.5.0 * feat(nodes): add cfg_rescale_multiplier to metadata node * feat(ui): add cfg rescale multiplier to linear UI - add param to state - update graph builders - add UI under advanced - add metadata handling & recall - regen types * chore: black * fix(backend): make `StableDiffusionGeneratorPipeline._rescale_cfg()` staticmethod This doesn't need access to class. 
* feat(backend): add docstring for `_rescale_cfg()` method * feat(ui): update cfg rescale mult translation string --------- Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> --- invokeai/app/invocations/latent.py | 6 +- invokeai/app/invocations/metadata.py | 3 + invokeai/app/shared/fields.py | 1 + .../stable_diffusion/diffusers_pipeline.py | 23 +++++-- .../diffusion/conditioning_data.py | 6 +- invokeai/frontend/web/public/locales/en.json | 9 +++ .../IAIInformationalPopover/constants.ts | 1 + .../ImageMetadataActions.tsx | 13 ++++ .../web/src/features/nodes/types/metadata.ts | 1 + .../graph/buildCanvasImageToImageGraph.ts | 2 + .../graph/buildCanvasSDXLImageToImageGraph.ts | 2 + .../graph/buildCanvasSDXLTextToImageGraph.ts | 2 + .../util/graph/buildCanvasTextToImageGraph.ts | 2 + .../graph/buildLinearImageToImageGraph.ts | 2 + .../graph/buildLinearSDXLImageToImageGraph.ts | 2 + .../graph/buildLinearSDXLTextToImageGraph.ts | 2 + .../util/graph/buildLinearTextToImageGraph.ts | 3 + .../Advanced/ParamAdvancedCollapse.tsx | 46 ++++++++++++-- .../Advanced/ParamCFGRescaleMultiplier.tsx | 60 +++++++++++++++++++ .../parameters/hooks/useRecallParameters.ts | 23 +++++++ .../parameters/store/generationSlice.ts | 12 +++- .../parameters/types/parameterSchemas.ts | 11 ++++ .../frontend/web/src/services/api/schema.d.ts | 51 +++++++++------- 23 files changed, 249 insertions(+), 34 deletions(-) create mode 100644 invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index d438bcae02..ab59b41865 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -215,7 +215,7 @@ def get_scheduler( title="Denoise Latents", tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"], category="latents", - version="1.4.0", + version="1.5.0", ) class DenoiseLatentsInvocation(BaseInvocation): """Denoises noisy latents to decodable images""" @@ -273,6 +273,9 @@ class DenoiseLatentsInvocation(BaseInvocation): input=Input.Connection, ui_order=7, ) + cfg_rescale_multiplier: float = InputField( + default=0, ge=0, lt=1, description=FieldDescriptions.cfg_rescale_multiplier + ) latents: Optional[LatentsField] = InputField( default=None, description=FieldDescriptions.latents, @@ -332,6 +335,7 @@ class DenoiseLatentsInvocation(BaseInvocation): unconditioned_embeddings=uc, text_embeddings=c, guidance_scale=self.cfg_scale, + guidance_rescale_multiplier=self.cfg_rescale_multiplier, extra=extra_conditioning_info, postprocessing_settings=PostprocessingSettings( threshold=0.0, # threshold, diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index d837e6297f..14d66f8ef6 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -127,6 +127,9 @@ class CoreMetadataInvocation(BaseInvocation): seed: Optional[int] = InputField(default=None, description="The seed used for noise generation") rand_device: Optional[str] = InputField(default=None, description="The device used for random number generation") cfg_scale: Optional[float] = InputField(default=None, description="The classifier-free guidance scale parameter") + cfg_rescale_multiplier: Optional[float] = InputField( + default=None, description=FieldDescriptions.cfg_rescale_multiplier + ) steps: Optional[int] = InputField(default=None, description="The number of steps used for inference") scheduler: 
Optional[str] = InputField(default=None, description="The scheduler used for inference") seamless_x: Optional[bool] = InputField(default=None, description="Whether seamless tiling was used on the X axis") diff --git a/invokeai/app/shared/fields.py b/invokeai/app/shared/fields.py index dd9cbb7b82..3e841ffbf2 100644 --- a/invokeai/app/shared/fields.py +++ b/invokeai/app/shared/fields.py @@ -2,6 +2,7 @@ class FieldDescriptions: denoising_start = "When to start denoising, expressed a percentage of total steps" denoising_end = "When to stop denoising, expressed a percentage of total steps" cfg_scale = "Classifier-Free Guidance scale" + cfg_rescale_multiplier = "Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR" scheduler = "Scheduler to use during inference" positive_cond = "Positive conditioning tensor" negative_cond = "Negative conditioning tensor" diff --git a/invokeai/backend/stable_diffusion/diffusers_pipeline.py b/invokeai/backend/stable_diffusion/diffusers_pipeline.py index 1353e804a7..ae0cc17203 100644 --- a/invokeai/backend/stable_diffusion/diffusers_pipeline.py +++ b/invokeai/backend/stable_diffusion/diffusers_pipeline.py @@ -607,11 +607,14 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): if isinstance(guidance_scale, list): guidance_scale = guidance_scale[step_index] - noise_pred = self.invokeai_diffuser._combine( - uc_noise_pred, - c_noise_pred, - guidance_scale, - ) + noise_pred = self.invokeai_diffuser._combine(uc_noise_pred, c_noise_pred, guidance_scale) + guidance_rescale_multiplier = conditioning_data.guidance_rescale_multiplier + if guidance_rescale_multiplier > 0: + noise_pred = self._rescale_cfg( + noise_pred, + c_noise_pred, + guidance_rescale_multiplier, + ) # compute the previous noisy sample x_t -> x_t-1 step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args) @@ -634,6 +637,16 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): return step_output + @staticmethod + def _rescale_cfg(total_noise_pred, pos_noise_pred, multiplier=0.7): + """Implementation of Algorithm 2 from https://arxiv.org/pdf/2305.08891.pdf.""" + ro_pos = torch.std(pos_noise_pred, dim=(1, 2, 3), keepdim=True) + ro_cfg = torch.std(total_noise_pred, dim=(1, 2, 3), keepdim=True) + + x_rescaled = total_noise_pred * (ro_pos / ro_cfg) + x_final = multiplier * x_rescaled + (1.0 - multiplier) * total_noise_pred + return x_final + def _unet_forward( self, latents, diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 6a63c225fc..3e38f9f78d 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -67,13 +67,17 @@ class IPAdapterConditioningInfo: class ConditioningData: unconditioned_embeddings: BasicConditioningInfo text_embeddings: BasicConditioningInfo - guidance_scale: Union[float, List[float]] """ Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality. 
""" + guidance_scale: Union[float, List[float]] + """ for models trained using zero-terminal SNR ("ztsnr"), it's suggested to use guidance_rescale_multiplier of 0.7 . + ref [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf) + """ + guidance_rescale_multiplier: float = 0 extra: Optional[ExtraConditioningInfo] = None scheduler_args: dict[str, Any] = field(default_factory=dict) """ diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 8b5afce4a7..52bf4ff8f9 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -599,6 +599,7 @@ }, "metadata": { "cfgScale": "CFG scale", + "cfgRescaleMultiplier": "$t(parameters.cfgRescaleMultiplier)", "createdBy": "Created By", "fit": "Image to image fit", "generationMode": "Generation Mode", @@ -1032,6 +1033,8 @@ "setType": "Set cancel type" }, "cfgScale": "CFG Scale", + "cfgRescaleMultiplier": "CFG Rescale Multiplier", + "cfgRescale": "CFG Rescale", "clipSkip": "CLIP Skip", "clipSkipWithLayerCount": "CLIP Skip {{layerCount}}", "closeViewer": "Close Viewer", @@ -1470,6 +1473,12 @@ "Controls how much your prompt influences the generation process." ] }, + "paramCFGRescaleMultiplier": { + "heading": "CFG Rescale Multiplier", + "paragraphs": [ + "Rescale multiplier for CFG guidance, used for models trained using zero-terminal SNR (ztsnr). Suggested value 0.7." + ] + }, "paramDenoisingStrength": { "heading": "Denoising Strength", "paragraphs": [ diff --git a/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts b/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts index 197f5f4068..8960399b48 100644 --- a/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts +++ b/invokeai/frontend/web/src/common/components/IAIInformationalPopover/constants.ts @@ -25,6 +25,7 @@ export type Feature = | 'lora' | 'noiseUseCPU' | 'paramCFGScale' + | 'paramCFGRescaleMultiplier' | 'paramDenoisingStrength' | 'paramIterations' | 'paramModel' diff --git a/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx b/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx index 8c2c053846..890b5f7330 100644 --- a/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/ImageMetadataViewer/ImageMetadataActions.tsx @@ -29,6 +29,7 @@ const ImageMetadataActions = (props: Props) => { recallNegativePrompt, recallSeed, recallCfgScale, + recallCfgRescaleMultiplier, recallModel, recallScheduler, recallVaeModel, @@ -85,6 +86,10 @@ const ImageMetadataActions = (props: Props) => { recallCfgScale(metadata?.cfg_scale); }, [metadata?.cfg_scale, recallCfgScale]); + const handleRecallCfgRescaleMultiplier = useCallback(() => { + recallCfgRescaleMultiplier(metadata?.cfg_rescale_multiplier); + }, [metadata?.cfg_rescale_multiplier, recallCfgRescaleMultiplier]); + const handleRecallStrength = useCallback(() => { recallStrength(metadata?.strength); }, [metadata?.strength, recallStrength]); @@ -243,6 +248,14 @@ const ImageMetadataActions = (props: Props) => { onClick={handleRecallCfgScale} /> )} + {metadata.cfg_rescale_multiplier !== undefined && + metadata.cfg_rescale_multiplier !== null && ( + + )} {metadata.strength && ( { - const { clipSkip, model, seamlessXAxis, seamlessYAxis, 
shouldUseCpuNoise } = - state.generation; + const { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + } = state.generation; - return { clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise }; + return { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + }; }, defaultSelectorOptions ); export default function ParamAdvancedCollapse() { - const { clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise } = - useAppSelector(selector); + const { + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + cfgRescaleMultiplier, + } = useAppSelector(selector); const { t } = useTranslation(); const activeLabel = useMemo(() => { const activeLabel: string[] = []; @@ -46,8 +66,20 @@ export default function ParamAdvancedCollapse() { activeLabel.push(t('parameters.seamlessY')); } + if (cfgRescaleMultiplier) { + activeLabel.push(t('parameters.cfgRescale')); + } + return activeLabel.join(', '); - }, [clipSkip, model, seamlessXAxis, seamlessYAxis, shouldUseCpuNoise, t]); + }, [ + cfgRescaleMultiplier, + clipSkip, + model, + seamlessXAxis, + seamlessYAxis, + shouldUseCpuNoise, + t, + ]); return ( @@ -61,6 +93,8 @@ export default function ParamAdvancedCollapse() { )} + + ); diff --git a/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx b/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx new file mode 100644 index 0000000000..2a65b32028 --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Parameters/Advanced/ParamCFGRescaleMultiplier.tsx @@ -0,0 +1,60 @@ +import { createSelector } from '@reduxjs/toolkit'; +import { stateSelector } from 'app/store/store'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions'; +import IAIInformationalPopover from 'common/components/IAIInformationalPopover/IAIInformationalPopover'; +import IAISlider from 'common/components/IAISlider'; +import { setCfgRescaleMultiplier } from 'features/parameters/store/generationSlice'; +import { memo, useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; + +const selector = createSelector( + [stateSelector], + ({ generation, hotkeys }) => { + const { cfgRescaleMultiplier } = generation; + const { shift } = hotkeys; + + return { + cfgRescaleMultiplier, + shift, + }; + }, + defaultSelectorOptions +); + +const ParamCFGRescaleMultiplier = () => { + const { cfgRescaleMultiplier, shift } = useAppSelector(selector); + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const handleChange = useCallback( + (v: number) => dispatch(setCfgRescaleMultiplier(v)), + [dispatch] + ); + + const handleReset = useCallback( + () => dispatch(setCfgRescaleMultiplier(0)), + [dispatch] + ); + + return ( + + + + ); +}; + +export default memo(ParamCFGRescaleMultiplier); diff --git a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts index c1b7dbabd6..3217ab7be7 100644 --- a/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts +++ b/invokeai/frontend/web/src/features/parameters/hooks/useRecallParameters.ts @@ -57,6 +57,7 @@ import { modelSelected, } from 'features/parameters/store/actions'; import { + setCfgRescaleMultiplier, setCfgScale, 
setHeight, setHrfEnabled, @@ -94,6 +95,7 @@ import { isParameterStrength, isParameterVAEModel, isParameterWidth, + isParameterCFGRescaleMultiplier, } from 'features/parameters/types/parameterSchemas'; const selector = createSelector( @@ -282,6 +284,21 @@ export const useRecallParameters = () => { [dispatch, parameterSetToast, parameterNotSetToast] ); + /** + * Recall CFG rescale multiplier with toast + */ + const recallCfgRescaleMultiplier = useCallback( + (cfgRescaleMultiplier: unknown) => { + if (!isParameterCFGRescaleMultiplier(cfgRescaleMultiplier)) { + parameterNotSetToast(); + return; + } + dispatch(setCfgRescaleMultiplier(cfgRescaleMultiplier)); + parameterSetToast(); + }, + [dispatch, parameterSetToast, parameterNotSetToast] + ); + /** * Recall model with toast */ @@ -799,6 +816,7 @@ export const useRecallParameters = () => { const { cfg_scale, + cfg_rescale_multiplier, height, model, positive_prompt, @@ -831,6 +849,10 @@ export const useRecallParameters = () => { dispatch(setCfgScale(cfg_scale)); } + if (isParameterCFGRescaleMultiplier(cfg_rescale_multiplier)) { + dispatch(setCfgRescaleMultiplier(cfg_rescale_multiplier)); + } + if (isParameterModel(model)) { dispatch(modelSelected(model)); } @@ -985,6 +1007,7 @@ export const useRecallParameters = () => { recallSDXLNegativeStylePrompt, recallSeed, recallCfgScale, + recallCfgRescaleMultiplier, recallModel, recallScheduler, recallVaeModel, diff --git a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts index 8b7b8cb487..49835601d2 100644 --- a/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts +++ b/invokeai/frontend/web/src/features/parameters/store/generationSlice.ts @@ -24,6 +24,7 @@ import { ParameterVAEModel, ParameterWidth, zParameterModel, + ParameterCFGRescaleMultiplier, } from 'features/parameters/types/parameterSchemas'; export interface GenerationState { @@ -31,6 +32,7 @@ export interface GenerationState { hrfStrength: ParameterStrength; hrfMethod: ParameterHRFMethod; cfgScale: ParameterCFGScale; + cfgRescaleMultiplier: ParameterCFGRescaleMultiplier; height: ParameterHeight; img2imgStrength: ParameterStrength; infillMethod: string; @@ -76,6 +78,7 @@ export const initialGenerationState: GenerationState = { hrfEnabled: false, hrfMethod: 'ESRGAN', cfgScale: 7.5, + cfgRescaleMultiplier: 0, height: 512, img2imgStrength: 0.75, infillMethod: 'patchmatch', @@ -145,9 +148,15 @@ export const generationSlice = createSlice({ state.steps ); }, - setCfgScale: (state, action: PayloadAction) => { + setCfgScale: (state, action: PayloadAction) => { state.cfgScale = action.payload; }, + setCfgRescaleMultiplier: ( + state, + action: PayloadAction + ) => { + state.cfgRescaleMultiplier = action.payload; + }, setThreshold: (state, action: PayloadAction) => { state.threshold = action.payload; }, @@ -336,6 +345,7 @@ export const { resetParametersState, resetSeed, setCfgScale, + setCfgRescaleMultiplier, setWidth, setHeight, toggleSize, diff --git a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts index 99f58f721c..73e7d7d2c3 100644 --- a/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts +++ b/invokeai/frontend/web/src/features/parameters/types/parameterSchemas.ts @@ -77,6 +77,17 @@ export const isParameterCFGScale = (val: unknown): val is ParameterCFGScale => zParameterCFGScale.safeParse(val).success; // #endregion +// 
#region CFG Rescale Multiplier +export const zParameterCFGRescaleMultiplier = z.number().gte(0).lt(1); +export type ParameterCFGRescaleMultiplier = z.infer< + typeof zParameterCFGRescaleMultiplier +>; +export const isParameterCFGRescaleMultiplier = ( + val: unknown +): val is ParameterCFGRescaleMultiplier => + zParameterCFGRescaleMultiplier.safeParse(val).success; +// #endregion + // #region Scheduler export const zParameterScheduler = zSchedulerField; export type ParameterScheduler = z.infer; diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts index b4f9db1370..8204e50650 100644 --- a/invokeai/frontend/web/src/services/api/schema.d.ts +++ b/invokeai/frontend/web/src/services/api/schema.d.ts @@ -2067,6 +2067,11 @@ export type components = { * @description The classifier-free guidance scale parameter */ cfg_scale?: number | null; + /** + * Cfg Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + */ + cfg_rescale_multiplier?: number | null; /** * Steps * @description The number of steps used for inference @@ -2392,6 +2397,12 @@ export type components = { * @description T2I-Adapter(s) to apply */ t2i_adapter?: components["schemas"]["T2IAdapterField"] | components["schemas"]["T2IAdapterField"][] | null; + /** + * Cfg Rescale Multiplier + * @description Rescale multiplier for CFG guidance, used for models trained with zero-terminal SNR + * @default 0 + */ + cfg_rescale_multiplier?: number; /** @description Latents tensor */ latents?: components["schemas"]["LatentsField"] | null; /** @description The mask to use for the operation */ @@ -3220,7 +3231,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | 
components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | 
components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["RangeInvocation"]; + [key: string]: components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] 
| components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["BooleanCollectionInvocation"]; }; /** * Edges @@ -3257,7 +3268,7 @@ export type 
components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["String2Output"] | components["schemas"]["VAEOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["SchedulerOutput"]; + [key: string]: components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["LatentsOutput"] | 
components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ConditioningOutput"]; }; /** * Errors @@ -9692,23 +9703,11 @@ export type components = { */ UIType: "SDXLMainModelField" | "SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_MainModel" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; /** - * StableDiffusionXLModelFormat + * ControlNetModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusionXLModelFormat: "checkpoint" | "diffusers"; - /** - * StableDiffusion1ModelFormat - * @description An enumeration. - * @enum {string} - */ - StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; - /** - * CLIPVisionModelFormat - * @description An enumeration. - * @enum {string} - */ - CLIPVisionModelFormat: "diffusers"; + ControlNetModelFormat: "checkpoint" | "diffusers"; /** * T2IAdapterModelFormat * @description An enumeration. @@ -9716,11 +9715,23 @@ export type components = { */ T2IAdapterModelFormat: "diffusers"; /** - * ControlNetModelFormat + * StableDiffusionXLModelFormat * @description An enumeration. * @enum {string} */ - ControlNetModelFormat: "checkpoint" | "diffusers"; + StableDiffusionXLModelFormat: "checkpoint" | "diffusers"; + /** + * StableDiffusion2ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; + /** + * StableDiffusion1ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; /** * IPAdapterModelFormat * @description An enumeration. @@ -9734,11 +9745,11 @@ export type components = { */ StableDiffusionOnnxModelFormat: "olive" | "onnx"; /** - * StableDiffusion2ModelFormat + * CLIPVisionModelFormat * @description An enumeration. 
* @enum {string} */ - StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; + CLIPVisionModelFormat: "diffusers"; }; responses: never; parameters: never; From 3e01c396e1f0b4ebeb427ebceafc8c379801ac88 Mon Sep 17 00:00:00 2001 From: ymgenesis Date: Thu, 30 Nov 2023 11:15:59 +0100 Subject: [PATCH 03/45] CenterPadCrop node (#3861) * add centerpadcrop node - Allows users to add padding to or crop images from the center - Also outputs a white mask with the dimensions of the output image for use with outpainting * add CenterPadCrop to NODES.md Updates NODES.md with CenterPadCrop entry. * remove mask & output class - Remove "ImageMaskOutput" where both image and mask are output - Remove ability to output mask from node --------- Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> --- docs/nodes/defaultNodes.md | 204 +++++++++--------- invokeai/app/invocations/image.py | 55 +++++ .../frontend/web/src/services/api/schema.d.ts | 87 ++++++-- 3 files changed, 229 insertions(+), 117 deletions(-) diff --git a/docs/nodes/defaultNodes.md b/docs/nodes/defaultNodes.md index ace51163ef..1f490dfe81 100644 --- a/docs/nodes/defaultNodes.md +++ b/docs/nodes/defaultNodes.md @@ -1,104 +1,106 @@ # List of Default Nodes -The table below contains a list of the default nodes shipped with InvokeAI and their descriptions. +The table below contains a list of the default nodes shipped with InvokeAI and +their descriptions. -| Node | Function | -|: ---------------------------------- | :--------------------------------------------------------------------------------------| -|Add Integers | Adds two numbers| -|Boolean Primitive Collection | A collection of boolean primitive values| -|Boolean Primitive | A boolean primitive value| -|Canny Processor | Canny edge detection for ControlNet| -|CLIP Skip | Skip layers in clip text_encoder model.| -|Collect | Collects values into a collection| -|Color Correct | Shifts the colors of a target image to match the reference image, optionally using a mask to only color-correct certain regions of the target image.| -|Color Primitive | A color primitive value| -|Compel Prompt | Parse prompt using compel package to conditioning.| -|Conditioning Primitive Collection | A collection of conditioning tensor primitive values| -|Conditioning Primitive | A conditioning tensor primitive value| -|Content Shuffle Processor | Applies content shuffle processing to image| -|ControlNet | Collects ControlNet info to pass to other nodes| -|Denoise Latents | Denoises noisy latents to decodable images| -|Divide Integers | Divides two numbers| -|Dynamic Prompt | Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator| -|[FaceMask](./detailedNodes/faceTools.md#facemask) | Generates masks for faces in an image to use with Inpainting| -|[FaceIdentifier](./detailedNodes/faceTools.md#faceidentifier) | Identifies and labels faces in an image| -|[FaceOff](./detailedNodes/faceTools.md#faceoff) | Creates a new image that is a scaled bounding box with a mask on the face for Inpainting| -|Float Math | Perform basic math operations on two floats| -|Float Primitive Collection | A collection of float primitive values| -|Float Primitive | A float primitive value| -|Float Range | Creates a range| -|HED (softedge) Processor | Applies HED edge detection to image| -|Blur Image | Blurs an image| -|Extract Image Channel | Gets a channel from an image.| -|Image Primitive Collection | A collection of image primitive values| -|Integer Math | Perform basic math operations on two 
integers| -|Convert Image Mode | Converts an image to a different mode.| -|Crop Image | Crops an image to a specified box. The box can be outside of the image.| -|Image Hue Adjustment | Adjusts the Hue of an image.| -|Inverse Lerp Image | Inverse linear interpolation of all pixels of an image| -|Image Primitive | An image primitive value| -|Lerp Image | Linear interpolation of all pixels of an image| -|Offset Image Channel | Add to or subtract from an image color channel by a uniform value.| -|Multiply Image Channel | Multiply or Invert an image color channel by a scalar value.| -|Multiply Images | Multiplies two images together using `PIL.ImageChops.multiply()`.| -|Blur NSFW Image | Add blur to NSFW-flagged images| -|Paste Image | Pastes an image into another image.| -|ImageProcessor | Base class for invocations that preprocess images for ControlNet| -|Resize Image | Resizes an image to specific dimensions| -|Round Float | Rounds a float to a specified number of decimal places| -|Float to Integer | Converts a float to an integer. Optionally rounds to an even multiple of a input number.| -|Scale Image | Scales an image by a factor| -|Image to Latents | Encodes an image into latents.| -|Add Invisible Watermark | Add an invisible watermark to an image| -|Solid Color Infill | Infills transparent areas of an image with a solid color| -|PatchMatch Infill | Infills transparent areas of an image using the PatchMatch algorithm| -|Tile Infill | Infills transparent areas of an image with tiles of the image| -|Integer Primitive Collection | A collection of integer primitive values| -|Integer Primitive | An integer primitive value| -|Iterate | Iterates over a list of items| -|Latents Primitive Collection | A collection of latents tensor primitive values| -|Latents Primitive | A latents tensor primitive value| -|Latents to Image | Generates an image from latents.| -|Leres (Depth) Processor | Applies leres processing to image| -|Lineart Anime Processor | Applies line art anime processing to image| -|Lineart Processor | Applies line art processing to image| -|LoRA Loader | Apply selected lora to unet and text_encoder.| -|Main Model Loader | Loads a main model, outputting its submodels.| -|Combine Mask | Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`.| -|Mask Edge | Applies an edge mask to an image| -|Mask from Alpha | Extracts the alpha channel of an image as a mask.| -|Mediapipe Face Processor | Applies mediapipe face processing to image| -|Midas (Depth) Processor | Applies Midas depth processing to image| -|MLSD Processor | Applies MLSD processing to image| -|Multiply Integers | Multiplies two numbers| -|Noise | Generates latent noise.| -|Normal BAE Processor | Applies NormalBae processing to image| -|ONNX Latents to Image | Generates an image from latents.| -|ONNX Prompt (Raw) | A node to process inputs and produce outputs. 
May use dependency injection in __init__ to receive providers.| -|ONNX Text to Latents | Generates latents from conditionings.| -|ONNX Model Loader | Loads a main model, outputting its submodels.| -|OpenCV Inpaint | Simple inpaint using opencv.| -|Openpose Processor | Applies Openpose processing to image| -|PIDI Processor | Applies PIDI processing to image| -|Prompts from File | Loads prompts from a text file| -|Random Integer | Outputs a single random integer.| -|Random Range | Creates a collection of random numbers| -|Integer Range | Creates a range of numbers from start to stop with step| -|Integer Range of Size | Creates a range from start to start + size with step| -|Resize Latents | Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8.| -|SDXL Compel Prompt | Parse prompt using compel package to conditioning.| -|SDXL LoRA Loader | Apply selected lora to unet and text_encoder.| -|SDXL Main Model Loader | Loads an sdxl base model, outputting its submodels.| -|SDXL Refiner Compel Prompt | Parse prompt using compel package to conditioning.| -|SDXL Refiner Model Loader | Loads an sdxl refiner model, outputting its submodels.| -|Scale Latents | Scales latents by a given factor.| -|Segment Anything Processor | Applies segment anything processing to image| -|Show Image | Displays a provided image, and passes it forward in the pipeline.| -|Step Param Easing | Experimental per-step parameter easing for denoising steps| -|String Primitive Collection | A collection of string primitive values| -|String Primitive | A string primitive value| -|Subtract Integers | Subtracts two numbers| -|Tile Resample Processor | Tile resampler processor| -|Upscale (RealESRGAN) | Upscales an image using RealESRGAN.| -|VAE Loader | Loads a VAE model, outputting a VaeLoaderOutput| -|Zoe (Depth) Processor | Applies Zoe depth processing to image| \ No newline at end of file +| Node | Function | +| :------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------------------------------------------------- | +| Add Integers | Adds two numbers | +| Boolean Primitive Collection | A collection of boolean primitive values | +| Boolean Primitive | A boolean primitive value | +| Canny Processor | Canny edge detection for ControlNet | +| CenterPadCrop | Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. | +| CLIP Skip | Skip layers in clip text_encoder model. | +| Collect | Collects values into a collection | +| Color Correct | Shifts the colors of a target image to match the reference image, optionally using a mask to only color-correct certain regions of the target image. | +| Color Primitive | A color primitive value | +| Compel Prompt | Parse prompt using compel package to conditioning. 
| +| Conditioning Primitive Collection | A collection of conditioning tensor primitive values | +| Conditioning Primitive | A conditioning tensor primitive value | +| Content Shuffle Processor | Applies content shuffle processing to image | +| ControlNet | Collects ControlNet info to pass to other nodes | +| Denoise Latents | Denoises noisy latents to decodable images | +| Divide Integers | Divides two numbers | +| Dynamic Prompt | Parses a prompt using adieyal/dynamicprompts' random or combinatorial generator | +| [FaceMask](./detailedNodes/faceTools.md#facemask) | Generates masks for faces in an image to use with Inpainting | +| [FaceIdentifier](./detailedNodes/faceTools.md#faceidentifier) | Identifies and labels faces in an image | +| [FaceOff](./detailedNodes/faceTools.md#faceoff) | Creates a new image that is a scaled bounding box with a mask on the face for Inpainting | +| Float Math | Perform basic math operations on two floats | +| Float Primitive Collection | A collection of float primitive values | +| Float Primitive | A float primitive value | +| Float Range | Creates a range | +| HED (softedge) Processor | Applies HED edge detection to image | +| Blur Image | Blurs an image | +| Extract Image Channel | Gets a channel from an image. | +| Image Primitive Collection | A collection of image primitive values | +| Integer Math | Perform basic math operations on two integers | +| Convert Image Mode | Converts an image to a different mode. | +| Crop Image | Crops an image to a specified box. The box can be outside of the image. | +| Image Hue Adjustment | Adjusts the Hue of an image. | +| Inverse Lerp Image | Inverse linear interpolation of all pixels of an image | +| Image Primitive | An image primitive value | +| Lerp Image | Linear interpolation of all pixels of an image | +| Offset Image Channel | Add to or subtract from an image color channel by a uniform value. | +| Multiply Image Channel | Multiply or Invert an image color channel by a scalar value. | +| Multiply Images | Multiplies two images together using `PIL.ImageChops.multiply()`. | +| Blur NSFW Image | Add blur to NSFW-flagged images | +| Paste Image | Pastes an image into another image. | +| ImageProcessor | Base class for invocations that preprocess images for ControlNet | +| Resize Image | Resizes an image to specific dimensions | +| Round Float | Rounds a float to a specified number of decimal places | +| Float to Integer | Converts a float to an integer. Optionally rounds to an even multiple of a input number. | +| Scale Image | Scales an image by a factor | +| Image to Latents | Encodes an image into latents. | +| Add Invisible Watermark | Add an invisible watermark to an image | +| Solid Color Infill | Infills transparent areas of an image with a solid color | +| PatchMatch Infill | Infills transparent areas of an image using the PatchMatch algorithm | +| Tile Infill | Infills transparent areas of an image with tiles of the image | +| Integer Primitive Collection | A collection of integer primitive values | +| Integer Primitive | An integer primitive value | +| Iterate | Iterates over a list of items | +| Latents Primitive Collection | A collection of latents tensor primitive values | +| Latents Primitive | A latents tensor primitive value | +| Latents to Image | Generates an image from latents. 
| +| Leres (Depth) Processor | Applies leres processing to image | +| Lineart Anime Processor | Applies line art anime processing to image | +| Lineart Processor | Applies line art processing to image | +| LoRA Loader | Apply selected lora to unet and text_encoder. | +| Main Model Loader | Loads a main model, outputting its submodels. | +| Combine Mask | Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`. | +| Mask Edge | Applies an edge mask to an image | +| Mask from Alpha | Extracts the alpha channel of an image as a mask. | +| Mediapipe Face Processor | Applies mediapipe face processing to image | +| Midas (Depth) Processor | Applies Midas depth processing to image | +| MLSD Processor | Applies MLSD processing to image | +| Multiply Integers | Multiplies two numbers | +| Noise | Generates latent noise. | +| Normal BAE Processor | Applies NormalBae processing to image | +| ONNX Latents to Image | Generates an image from latents. | +| ONNX Prompt (Raw) | A node to process inputs and produce outputs. May use dependency injection in **init** to receive providers. | +| ONNX Text to Latents | Generates latents from conditionings. | +| ONNX Model Loader | Loads a main model, outputting its submodels. | +| OpenCV Inpaint | Simple inpaint using opencv. | +| Openpose Processor | Applies Openpose processing to image | +| PIDI Processor | Applies PIDI processing to image | +| Prompts from File | Loads prompts from a text file | +| Random Integer | Outputs a single random integer. | +| Random Range | Creates a collection of random numbers | +| Integer Range | Creates a range of numbers from start to stop with step | +| Integer Range of Size | Creates a range from start to start + size with step | +| Resize Latents | Resizes latents to explicit width/height (in pixels). Provided dimensions are floor-divided by 8. | +| SDXL Compel Prompt | Parse prompt using compel package to conditioning. | +| SDXL LoRA Loader | Apply selected lora to unet and text_encoder. | +| SDXL Main Model Loader | Loads an sdxl base model, outputting its submodels. | +| SDXL Refiner Compel Prompt | Parse prompt using compel package to conditioning. | +| SDXL Refiner Model Loader | Loads an sdxl refiner model, outputting its submodels. | +| Scale Latents | Scales latents by a given factor. | +| Segment Anything Processor | Applies segment anything processing to image | +| Show Image | Displays a provided image, and passes it forward in the pipeline. | +| Step Param Easing | Experimental per-step parameter easing for denoising steps | +| String Primitive Collection | A collection of string primitive values | +| String Primitive | A string primitive value | +| Subtract Integers | Subtracts two numbers | +| Tile Resample Processor | Tile resampler processor | +| Upscale (RealESRGAN) | Upscales an image using RealESRGAN. 
| +| VAE Loader | Loads a VAE model, outputting a VaeLoaderOutput | +| Zoe (Depth) Processor | Applies Zoe depth processing to image | diff --git a/invokeai/app/invocations/image.py b/invokeai/app/invocations/image.py index 87e8392402..ad3b3aec71 100644 --- a/invokeai/app/invocations/image.py +++ b/invokeai/app/invocations/image.py @@ -100,6 +100,61 @@ class ImageCropInvocation(BaseInvocation, WithWorkflow, WithMetadata): ) +@invocation( + invocation_type="img_pad_crop", + title="Center Pad or Crop Image", + category="image", + tags=["image", "pad", "crop"], + version="1.0.0", +) +class CenterPadCropInvocation(BaseInvocation): + """Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image.""" + + image: ImageField = InputField(description="The image to crop") + left: int = InputField( + default=0, + description="Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards)", + ) + right: int = InputField( + default=0, + description="Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards)", + ) + top: int = InputField( + default=0, + description="Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards)", + ) + bottom: int = InputField( + default=0, + description="Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards)", + ) + + def invoke(self, context: InvocationContext) -> ImageOutput: + image = context.services.images.get_pil_image(self.image.image_name) + + # Calculate and create new image dimensions + new_width = image.width + self.right + self.left + new_height = image.height + self.top + self.bottom + image_crop = Image.new(mode="RGBA", size=(new_width, new_height), color=(0, 0, 0, 0)) + + # Paste new image onto input + image_crop.paste(image, (self.left, self.top)) + + image_dto = context.services.images.create( + image=image_crop, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + ) + + return ImageOutput( + image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) + + @invocation("img_paste", title="Paste Image", tags=["image", "paste"], category="image", version="1.1.0") class ImagePasteInvocation(BaseInvocation, WithWorkflow, WithMetadata): """Pastes an image into another image.""" diff --git a/invokeai/frontend/web/src/services/api/schema.d.ts b/invokeai/frontend/web/src/services/api/schema.d.ts index 8204e50650..b6b6a0e8be 100644 --- a/invokeai/frontend/web/src/services/api/schema.d.ts +++ b/invokeai/frontend/web/src/services/api/schema.d.ts @@ -1196,6 +1196,61 @@ export type components = { */ type: "canny_image_processor"; }; + /** + * Center Pad or Crop Image + * @description Pad or crop an image's sides from the center by specified pixels. Positive values are outside of the image. + */ + CenterPadCropInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** @description The image to crop */ + image?: components["schemas"]["ImageField"]; + /** + * Left + * @description Number of pixels to pad/crop from the left (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + left?: number; + /** + * Right + * @description Number of pixels to pad/crop from the right (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + right?: number; + /** + * Top + * @description Number of pixels to pad/crop from the top (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + top?: number; + /** + * Bottom + * @description Number of pixels to pad/crop from the bottom (negative values crop inwards, positive values pad outwards) + * @default 0 + */ + bottom?: number; + /** + * type + * @default img_pad_crop + * @constant + */ + type: "img_pad_crop"; + }; /** * ClearResult * @description Result of clearing the session queue @@ -3231,7 +3286,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | 
components["schemas"]["FloatInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["LineartImageProcessorInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | 
components["schemas"]["ImageBlurInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["BooleanCollectionInvocation"]; + [key: string]: components["schemas"]["BlankImageInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["AddInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["PidiImageProcessorInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["LoraLoaderInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["OpenposeImageProcessorInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["LeresImageProcessorInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["MediapipeFaceProcessorInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ClipSkipInvocation"] | components["schemas"]["LineartAnimeImageProcessorInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["CannyImageProcessorInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["VaeLoaderInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["HedImageProcessorInvocation"] | components["schemas"]["MidasDepthImageProcessorInvocation"] | components["schemas"]["LinearUIOutputInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["TileResamplerProcessorInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["FloatInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["SegmentAnythingProcessorInvocation"] | components["schemas"]["ColorMapImageProcessorInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | 
components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["ONNXLatentsToImageInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["StringInvocation"] | components["schemas"]["ONNXTextToLatentsInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["NormalbaeImageProcessorInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["OnnxModelLoaderInvocation"] | components["schemas"]["ContentShuffleImageProcessorInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ZoeDepthImageProcessorInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ONNXPromptInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["GraphInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["MlsdImageProcessorInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["StepParamEasingInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["SDXLLoraLoaderInvocation"] | components["schemas"]["LineartImageProcessorInvocation"]; }; /** * Edges @@ -3268,7 +3323,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["String2Output"] | 
components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["ConditioningOutput"]; + [key: string]: components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["SDXLLoraLoaderOutput"] | components["schemas"]["ONNXModelLoaderOutput"] | components["schemas"]["ClipSkipInvocationOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["LoraLoaderOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["String2Output"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["GraphInvocationOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["T2IAdapterOutput"]; }; /** * Errors @@ -9703,17 +9758,23 @@ export type components = { */ UIType: "SDXLMainModelField" | 
"SDXLRefinerModelField" | "ONNXModelField" | "VAEModelField" | "LoRAModelField" | "ControlNetModelField" | "IPAdapterModelField" | "SchedulerField" | "AnyField" | "CollectionField" | "CollectionItemField" | "DEPRECATED_Boolean" | "DEPRECATED_Color" | "DEPRECATED_Conditioning" | "DEPRECATED_Control" | "DEPRECATED_Float" | "DEPRECATED_Image" | "DEPRECATED_Integer" | "DEPRECATED_Latents" | "DEPRECATED_String" | "DEPRECATED_BooleanCollection" | "DEPRECATED_ColorCollection" | "DEPRECATED_ConditioningCollection" | "DEPRECATED_ControlCollection" | "DEPRECATED_FloatCollection" | "DEPRECATED_ImageCollection" | "DEPRECATED_IntegerCollection" | "DEPRECATED_LatentsCollection" | "DEPRECATED_StringCollection" | "DEPRECATED_BooleanPolymorphic" | "DEPRECATED_ColorPolymorphic" | "DEPRECATED_ConditioningPolymorphic" | "DEPRECATED_ControlPolymorphic" | "DEPRECATED_FloatPolymorphic" | "DEPRECATED_ImagePolymorphic" | "DEPRECATED_IntegerPolymorphic" | "DEPRECATED_LatentsPolymorphic" | "DEPRECATED_StringPolymorphic" | "DEPRECATED_MainModel" | "DEPRECATED_UNet" | "DEPRECATED_Vae" | "DEPRECATED_CLIP" | "DEPRECATED_Collection" | "DEPRECATED_CollectionItem" | "DEPRECATED_Enum" | "DEPRECATED_WorkflowField" | "DEPRECATED_IsIntermediate" | "DEPRECATED_BoardField" | "DEPRECATED_MetadataItem" | "DEPRECATED_MetadataItemCollection" | "DEPRECATED_MetadataItemPolymorphic" | "DEPRECATED_MetadataDict"; /** - * ControlNetModelFormat + * StableDiffusionOnnxModelFormat * @description An enumeration. * @enum {string} */ - ControlNetModelFormat: "checkpoint" | "diffusers"; + StableDiffusionOnnxModelFormat: "olive" | "onnx"; /** - * T2IAdapterModelFormat + * CLIPVisionModelFormat * @description An enumeration. * @enum {string} */ - T2IAdapterModelFormat: "diffusers"; + CLIPVisionModelFormat: "diffusers"; + /** + * StableDiffusion1ModelFormat + * @description An enumeration. + * @enum {string} + */ + StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; /** * StableDiffusionXLModelFormat * @description An enumeration. @@ -9727,11 +9788,11 @@ export type components = { */ StableDiffusion2ModelFormat: "checkpoint" | "diffusers"; /** - * StableDiffusion1ModelFormat + * T2IAdapterModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusion1ModelFormat: "checkpoint" | "diffusers"; + T2IAdapterModelFormat: "diffusers"; /** * IPAdapterModelFormat * @description An enumeration. @@ -9739,17 +9800,11 @@ export type components = { */ IPAdapterModelFormat: "invokeai"; /** - * StableDiffusionOnnxModelFormat + * ControlNetModelFormat * @description An enumeration. * @enum {string} */ - StableDiffusionOnnxModelFormat: "olive" | "onnx"; - /** - * CLIPVisionModelFormat - * @description An enumeration. 
- * @enum {string} - */ - CLIPVisionModelFormat: "diffusers"; + ControlNetModelFormat: "checkpoint" | "diffusers"; }; responses: never; parameters: never; From a6d4e4ed5774ec752666291c95035cc6c6b7a5eb Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 22:01:33 +1100 Subject: [PATCH 04/45] fix(ui): fix enum parsing for optional enums Closes #5121 - Parse `anyOf` for enums (present when they are optional) - Consolidate `FieldTypeParseError` and `UnsupportedFieldTypeError` into `FieldParseError` (there was no difference in handling and it simplifies things a bit) --- invokeai/frontend/web/public/locales/en.json | 1 + .../web/src/features/nodes/types/error.ts | 19 ++--------- .../util/schema/buildFieldInputTemplate.ts | 30 +++++++++++++++-- .../nodes/util/schema/parseFieldType.ts | 33 ++++++++----------- .../features/nodes/util/schema/parseSchema.ts | 21 ++++-------- 5 files changed, 51 insertions(+), 53 deletions(-) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 52bf4ff8f9..8f93f32d90 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -978,6 +978,7 @@ "unsupportedAnyOfLength": "too many union members ({{count}})", "unsupportedMismatchedUnion": "mismatched CollectionOrScalar type with base types {{firstType}} and {{secondType}}", "unableToParseFieldType": "unable to parse field type", + "unableToExtractEnumOptions": "unable to extract enum options", "uNetField": "UNet", "uNetFieldDescription": "UNet submodel.", "unhandledInputProperty": "Unhandled input property", diff --git a/invokeai/frontend/web/src/features/nodes/types/error.ts b/invokeai/frontend/web/src/features/nodes/types/error.ts index e520b7710d..905b487fb0 100644 --- a/invokeai/frontend/web/src/features/nodes/types/error.ts +++ b/invokeai/frontend/web/src/features/nodes/types/error.ts @@ -43,10 +43,10 @@ export class NodeUpdateError extends Error { } /** - * FieldTypeParseError + * FieldParseError * Raised when a field cannot be parsed from a field schema. */ -export class FieldTypeParseError extends Error { +export class FieldParseError extends Error { /** * Create FieldTypeParseError * @param {String} message @@ -56,18 +56,3 @@ export class FieldTypeParseError extends Error { this.name = this.constructor.name; } } - -/** - * UnsupportedFieldTypeError - * Raised when an unsupported field type is parsed. 
- */ -export class UnsupportedFieldTypeError extends Error { - /** - * Create UnsupportedFieldTypeError - * @param {String} message - */ - constructor(message: string) { - super(message); - this.name = this.constructor.name; - } -} diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts index 94095bbc08..001e1c9cf4 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/buildFieldInputTemplate.ts @@ -23,7 +23,12 @@ import { VAEModelFieldInputTemplate, isStatefulFieldType, } from 'features/nodes/types/field'; -import { InvocationFieldSchema } from 'features/nodes/types/openapi'; +import { + InvocationFieldSchema, + isSchemaObject, +} from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { FieldParseError } from 'features/nodes/types/error'; // eslint-disable-next-line @typescript-eslint/no-explicit-any type FieldInputTemplateBuilder = // valid `any`! @@ -321,7 +326,28 @@ const buildImageFieldInputTemplate: FieldInputTemplateBuilder< const buildEnumFieldInputTemplate: FieldInputTemplateBuilder< EnumFieldInputTemplate > = ({ schemaObject, baseField, isCollection, isCollectionOrScalar }) => { - const options = schemaObject.enum ?? []; + let options: EnumFieldInputTemplate['options'] = []; + if (schemaObject.anyOf) { + const filteredAnyOf = schemaObject.anyOf.filter((i) => { + if (isSchemaObject(i)) { + if (i.type === 'null') { + return false; + } + } + return true; + }); + const firstAnyOf = filteredAnyOf[0]; + if (filteredAnyOf.length !== 1 || !isSchemaObject(firstAnyOf)) { + options = []; + } else { + options = firstAnyOf.enum ?? []; + } + } else { + options = schemaObject.enum ?? []; + } + if (options.length === 0) { + throw new FieldParseError(t('nodes.unableToExtractEnumOptions')); + } const template: EnumFieldInputTemplate = { ...baseField, type: { diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts b/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts index 3b6fadd8a1..4ee4edce1b 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/parseFieldType.ts @@ -1,10 +1,4 @@ -import { t } from 'i18next'; -import { isArray } from 'lodash-es'; -import { OpenAPIV3_1 } from 'openapi-types'; -import { - FieldTypeParseError, - UnsupportedFieldTypeError, -} from 'features/nodes/types/error'; +import { FieldParseError } from 'features/nodes/types/error'; import { FieldType } from 'features/nodes/types/field'; import { OpenAPIV3_1SchemaOrRef, @@ -14,6 +8,9 @@ import { isRefObject, isSchemaObject, } from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { isArray } from 'lodash-es'; +import { OpenAPIV3_1 } from 'openapi-types'; /** * Transforms an invocation output ref object to field type. 
@@ -70,7 +67,7 @@ export const parseFieldType = ( // This is a single ref type const name = refObjectToSchemaName(allOf[0]); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -95,7 +92,7 @@ export const parseFieldType = ( if (isRefObject(filteredAnyOf[0])) { const name = refObjectToSchemaName(filteredAnyOf[0]); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -120,7 +117,7 @@ export const parseFieldType = ( if (filteredAnyOf.length !== 2) { // This is a union of more than 2 types, which we don't support - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedAnyOfLength', { count: filteredAnyOf.length, }) @@ -167,7 +164,7 @@ export const parseFieldType = ( }; } - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedMismatchedUnion', { firstType, secondType, @@ -186,7 +183,7 @@ export const parseFieldType = ( if (isSchemaObject(schemaObject.items)) { const itemType = schemaObject.items.type; if (!itemType || isArray(itemType)) { - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: itemType, }) @@ -196,7 +193,7 @@ export const parseFieldType = ( const name = OPENAPI_TO_FIELD_TYPE_MAP[itemType]; if (!name) { // it's 'null', 'object', or 'array' - skip - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: itemType, }) @@ -212,7 +209,7 @@ export const parseFieldType = ( // This is a ref object, extract the type name const name = refObjectToSchemaName(schemaObject.items); if (!name) { - throw new FieldTypeParseError( + throw new FieldParseError( t('nodes.unableToExtractSchemaNameFromRef') ); } @@ -226,7 +223,7 @@ export const parseFieldType = ( const name = OPENAPI_TO_FIELD_TYPE_MAP[schemaObject.type]; if (!name) { // it's 'null', 'object', or 'array' - skip - throw new UnsupportedFieldTypeError( + throw new FieldParseError( t('nodes.unsupportedArrayItemType', { type: schemaObject.type, }) @@ -242,9 +239,7 @@ export const parseFieldType = ( } else if (isRefObject(schemaObject)) { const name = refObjectToSchemaName(schemaObject); if (!name) { - throw new FieldTypeParseError( - t('nodes.unableToExtractSchemaNameFromRef') - ); + throw new FieldParseError(t('nodes.unableToExtractSchemaNameFromRef')); } return { name, @@ -252,5 +247,5 @@ export const parseFieldType = ( isCollectionOrScalar: false, }; } - throw new FieldTypeParseError(t('nodes.unableToParseFieldType')); + throw new FieldParseError(t('nodes.unableToParseFieldType')); }; diff --git a/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts b/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts index b425b11663..c5a7cd9f3d 100644 --- a/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts +++ b/invokeai/frontend/web/src/features/nodes/util/schema/parseSchema.ts @@ -1,12 +1,6 @@ import { logger } from 'app/logging/logger'; import { parseify } from 'common/util/serialize'; -import { t } from 'i18next'; -import { reduce } from 'lodash-es'; -import { OpenAPIV3_1 } from 'openapi-types'; -import { - FieldTypeParseError, - UnsupportedFieldTypeError, -} from 'features/nodes/types/error'; +import { FieldParseError } from 'features/nodes/types/error'; import { FieldInputTemplate, FieldOutputTemplate, @@ -18,6 +12,9 @@ import { isInvocationOutputSchemaObject, 
isInvocationSchemaObject, } from 'features/nodes/types/openapi'; +import { t } from 'i18next'; +import { reduce } from 'lodash-es'; +import { OpenAPIV3_1 } from 'openapi-types'; import { buildFieldInputTemplate } from './buildFieldInputTemplate'; import { buildFieldOutputTemplate } from './buildFieldOutputTemplate'; import { parseFieldType } from './parseFieldType'; @@ -133,10 +130,7 @@ export const parseSchema = ( inputsAccumulator[propertyName] = fieldInputTemplate; } catch (e) { - if ( - e instanceof FieldTypeParseError || - e instanceof UnsupportedFieldTypeError - ) { + if (e instanceof FieldParseError) { logger('nodes').warn( { node: type, @@ -225,10 +219,7 @@ export const parseSchema = ( outputsAccumulator[propertyName] = fieldOutputTemplate; } catch (e) { - if ( - e instanceof FieldTypeParseError || - e instanceof UnsupportedFieldTypeError - ) { + if (e instanceof FieldParseError) { logger('nodes').warn( { node: type, From 2f81f9fb223b2a3c7c9aa581b41f82b4ad93fe1e Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 21:06:50 +1100 Subject: [PATCH 05/45] fix(ui): add missing star image translation key --- invokeai/frontend/web/public/locales/en.json | 3 ++- .../components/ImageContextMenu/SingleSelectionMenuItems.tsx | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 8f93f32d90..8663815adb 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -243,7 +243,6 @@ "setControlImageDimensions": "Set Control Image Dimensions To W/H", "showAdvanced": "Show Advanced", "toggleControlNet": "Toggle this ControlNet", - "unstarImage": "Unstar Image", "w": "W", "weight": "Weight", "enableIPAdapter": "Enable IP Adapter", @@ -378,6 +377,8 @@ "showGenerations": "Show Generations", "showUploads": "Show Uploads", "singleColumnLayout": "Single Column Layout", + "starImage": "Star Image", + "unstarImage": "Unstar Image", "unableToLoad": "Unable to load Gallery", "uploads": "Uploads", "deleteSelection": "Delete Selection", diff --git a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx index fdb0809364..1b4e642e64 100644 --- a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/SingleSelectionMenuItems.tsx @@ -234,14 +234,14 @@ const SingleSelectionMenuItems = (props: SingleSelectionMenuItemsProps) => { icon={customStarUi ? customStarUi.off.icon : } onClickCapture={handleUnstarImage} > - {customStarUi ? customStarUi.off.text : t('controlnet.unstarImage')} + {customStarUi ? customStarUi.off.text : t('gallery.unstarImage')} ) : ( } onClickCapture={handleStarImage} > - {customStarUi ? customStarUi.on.text : `Star Image`} + {customStarUi ? 
customStarUi.on.text : t('gallery.starImage')}
)}
Date: Fri, 1 Dec 2023 00:06:56 +1100
Subject: [PATCH 06/45] fix(ui): fix missing images not handled

- Reset init image, control adapter images, and node image fields when
  their selected image fails to load
- Only do this if the app is connected via socket (this indicates that
  the image is "really" gone, and there isn't just a transient network
  issue)

It's possible for image parameters/nodes/states to reference a deleted
image. For example, a resize image node might have an image set on it,
and the workflow saved. The workflow contains a hard reference to that
image.

The image is deleted and the workflow loaded again later. The deleted
image is still in that workflow, but the app doesn't detect that. The
result is that the workflow/graph appears to be valid, but will fail on
invoke.

This creates a really confusing user experience, where somebody shares a
workflow with an image baked into it, another person opens it, and
everything *looks* ok, but the workflow fails with a mysterious error
about a missing image.

The problem affects node images, control adapter images and the img2img
init image. Resetting the image when it fails to load *and* the socket is
connected resolves this in a simple way.

The problem also affects canvas images, but we have handled that by
displaying an error fallback image, so no change is made there.
---
 .../components/ControlAdapterImagePreview.tsx | 42 ++++++++++++-------
 .../inputs/ImageFieldInputComponent.tsx       | 14 +++++--
 .../Parameters/ImageToImage/InitialImage.tsx  | 20 +++++++--
 3 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx
index e12abf4830..b3b584d07e 100644
--- a/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx
+++ b/invokeai/frontend/web/src/features/controlAdapters/components/ControlAdapterImagePreview.tsx
@@ -5,14 +5,19 @@ import { stateSelector } from 'app/store/store';
 import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
 import { defaultSelectorOptions } from 'app/store/util/defaultMemoizeOptions';
 import IAIDndImage from 'common/components/IAIDndImage';
+import IAIDndImageIcon from 'common/components/IAIDndImageIcon';
 import { setBoundingBoxDimensions } from 'features/canvas/store/canvasSlice';
+import { useControlAdapterControlImage } from 'features/controlAdapters/hooks/useControlAdapterControlImage';
+import { useControlAdapterProcessedControlImage } from 'features/controlAdapters/hooks/useControlAdapterProcessedControlImage';
+import { useControlAdapterProcessorType } from 'features/controlAdapters/hooks/useControlAdapterProcessorType';
+import { controlAdapterImageChanged } from 'features/controlAdapters/store/controlAdaptersSlice';
 import {
   TypesafeDraggableData,
   TypesafeDroppableData,
 } from 'features/dnd/types';
 import { setHeight, setWidth } from 'features/parameters/store/generationSlice';
 import { activeTabNameSelector } from 'features/ui/store/uiSelectors';
-import { memo, useCallback, useMemo, useState } from 'react';
+import { memo, useCallback, useEffect, useMemo, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 import { FaRulerVertical, FaSave, FaUndo } from 'react-icons/fa';
 import {
@@ -22,11 +27,6 @@ import {
   useRemoveImageFromBoardMutation,
 } from 'services/api/endpoints/images';
 import { 
PostUploadAction } from 'services/api/types'; -import IAIDndImageIcon from 'common/components/IAIDndImageIcon'; -import { controlAdapterImageChanged } from 'features/controlAdapters/store/controlAdaptersSlice'; -import { useControlAdapterControlImage } from 'features/controlAdapters/hooks/useControlAdapterControlImage'; -import { useControlAdapterProcessedControlImage } from 'features/controlAdapters/hooks/useControlAdapterProcessedControlImage'; -import { useControlAdapterProcessorType } from 'features/controlAdapters/hooks/useControlAdapterProcessorType'; type Props = { id: string; @@ -35,13 +35,15 @@ type Props = { const selector = createSelector( stateSelector, - ({ controlAdapters, gallery }) => { + ({ controlAdapters, gallery, system }) => { const { pendingControlImages } = controlAdapters; const { autoAddBoardId } = gallery; + const { isConnected } = system; return { pendingControlImages, autoAddBoardId, + isConnected, }; }, defaultSelectorOptions @@ -55,18 +57,19 @@ const ControlAdapterImagePreview = ({ isSmall, id }: Props) => { const dispatch = useAppDispatch(); const { t } = useTranslation(); - const { pendingControlImages, autoAddBoardId } = useAppSelector(selector); + const { pendingControlImages, autoAddBoardId, isConnected } = + useAppSelector(selector); const activeTabName = useAppSelector(activeTabNameSelector); const [isMouseOverImage, setIsMouseOverImage] = useState(false); - const { currentData: controlImage } = useGetImageDTOQuery( - controlImageName ?? skipToken - ); + const { currentData: controlImage, isError: isErrorControlImage } = + useGetImageDTOQuery(controlImageName ?? skipToken); - const { currentData: processedControlImage } = useGetImageDTOQuery( - processedControlImageName ?? skipToken - ); + const { + currentData: processedControlImage, + isError: isErrorProcessedControlImage, + } = useGetImageDTOQuery(processedControlImageName ?? skipToken); const [changeIsIntermediate] = useChangeImageIsIntermediateMutation(); const [addToBoard] = useAddImageToBoardMutation(); @@ -158,6 +161,17 @@ const ControlAdapterImagePreview = ({ isSmall, id }: Props) => { !pendingControlImages.includes(id) && processorType !== 'none'; + useEffect(() => { + if (isConnected && (isErrorControlImage || isErrorProcessedControlImage)) { + handleResetControlImage(); + } + }, [ + handleResetControlImage, + isConnected, + isErrorControlImage, + isErrorProcessedControlImage, + ]); + return ( { const { nodeId, field } = props; const dispatch = useAppDispatch(); - - const { currentData: imageDTO } = useGetImageDTOQuery( + const isConnected = useAppSelector((state) => state.system.isConnected); + const { currentData: imageDTO, isError } = useGetImageDTOQuery( field.value?.image_name ?? skipToken ); @@ -67,6 +67,12 @@ const ImageFieldInputComponent = ( [nodeId, field.name] ); + useEffect(() => { + if (isConnected && isError) { + handleReset(); + } + }, [handleReset, isConnected, isError]); + return ( { const { initialImage } = state.generation; + const { isConnected } = state.system; + return { initialImage, isResetButtonDisabled: !initialImage, + isConnected, }; }, defaultSelectorOptions ); const InitialImage = () => { - const { initialImage } = useAppSelector(selector); + const dispatch = useAppDispatch(); + const { initialImage, isConnected } = useAppSelector(selector); - const { currentData: imageDTO } = useGetImageDTOQuery( + const { currentData: imageDTO, isError } = useGetImageDTOQuery( initialImage?.imageName ?? 
skipToken ); @@ -49,6 +54,13 @@ const InitialImage = () => { [] ); + useEffect(() => { + if (isError && isConnected) { + // The image doesn't exist, reset init image + dispatch(clearInitialImage()); + } + }, [dispatch, isConnected, isError]); + return ( Date: Thu, 30 Nov 2023 14:33:10 +0100 Subject: [PATCH 07/45] translationBot(ui): update translation files Updated by "Cleanup translation files" hook in Weblate. Co-authored-by: Hosted Weblate Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ Translation: InvokeAI/Web UI --- invokeai/frontend/web/public/locales/de.json | 3 +-- invokeai/frontend/web/public/locales/zh_CN.json | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/invokeai/frontend/web/public/locales/de.json b/invokeai/frontend/web/public/locales/de.json index 72809cc19d..b67663d6d2 100644 --- a/invokeai/frontend/web/public/locales/de.json +++ b/invokeai/frontend/web/public/locales/de.json @@ -803,8 +803,7 @@ "canny": "Canny", "hedDescription": "Ganzheitlich verschachtelte Kantenerkennung", "scribble": "Scribble", - "maxFaces": "Maximal Anzahl Gesichter", - "unstarImage": "Markierung aufheben" + "maxFaces": "Maximal Anzahl Gesichter" }, "queue": { "status": "Status", diff --git a/invokeai/frontend/web/public/locales/zh_CN.json b/invokeai/frontend/web/public/locales/zh_CN.json index 03838520d3..24105f2b40 100644 --- a/invokeai/frontend/web/public/locales/zh_CN.json +++ b/invokeai/frontend/web/public/locales/zh_CN.json @@ -1137,8 +1137,7 @@ "openPose": "Openpose", "controlAdapter_other": "Control Adapters", "lineartAnime": "Lineart Anime", - "canny": "Canny", - "unstarImage": "取消收藏图像" + "canny": "Canny" }, "queue": { "status": "状态", From 29eade48801e4b60773ed556ec9412f8c7af52ea Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 17 Nov 2023 18:36:28 -0500 Subject: [PATCH 08/45] Add nodes for tile splitting and merging. The main motivation for these nodes is for use in tiled upscaling workflows. --- invokeai/app/invocations/tiles.py | 162 +++++++++++++++++++++++++++++ invokeai/backend/tiles/__init__.py | 0 invokeai/backend/tiles/tiles.py | 155 +++++++++++++++++++++++++++ invokeai/backend/tiles/utils.py | 36 +++++++ 4 files changed, 353 insertions(+) create mode 100644 invokeai/app/invocations/tiles.py create mode 100644 invokeai/backend/tiles/__init__.py create mode 100644 invokeai/backend/tiles/tiles.py create mode 100644 invokeai/backend/tiles/utils.py diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py new file mode 100644 index 0000000000..acc87a7864 --- /dev/null +++ b/invokeai/app/invocations/tiles.py @@ -0,0 +1,162 @@ +import numpy as np +from PIL import Image +from pydantic import BaseModel + +from invokeai.app.invocations.baseinvocation import ( + BaseInvocation, + BaseInvocationOutput, + InputField, + InvocationContext, + OutputField, + WithMetadata, + WithWorkflow, + invocation, + invocation_output, +) +from invokeai.app.invocations.primitives import ImageField, ImageOutput +from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin +from invokeai.backend.tiles.tiles import calc_tiles, merge_tiles_with_linear_blending +from invokeai.backend.tiles.utils import Tile + +# TODO(ryand): Is this important? +_DIMENSION_MULTIPLE_OF = 8 + + +class TileWithImage(BaseModel): + tile: Tile + image: ImageField + + +@invocation_output("calc_tiles_output") +class CalcTilesOutput(BaseInvocationOutput): + # TODO(ryand): Add description from FieldDescriptions. 
+ tiles: list[Tile] = OutputField(description="") + + +@invocation("calculate_tiles", title="Calculate Tiles", tags=["tiles"], category="tiles", version="1.0.0") +class CalcTiles(BaseInvocation): + """TODO(ryand)""" + + # Inputs + image_height: int = InputField(ge=1) + image_width: int = InputField(ge=1) + tile_height: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) + tile_width: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) + overlap: int = InputField(ge=0, multiple_of=_DIMENSION_MULTIPLE_OF, default=64) + + def invoke(self, context: InvocationContext) -> CalcTilesOutput: + tiles = calc_tiles( + image_height=self.image_height, + image_width=self.image_width, + tile_height=self.tile_height, + tile_width=self.tile_width, + overlap=self.overlap, + ) + return CalcTilesOutput(tiles=tiles) + + +@invocation_output("tile_to_properties_output") +class TileToPropertiesOutput(BaseInvocationOutput): + # TODO(ryand): Add descriptions. + coords_top: int = OutputField(description="") + coords_bottom: int = OutputField(description="") + coords_left: int = OutputField(description="") + coords_right: int = OutputField(description="") + + overlap_top: int = OutputField(description="") + overlap_bottom: int = OutputField(description="") + overlap_left: int = OutputField(description="") + overlap_right: int = OutputField(description="") + + +@invocation("tile_to_properties") +class TileToProperties(BaseInvocation): + """Split a Tile into its individual properties.""" + + tile: Tile = InputField() + + def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: + return TileToPropertiesOutput( + coords_top=self.tile.coords.top, + coords_bottom=self.tile.coords.bottom, + coords_left=self.tile.coords.left, + coords_right=self.tile.coords.right, + overlap_top=self.tile.overlap.top, + overlap_bottom=self.tile.overlap.bottom, + overlap_left=self.tile.overlap.left, + overlap_right=self.tile.overlap.right, + ) + + +# HACK(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve order. +# Can this be fixed? + + +@invocation_output("pair_tile_image_output") +class PairTileImageOutput(BaseInvocationOutput): + tile_with_image: TileWithImage = OutputField(description="") + + +@invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") +class PairTileImage(BaseInvocation): + image: ImageField = InputField() + tile: Tile = InputField() + + def invoke(self, context: InvocationContext) -> PairTileImageOutput: + return PairTileImageOutput( + tile_with_image=TileWithImage( + tile=self.tile, + image=self.image, + ) + ) + + +@invocation("merge_tiles_to_image", title="Merge Tiles To Image", tags=["tiles"], category="tiles", version="1.0.0") +class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): + """TODO(ryand)""" + + # Inputs + image_height: int = InputField(ge=1) + image_width: int = InputField(ge=1) + tiles_with_images: list[TileWithImage] = InputField() + blend_amount: int = InputField(ge=0) + + def invoke(self, context: InvocationContext) -> ImageOutput: + images = [twi.image for twi in self.tiles_with_images] + tiles = [twi.tile for twi in self.tiles_with_images] + + # Get all tile images for processing. + # TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly + # existed in memory at an earlier point in the graph. 
+ tile_np_images: list[np.ndarray] = [] + for image in images: + pil_image = context.services.images.get_pil_image(image.image_name) + pil_image = pil_image.convert("RGB") + tile_np_images.append(np.array(pil_image)) + + # Prepare the output image buffer. + # Check the first tile to determine how many image channels are expected in the output. + channels = tile_np_images[0].shape[-1] + dtype = tile_np_images[0].dtype + np_image = np.zeros(shape=(self.image_height, self.image_width, channels), dtype=dtype) + + merge_tiles_with_linear_blending( + dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount + ) + pil_image = Image.fromarray(np_image) + + image_dto = context.services.images.create( + image=pil_image, + image_origin=ResourceOrigin.INTERNAL, + image_category=ImageCategory.GENERAL, + node_id=self.id, + session_id=context.graph_execution_state_id, + is_intermediate=self.is_intermediate, + metadata=self.metadata, + workflow=self.workflow, + ) + return ImageOutput( + image=ImageField(image_name=image_dto.image_name), + width=image_dto.width, + height=image_dto.height, + ) diff --git a/invokeai/backend/tiles/__init__.py b/invokeai/backend/tiles/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py new file mode 100644 index 0000000000..566381d1ff --- /dev/null +++ b/invokeai/backend/tiles/tiles.py @@ -0,0 +1,155 @@ +import math + +import numpy as np + +from invokeai.backend.tiles.utils import TBLR, Tile, paste + +# TODO(ryand) +# Test the following: +# - Tile too big in x, y +# - Overlap too big in x, y +# - Single tile fits +# - Multiple tiles fit perfectly +# - Not evenly divisible by tile size(with overlap) + + +def calc_tiles_with_overlap( + image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int = 0 +) -> list[Tile]: + """Calculate the tile coordinates for a given image shape under a simple tiling scheme with overlaps. + + Args: + image_height (int): The image height in px. + image_width (int): The image width in px. + tile_height (int): The tile height in px. All tiles will have this height. + tile_width (int): The tile width in px. All tiles will have this width. + overlap (int, optional): The target overlap between adjacent tiles. If the tiles do not evenly cover the image + shape, then the last row/column of tiles will overlap more than this. Defaults to 0. + + Returns: + list[Tile]: A list of tiles that cover the image shape. Ordered from left-to-right, top-to-bottom. + """ + assert image_height >= tile_height + assert image_width >= tile_width + assert overlap < tile_height + assert overlap < tile_width + + non_overlap_per_tile_height = tile_height - overlap + non_overlap_per_tile_width = tile_width - overlap + + num_tiles_y = math.ceil((image_height - overlap) / non_overlap_per_tile_height) + num_tiles_x = math.ceil((image_width - overlap) / non_overlap_per_tile_width) + + # Calculate tile coordinates and overlaps. 
+ tiles: list[Tile] = [] + for tile_idx_y in range(num_tiles_y): + for tile_idx_x in range(num_tiles_x): + tile = Tile( + coords=TBLR( + top=tile_idx_y * non_overlap_per_tile_height, + bottom=tile_idx_y * non_overlap_per_tile_height + tile_height, + left=tile_idx_x * non_overlap_per_tile_width, + right=tile_idx_x * non_overlap_per_tile_width + tile_width, + ), + overlap=TBLR( + top=0 if tile_idx_y == 0 else overlap, + bottom=overlap, + left=0 if tile_idx_x == 0 else overlap, + right=overlap, + ), + ) + + if tile.coords.bottom > image_height: + # If this tile would go off the bottom of the image, shift it so that it is aligned with the bottom + # of the image. + tile.coords.bottom = image_height + tile.coords.top = image_height - tile_height + tile.overlap.bottom = 0 + # Note that this could result in a large overlap between this tile and the one above it. + top_neighbor_bottom = (tile_idx_y - 1) * non_overlap_per_tile_height + tile_height + tile.overlap.top = top_neighbor_bottom - tile.coords.top + + if tile.coords.right > image_width: + # If this tile would go off the right edge of the image, shift it so that it is aligned with the + # right edge of the image. + tile.coords.right = image_width + tile.coords.left = image_width - tile_width + tile.overlap.right = 0 + # Note that this could result in a large overlap between this tile and the one to its left. + left_neighbor_right = (tile_idx_x - 1) * non_overlap_per_tile_width + tile_width + tile.overlap.left = left_neighbor_right - tile.coords.left + + tiles.append(tile) + + return tiles + + +# TODO(ryand): +# - Test with blend_amount=0 +# - Test tiles that go off of the dst_image. +# - Test mismatched tiles and tile_images lengths. +# - Test mismatched + + +def merge_tiles_with_linear_blending( + dst_image: np.ndarray, tiles: list[Tile], tile_images: list[np.ndarray], blend_amount: int +): + """Merge a set of image tiles into `dst_image` with linear blending between the tiles. + + We expect every tile edge to either: + 1) have an overlap of 0, because it is aligned with the image edge, or + 2) have an overlap >= blend_amount. + If neither of these conditions are satisfied, we raise an exception. + + The linear blending is centered at the halfway point of the overlap between adjacent tiles. + + Args: + dst_image (np.ndarray): The destination image. Shape: (H, W, C). + tiles (list[Tile]): The list of tiles describing the locations of the respective `tile_images`. + tile_images (list[np.ndarray]): The tile images to merge into `dst_image`. + blend_amount (int): The amount of blending (in px) between adjacent overlapping tiles. + """ + # Sort tiles and images first by left x coordinate, then by top y coordinate. During tile processing, we want to + # iterate over tiles left-to-right, top-to-bottom. + tiles_and_images = list(zip(tiles, tile_images, strict=True)) + tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.left) + tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.top) + + # Prepare 1D linear gradients for blending. + gradient_left_x = np.linspace(start=0.0, stop=1.0, num=blend_amount) + gradient_top_y = np.linspace(start=0.0, stop=1.0, num=blend_amount) + # Convert shape: (blend_amount, ) -> (blend_amount, 1). The extra dimension enables the gradient to be applied + # to a 2D image via broadcasting. Note that no additional dimension is needed on gradient_left_x for + # broadcasting to work correctly. 
+    gradient_top_y = np.expand_dims(gradient_top_y, axis=1)
+
+    for tile, tile_image in tiles_and_images:
+        # We expect tiles to be written left-to-right, top-to-bottom. We construct a mask that applies linear blending
+        # to the top and to the left of the current tile. The inverse linear blending is automatically applied to the
+        # bottom/right of the tiles that have already been pasted by the paste(...) operation.
+        tile_height, tile_width, _ = tile_image.shape
+        mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64)
+        # Top blending:
+        if tile.overlap.top > 0:
+            assert tile.overlap.top >= blend_amount
+            # Center the blending gradient in the middle of the overlap.
+            blend_start_top = tile.overlap.top // 2 - blend_amount // 2
+            # The region above the blending region is masked completely.
+            mask[:blend_start_top, :] = 0.0
+            # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural
+            # behavior on the corners where vertical and horizontal blending gradients overlap.
+            mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y
+            # HACK(ryand): For debugging
+            # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0
+
+        # Left blending:
+        # (See comments under 'top blending' for an explanation of the logic.)
+        if tile.overlap.left > 0:
+            assert tile.overlap.left >= blend_amount
+            blend_start_left = tile.overlap.left // 2 - blend_amount // 2
+            mask[:, :blend_start_left] = 0.0
+            mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x
+            # HACK(ryand): For debugging
+            # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0
+
+        paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask)
diff --git a/invokeai/backend/tiles/utils.py b/invokeai/backend/tiles/utils.py
new file mode 100644
index 0000000000..cf8e926aa5
--- /dev/null
+++ b/invokeai/backend/tiles/utils.py
@@ -0,0 +1,36 @@
+from typing import Optional
+
+import numpy as np
+from pydantic import BaseModel, Field
+
+
+class TBLR(BaseModel):
+    top: int
+    bottom: int
+    left: int
+    right: int
+
+
+class Tile(BaseModel):
+    coords: TBLR = Field(description="The coordinates of this tile relative to its parent image.")
+    overlap: TBLR = Field(description="The amount of overlap with adjacent tiles on each side of this tile.")
+
+
+def paste(dst_image: np.ndarray, src_image: np.ndarray, box: TBLR, mask: Optional[np.ndarray] = None):
+    """Paste a source image into a destination image.
+
+    Args:
+        dst_image (np.ndarray): The destination image to paste into. Shape: (H, W, C).
+        src_image (np.ndarray): The source image to paste. Shape: (H, W, C). H and W must be compatible with 'box'.
+        box (TBLR): Box defining the region in the 'dst_image' where 'src_image' will be pasted.
+        mask (Optional[np.ndarray]): A mask that defines the blending between 'src_image' and 'dst_image'.
+            Range: [0.0, 1.0], Shape: (H, W). The output is calculated per-pixel according to
+            `src * mask + dst * (1 - mask)`.
+    """
+
+    if mask is None:
+        dst_image[box.top : box.bottom, box.left : box.right] = src_image
+    else:
+        mask = np.expand_dims(mask, -1)
+        dst_image_box = dst_image[box.top : box.bottom, box.left : box.right]
+        dst_image[box.top : box.bottom, box.left : box.right] = src_image * mask + dst_image_box * (1.0 - mask)
From 1c8ff0ae669281fce7321482e9e2c534ddeb78e3 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Mon, 20 Nov 2023 11:53:40 -0500
Subject: [PATCH 09/45] Add unit tests for tile paste(...) util function. 
--- tests/backend/tiles/test_utils.py | 101 ++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tests/backend/tiles/test_utils.py diff --git a/tests/backend/tiles/test_utils.py b/tests/backend/tiles/test_utils.py new file mode 100644 index 0000000000..bbef233ca5 --- /dev/null +++ b/tests/backend/tiles/test_utils.py @@ -0,0 +1,101 @@ +import numpy as np +import pytest + +from invokeai.backend.tiles.utils import TBLR, paste + + +def test_paste_no_mask_success(): + """Test successful paste with mask=None.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + + # Create src_image with a pattern that can be used to validate that it was pasted correctly. + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + src_image[0, :, 0] = 1 # Row of 1s in channel 0. + src_image[:, 0, 1] = 2 # Column of 2s in channel 1. + + # Paste in bottom-center of dst_image. + box = TBLR(top=2, bottom=5, left=1, right=4) + + # Construct expected output image. + expected_output = np.zeros((5, 5, 3), dtype=np.uint8) + expected_output[2, 1:4, 0] = 1 + expected_output[2:5, 1, 1] = 2 + + paste(dst_image=dst_image, src_image=src_image, box=box) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +def test_paste_with_mask_success(): + """Test successful paste with a mask.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + + # Create src_image with a pattern that can be used to validate that it was pasted correctly. + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + src_image[0, :, 0] = 64 # Row of 64s in channel 0. + src_image[:, 0, 1] = 128 # Column of 128s in channel 1. + + # Paste in bottom-center of dst_image. + box = TBLR(top=2, bottom=5, left=1, right=4) + + # Create a mask that blends the top-left corner of 'src_image' at 50%, and ignores the rest of src_image. + mask = np.zeros((3, 3)) + mask[0, 0] = 0.5 + + # Construct expected output image. + expected_output = np.zeros((5, 5, 3), dtype=np.uint8) + expected_output[2, 1, 0] = 32 + expected_output[2, 1, 1] = 64 + + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +@pytest.mark.parametrize("use_mask", [True, False]) +def test_paste_box_overflows_dst_image(use_mask: bool): + """Test that an exception is raised if 'box' overflows the 'dst_image'.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + mask = None + if use_mask: + mask = np.zeros((3, 3)) + + # Construct box that overflows bottom of dst_image. + top = 3 + left = 0 + box = TBLR(top=top, bottom=top + src_image.shape[0], left=left, right=left + src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + +@pytest.mark.parametrize("use_mask", [True, False]) +def test_paste_src_image_does_not_match_box(use_mask: bool): + """Test that an exception is raised if the 'src_image' shape does not match the 'box' dimensions.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + mask = None + if use_mask: + mask = np.zeros((3, 3)) + + # Construct box that is smaller than src_image. 
+ box = TBLR(top=0, bottom=src_image.shape[0] - 1, left=0, right=src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) + + +def test_paste_mask_does_not_match_src_image(): + """Test that an exception is raised if the 'mask' shape is different than the 'src_image' shape.""" + dst_image = np.zeros((5, 5, 3), dtype=np.uint8) + src_image = np.zeros((3, 3, 3), dtype=np.uint8) + + # Construct mask that is smaller than the src_image. + mask = np.zeros((src_image.shape[0] - 1, src_image.shape[1])) + + # Construct box that matches src_image shape. + box = TBLR(top=0, bottom=src_image.shape[0], left=0, right=src_image.shape[1]) + + with pytest.raises(ValueError): + paste(dst_image=dst_image, src_image=src_image, box=box, mask=mask) From 65a16be299857109aa3c009c386083827b8b5c19 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 14:23:49 -0500 Subject: [PATCH 10/45] Add unit tests for calc_tiles_with_overlap(...) and fix a bug in its implementation. --- invokeai/backend/tiles/tiles.py | 52 ++++++++++--------- invokeai/backend/tiles/utils.py | 11 ++++ tests/backend/tiles/test_tiles.py | 84 +++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 23 deletions(-) create mode 100644 tests/backend/tiles/test_tiles.py diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 566381d1ff..5e5c4b7050 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -1,17 +1,10 @@ import math +from typing import Union import numpy as np from invokeai.backend.tiles.utils import TBLR, Tile, paste -# TODO(ryand) -# Test the following: -# - Tile too big in x, y -# - Overlap too big in x, y -# - Single tile fits -# - Multiple tiles fit perfectly -# - Not evenly divisible by tile size(with overlap) - def calc_tiles_with_overlap( image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int = 0 @@ -40,8 +33,10 @@ def calc_tiles_with_overlap( num_tiles_y = math.ceil((image_height - overlap) / non_overlap_per_tile_height) num_tiles_x = math.ceil((image_width - overlap) / non_overlap_per_tile_width) - # Calculate tile coordinates and overlaps. + # tiles[y * num_tiles_x + x] is the tile for the y'th row, x'th column. tiles: list[Tile] = [] + + # Calculate tile coordinates. (Ignore overlap values for now.) for tile_idx_y in range(num_tiles_y): for tile_idx_x in range(num_tiles_x): tile = Tile( @@ -51,12 +46,7 @@ def calc_tiles_with_overlap( left=tile_idx_x * non_overlap_per_tile_width, right=tile_idx_x * non_overlap_per_tile_width + tile_width, ), - overlap=TBLR( - top=0 if tile_idx_y == 0 else overlap, - bottom=overlap, - left=0 if tile_idx_x == 0 else overlap, - right=overlap, - ), + overlap=TBLR(top=0, bottom=0, left=0, right=0), ) if tile.coords.bottom > image_height: @@ -64,23 +54,39 @@ def calc_tiles_with_overlap( # of the image. tile.coords.bottom = image_height tile.coords.top = image_height - tile_height - tile.overlap.bottom = 0 - # Note that this could result in a large overlap between this tile and the one above it. - top_neighbor_bottom = (tile_idx_y - 1) * non_overlap_per_tile_height + tile_height - tile.overlap.top = top_neighbor_bottom - tile.coords.top if tile.coords.right > image_width: # If this tile would go off the right edge of the image, shift it so that it is aligned with the # right edge of the image. 
tile.coords.right = image_width tile.coords.left = image_width - tile_width - tile.overlap.right = 0 - # Note that this could result in a large overlap between this tile and the one to its left. - left_neighbor_right = (tile_idx_x - 1) * non_overlap_per_tile_width + tile_width - tile.overlap.left = left_neighbor_right - tile.coords.left tiles.append(tile) + def get_tile_or_none(idx_y: int, idx_x: int) -> Union[Tile, None]: + if idx_y < 0 or idx_y > num_tiles_y or idx_x < 0 or idx_x > num_tiles_x: + return None + return tiles[idx_y * num_tiles_x + idx_x] + + # Iterate over tiles again and calculate overlaps. + for tile_idx_y in range(num_tiles_y): + for tile_idx_x in range(num_tiles_x): + cur_tile = get_tile_or_none(tile_idx_y, tile_idx_x) + top_neighbor_tile = get_tile_or_none(tile_idx_y - 1, tile_idx_x) + left_neighbor_tile = get_tile_or_none(tile_idx_y, tile_idx_x - 1) + + assert cur_tile is not None + + # Update cur_tile top-overlap and corresponding top-neighbor bottom-overlap. + if top_neighbor_tile is not None: + cur_tile.overlap.top = max(0, top_neighbor_tile.coords.bottom - cur_tile.coords.top) + top_neighbor_tile.overlap.bottom = cur_tile.overlap.top + + # Update cur_tile left-overlap and corresponding left-neighbor right-overlap. + if left_neighbor_tile is not None: + cur_tile.overlap.left = max(0, left_neighbor_tile.coords.right - cur_tile.coords.left) + left_neighbor_tile.overlap.right = cur_tile.overlap.left + return tiles diff --git a/invokeai/backend/tiles/utils.py b/invokeai/backend/tiles/utils.py index cf8e926aa5..4ad40ffa35 100644 --- a/invokeai/backend/tiles/utils.py +++ b/invokeai/backend/tiles/utils.py @@ -10,11 +10,22 @@ class TBLR(BaseModel): left: int right: int + def __eq__(self, other): + return ( + self.top == other.top + and self.bottom == other.bottom + and self.left == other.left + and self.right == other.right + ) + class Tile(BaseModel): coords: TBLR = Field(description="The coordinates of this tile relative to its parent image.") overlap: TBLR = Field(description="The amount of overlap with adjacent tiles on each side of this tile.") + def __eq__(self, other): + return self.coords == other.coords and self.overlap == other.overlap + def paste(dst_image: np.ndarray, src_image: np.ndarray, box: TBLR, mask: Optional[np.ndarray] = None): """Paste a source image into a destination image. diff --git a/tests/backend/tiles/test_tiles.py b/tests/backend/tiles/test_tiles.py new file mode 100644 index 0000000000..332ab15005 --- /dev/null +++ b/tests/backend/tiles/test_tiles.py @@ -0,0 +1,84 @@ +import pytest + +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap +from invokeai.backend.tiles.utils import TBLR, Tile + +#################################### +# Test calc_tiles_with_overlap(...) +#################################### + + +def test_calc_tiles_with_overlap_single_tile(): + """Test calc_tiles_with_overlap() behavior when a single tile covers the image.""" + tiles = calc_tiles_with_overlap(image_height=512, image_width=1024, tile_height=512, tile_width=1024, overlap=64) + + expected_tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=1024), overlap=TBLR(top=0, bottom=0, left=0, right=0)) + ] + + assert tiles == expected_tiles + + +def test_calc_tiles_with_overlap_evenly_divisible(): + """Test calc_tiles_with_overlap() behavior when the image is evenly covered by multiple tiles.""" + # Parameters chosen so that image is evenly covered by 2 rows, 3 columns of tiles. 
+ tiles = calc_tiles_with_overlap(image_height=576, image_width=1600, tile_height=320, tile_width=576, overlap=64) + + expected_tiles = [ + # Row 0 + Tile(coords=TBLR(top=0, bottom=320, left=0, right=576), overlap=TBLR(top=0, bottom=64, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=320, left=512, right=1088), overlap=TBLR(top=0, bottom=64, left=64, right=64)), + Tile(coords=TBLR(top=0, bottom=320, left=1024, right=1600), overlap=TBLR(top=0, bottom=64, left=64, right=0)), + # Row 1 + Tile(coords=TBLR(top=256, bottom=576, left=0, right=576), overlap=TBLR(top=64, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=256, bottom=576, left=512, right=1088), overlap=TBLR(top=64, bottom=0, left=64, right=64)), + Tile(coords=TBLR(top=256, bottom=576, left=1024, right=1600), overlap=TBLR(top=64, bottom=0, left=64, right=0)), + ] + + assert tiles == expected_tiles + + +def test_calc_tiles_with_overlap_not_evenly_divisible(): + """Test calc_tiles_with_overlap() behavior when the image requires 'uneven' overlaps to achieve proper coverage.""" + # Parameters chosen so that image is covered by 2 rows and 3 columns of tiles, with uneven overlaps. + tiles = calc_tiles_with_overlap(image_height=400, image_width=1200, tile_height=256, tile_width=512, overlap=64) + + expected_tiles = [ + # Row 0 + Tile(coords=TBLR(top=0, bottom=256, left=0, right=512), overlap=TBLR(top=0, bottom=112, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=256, left=448, right=960), overlap=TBLR(top=0, bottom=112, left=64, right=272)), + Tile(coords=TBLR(top=0, bottom=256, left=688, right=1200), overlap=TBLR(top=0, bottom=112, left=272, right=0)), + # Row 1 + Tile(coords=TBLR(top=144, bottom=400, left=0, right=512), overlap=TBLR(top=112, bottom=0, left=0, right=64)), + Tile( + coords=TBLR(top=144, bottom=400, left=448, right=960), overlap=TBLR(top=112, bottom=0, left=64, right=272) + ), + Tile( + coords=TBLR(top=144, bottom=400, left=688, right=1200), overlap=TBLR(top=112, bottom=0, left=272, right=0) + ), + ] + + assert tiles == expected_tiles + + +@pytest.mark.parametrize( + ["image_height", "image_width", "tile_height", "tile_width", "overlap", "raises"], + [ + (128, 128, 128, 128, 127, False), # OK + (128, 128, 128, 128, 0, False), # OK + (128, 128, 64, 64, 0, False), # OK + (128, 128, 129, 128, 0, True), # tile_height exceeds image_height. + (128, 128, 128, 129, 0, True), # tile_width exceeds image_width. + (128, 128, 64, 128, 64, True), # overlap equals tile_height. + (128, 128, 128, 64, 64, True), # overlap equals tile_width. + ], +) +def test_calc_tiles_with_overlap_input_validation( + image_height: int, image_width: int, tile_height: int, tile_width: int, overlap: int, raises: bool +): + """Test that calc_tiles_with_overlap() raises an exception if the inputs are invalid.""" + if raises: + with pytest.raises(AssertionError): + calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) + else: + calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) From 76b888de1769239ef072cc830a06c9d9bacfbc52 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 15:42:23 -0500 Subject: [PATCH 11/45] Add unit tests for merge_tiles_with_linear_blending(...). 
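Taken together with the previous commit, the intended end-to-end flow looks roughly like the sketch below. This
is an editorial illustration reusing the tile parameters from the "not evenly divisible" test above, not code
from this patch; the per-tile processing step is deliberately elided.

    import numpy as np

    from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending

    # Split a 400x1200 image into 256x512 tiles with a target overlap of 64px (2 rows x 3 columns = 6 tiles).
    tiles = calc_tiles_with_overlap(image_height=400, image_width=1200, tile_height=256, tile_width=512, overlap=64)

    image = np.zeros((400, 1200, 3), dtype=np.uint8)

    # Crop out each tile; a real workflow would process each crop (e.g. upscale or re-denoise it) here.
    tile_images = [image[t.coords.top : t.coords.bottom, t.coords.left : t.coords.right] for t in tiles]

    # Re-assemble the (processed) tiles, blending 32px across each seam. blend_amount must not exceed the
    # smallest non-zero overlap between adjacent tiles.
    output = np.zeros_like(image)
    merge_tiles_with_linear_blending(dst_image=output, tiles=tiles, tile_images=tile_images, blend_amount=32)
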
--- invokeai/backend/tiles/tiles.py | 11 +-- tests/backend/tiles/test_tiles.py | 142 +++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 10 deletions(-) diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 5e5c4b7050..3d64e3e145 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -90,13 +90,6 @@ def calc_tiles_with_overlap( return tiles -# TODO(ryand): -# - Test with blend_amount=0 -# - Test tiles that go off of the dst_image. -# - Test mismatched tiles and tile_images lengths. -# - Test mismatched - - def merge_tiles_with_linear_blending( dst_image: np.ndarray, tiles: list[Tile], tile_images: list[np.ndarray], blend_amount: int ): @@ -145,7 +138,7 @@ def merge_tiles_with_linear_blending( # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural # behavior on the corners where vertical and horizontal blending gradients overlap. mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y - # HACK(ryand): For debugging + # For visual debugging: # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0 # Left blending: @@ -155,7 +148,7 @@ def merge_tiles_with_linear_blending( blend_start_left = tile.overlap.left // 2 - blend_amount // 2 mask[:, :blend_start_left] = 0.0 mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x - # HACK(ryand): For debugging + # For visual debugging: # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask) diff --git a/tests/backend/tiles/test_tiles.py b/tests/backend/tiles/test_tiles.py index 332ab15005..353e65d336 100644 --- a/tests/backend/tiles/test_tiles.py +++ b/tests/backend/tiles/test_tiles.py @@ -1,6 +1,7 @@ +import numpy as np import pytest -from invokeai.backend.tiles.tiles import calc_tiles_with_overlap +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending from invokeai.backend.tiles.utils import TBLR, Tile #################################### @@ -82,3 +83,142 @@ def test_calc_tiles_with_overlap_input_validation( calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) else: calc_tiles_with_overlap(image_height, image_width, tile_height, tile_width, overlap) + + +############################################# +# Test merge_tiles_with_linear_blending(...) +############################################# + + +@pytest.mark.parametrize("blend_amount", [0, 32]) +def test_merge_tiles_with_linear_blending_horizontal(blend_amount: int): + """Test merge_tiles_with_linear_blending(...) behavior when merging horizontally.""" + # Initialize 2 tiles side-by-side. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=512, left=448, right=960), overlap=TBLR(top=0, bottom=0, left=64, right=0)), + ] + + dst_image = np.zeros((512, 960, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. Pixel values are set based on the tile index. + tile_images = [ + np.zeros((512, 512, 3)) + 64, + np.zeros((512, 512, 3)) + 128, + ] + + # Calculate expected output. 
+ expected_output = np.zeros((512, 960, 3), dtype=np.uint8) + expected_output[:, : 480 - (blend_amount // 2), :] = 64 + if blend_amount > 0: + gradient = np.linspace(start=64, stop=128, num=blend_amount, dtype=np.uint8).reshape((1, blend_amount, 1)) + expected_output[:, 480 - (blend_amount // 2) : 480 + (blend_amount // 2), :] = gradient + expected_output[:, 480 + (blend_amount // 2) :, :] = 128 + + merge_tiles_with_linear_blending( + dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=blend_amount + ) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +@pytest.mark.parametrize("blend_amount", [0, 32]) +def test_merge_tiles_with_linear_blending_vertical(blend_amount: int): + """Test merge_tiles_with_linear_blending(...) behavior when merging vertically.""" + # Initialize 2 tiles stacked vertically. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=64, left=0, right=0)), + Tile(coords=TBLR(top=448, bottom=960, left=0, right=512), overlap=TBLR(top=64, bottom=0, left=0, right=0)), + ] + + dst_image = np.zeros((960, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. Pixel values are set based on the tile index. + tile_images = [ + np.zeros((512, 512, 3)) + 64, + np.zeros((512, 512, 3)) + 128, + ] + + # Calculate expected output. + expected_output = np.zeros((960, 512, 3), dtype=np.uint8) + expected_output[: 480 - (blend_amount // 2), :, :] = 64 + if blend_amount > 0: + gradient = np.linspace(start=64, stop=128, num=blend_amount, dtype=np.uint8).reshape((blend_amount, 1, 1)) + expected_output[480 - (blend_amount // 2) : 480 + (blend_amount // 2), :, :] = gradient + expected_output[480 + (blend_amount // 2) :, :, :] = 128 + + merge_tiles_with_linear_blending( + dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=blend_amount + ) + + np.testing.assert_array_equal(dst_image, expected_output, strict=True) + + +def test_merge_tiles_with_linear_blending_blend_amount_exceeds_vertical_overlap(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if 'blend_amount' exceeds the overlap between + any vertically adjacent tiles. + """ + # Initialize 2 tiles stacked vertically. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=64, left=0, right=0)), + Tile(coords=TBLR(top=448, bottom=960, left=0, right=512), overlap=TBLR(top=64, bottom=0, left=0, right=0)), + ] + + dst_image = np.zeros((960, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. + tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + # blend_amount=128 exceeds overlap of 64, so should raise exception. + with pytest.raises(AssertionError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=128) + + +def test_merge_tiles_with_linear_blending_blend_amount_exceeds_horizontal_overlap(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if 'blend_amount' exceeds the overlap between + any horizontally adjacent tiles. + """ + # Initialize 2 tiles side-by-side. + tiles = [ + Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=64)), + Tile(coords=TBLR(top=0, bottom=512, left=448, right=960), overlap=TBLR(top=0, bottom=0, left=64, right=0)), + ] + + dst_image = np.zeros((512, 960, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. 
+ tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + # blend_amount=128 exceeds overlap of 64, so should raise exception. + with pytest.raises(AssertionError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=128) + + +def test_merge_tiles_with_linear_blending_tiles_overflow_dst_image(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if any of the tiles overflows the + dst_image. + """ + tiles = [Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=0))] + + dst_image = np.zeros((256, 512, 3), dtype=np.uint8) + + # Prepare tile_images that match tiles. + tile_images = [np.zeros((512, 512, 3))] + + with pytest.raises(ValueError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=0) + + +def test_merge_tiles_with_linear_blending_mismatched_list_lengths(): + """Test that merge_tiles_with_linear_blending(...) raises an exception if the lengths of 'tiles' and 'tile_images' + do not match. + """ + tiles = [Tile(coords=TBLR(top=0, bottom=512, left=0, right=512), overlap=TBLR(top=0, bottom=0, left=0, right=0))] + + dst_image = np.zeros((256, 512, 3), dtype=np.uint8) + + # tile_images is longer than tiles, so should cause an exception. + tile_images = [np.zeros((512, 512, 3)), np.zeros((512, 512, 3))] + + with pytest.raises(ValueError): + merge_tiles_with_linear_blending(dst_image=dst_image, tiles=tiles, tile_images=tile_images, blend_amount=0) From 7f816c9243d5522173d5ee9de9c2dc0e87844991 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 20 Nov 2023 17:18:13 -0500 Subject: [PATCH 12/45] Tidy up tiles invocations, add documentation. --- invokeai/app/invocations/tiles.py | 92 ++++++++++++++++--------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index acc87a7864..c6499c45d6 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -15,65 +15,65 @@ from invokeai.app.invocations.baseinvocation import ( ) from invokeai.app.invocations.primitives import ImageField, ImageOutput from invokeai.app.services.image_records.image_records_common import ImageCategory, ResourceOrigin -from invokeai.backend.tiles.tiles import calc_tiles, merge_tiles_with_linear_blending +from invokeai.backend.tiles.tiles import calc_tiles_with_overlap, merge_tiles_with_linear_blending from invokeai.backend.tiles.utils import Tile -# TODO(ryand): Is this important? -_DIMENSION_MULTIPLE_OF = 8 - class TileWithImage(BaseModel): tile: Tile image: ImageField -@invocation_output("calc_tiles_output") -class CalcTilesOutput(BaseInvocationOutput): - # TODO(ryand): Add description from FieldDescriptions. 
- tiles: list[Tile] = OutputField(description="") +@invocation_output("calculate_image_tiles_output") +class CalculateImageTilesOutput(BaseInvocationOutput): + tiles: list[Tile] = OutputField(description="The tiles coordinates that cover a particular image shape.") -@invocation("calculate_tiles", title="Calculate Tiles", tags=["tiles"], category="tiles", version="1.0.0") -class CalcTiles(BaseInvocation): - """TODO(ryand)""" +@invocation("calculate_image_tiles", title="Calculate Image Tiles", tags=["tiles"], category="tiles", version="1.0.0") +class CalculateImageTiles(BaseInvocation): + """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" - # Inputs - image_height: int = InputField(ge=1) - image_width: int = InputField(ge=1) - tile_height: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) - tile_width: int = InputField(ge=1, multiple_of=_DIMENSION_MULTIPLE_OF, default=576) - overlap: int = InputField(ge=0, multiple_of=_DIMENSION_MULTIPLE_OF, default=64) + image_height: int = InputField( + ge=1, default=1024, description="The image height, in pixels, to calculate tiles for." + ) + image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") + tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") + tile_width: int = InputField(ge=1, default=576, description="The tile width, in pixels.") + overlap: int = InputField( + ge=0, + default=128, + description="The target overlap, in pixels, between adjacent tiles. Adjacent tiles will overlap by at least this amount", + ) - def invoke(self, context: InvocationContext) -> CalcTilesOutput: - tiles = calc_tiles( + def invoke(self, context: InvocationContext) -> CalculateImageTilesOutput: + tiles = calc_tiles_with_overlap( image_height=self.image_height, image_width=self.image_width, tile_height=self.tile_height, tile_width=self.tile_width, overlap=self.overlap, ) - return CalcTilesOutput(tiles=tiles) + return CalculateImageTilesOutput(tiles=tiles) @invocation_output("tile_to_properties_output") class TileToPropertiesOutput(BaseInvocationOutput): - # TODO(ryand): Add descriptions. 
- coords_top: int = OutputField(description="") - coords_bottom: int = OutputField(description="") - coords_left: int = OutputField(description="") - coords_right: int = OutputField(description="") + coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") + coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") + coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") + coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") - overlap_top: int = OutputField(description="") - overlap_bottom: int = OutputField(description="") - overlap_left: int = OutputField(description="") - overlap_right: int = OutputField(description="") + overlap_top: int = OutputField(description="Overlap between this tile and its top neighbor.") + overlap_bottom: int = OutputField(description="Overlap between this tile and its bottom neighbor.") + overlap_left: int = OutputField(description="Overlap between this tile and its left neighbor.") + overlap_right: int = OutputField(description="Overlap between this tile and its right neighbor.") -@invocation("tile_to_properties") +@invocation("tile_to_properties", title="Tile to Properties", tags=["tiles"], category="tiles", version="1.0.0") class TileToProperties(BaseInvocation): """Split a Tile into its individual properties.""" - tile: Tile = InputField() + tile: Tile = InputField(description="The tile to split into properties.") def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: return TileToPropertiesOutput( @@ -88,19 +88,20 @@ class TileToProperties(BaseInvocation): ) -# HACK(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve order. -# Can this be fixed? - - @invocation_output("pair_tile_image_output") class PairTileImageOutput(BaseInvocationOutput): - tile_with_image: TileWithImage = OutputField(description="") + tile_with_image: TileWithImage = OutputField(description="A tile description with its corresponding image.") @invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") class PairTileImage(BaseInvocation): - image: ImageField = InputField() - tile: Tile = InputField() + """Pair an image with its tile properties.""" + + # TODO(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve + # order. Can this be fixed? 
+ + image: ImageField = InputField(description="The tile image.") + tile: Tile = InputField(description="The tile properties.") def invoke(self, context: InvocationContext) -> PairTileImageOutput: return PairTileImageOutput( @@ -111,15 +112,18 @@ class PairTileImage(BaseInvocation): ) -@invocation("merge_tiles_to_image", title="Merge Tiles To Image", tags=["tiles"], category="tiles", version="1.0.0") +@invocation("merge_tiles_to_image", title="Merge Tiles to Image", tags=["tiles"], category="tiles", version="1.0.0") class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): - """TODO(ryand)""" + """Merge multiple tile images into a single image.""" # Inputs - image_height: int = InputField(ge=1) - image_width: int = InputField(ge=1) - tiles_with_images: list[TileWithImage] = InputField() - blend_amount: int = InputField(ge=0) + image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") + image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") + tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") + blend_amount: int = InputField( + ge=0, + description="The amount to blend adjacent tiles in pixels. Must be <= the amount of overlap between adjacent tiles.", + ) def invoke(self, context: InvocationContext) -> ImageOutput: images = [twi.image for twi in self.tiles_with_images] From 67540c9ee064b1c4f5e9aa1c71df09be6300bbbd Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 23 Nov 2023 10:52:03 -0500 Subject: [PATCH 13/45] (minor) Add 'Invocation' suffix to all tiling node classes. --- invokeai/app/invocations/tiles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index c6499c45d6..927e99be64 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -30,7 +30,7 @@ class CalculateImageTilesOutput(BaseInvocationOutput): @invocation("calculate_image_tiles", title="Calculate Image Tiles", tags=["tiles"], category="tiles", version="1.0.0") -class CalculateImageTiles(BaseInvocation): +class CalculateImageTilesInvocation(BaseInvocation): """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" image_height: int = InputField( @@ -70,7 +70,7 @@ class TileToPropertiesOutput(BaseInvocationOutput): @invocation("tile_to_properties", title="Tile to Properties", tags=["tiles"], category="tiles", version="1.0.0") -class TileToProperties(BaseInvocation): +class TileToPropertiesInvocation(BaseInvocation): """Split a Tile into its individual properties.""" tile: Tile = InputField(description="The tile to split into properties.") @@ -94,7 +94,7 @@ class PairTileImageOutput(BaseInvocationOutput): @invocation("pair_tile_image", title="Pair Tile with Image", tags=["tiles"], category="tiles", version="1.0.0") -class PairTileImage(BaseInvocation): +class PairTileImageInvocation(BaseInvocation): """Pair an image with its tile properties.""" # TODO(ryand): The only reason that PairTileImage is needed is because the iterate/collect nodes don't preserve @@ -113,7 +113,7 @@ class PairTileImage(BaseInvocation): @invocation("merge_tiles_to_image", title="Merge Tiles to Image", tags=["tiles"], category="tiles", version="1.0.0") -class MergeTilesToImage(BaseInvocation, WithMetadata, WithWorkflow): +class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # 
Inputs From 843f2d71d663ac95f970645489fbc2f7f74be9fb Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:02:10 -0500 Subject: [PATCH 14/45] Copy CropLatentsInvocation from https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80. --- invokeai/app/invocations/latent.py | 53 ++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ab59b41865..26294ed7f7 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1166,3 +1166,56 @@ class BlendLatentsInvocation(BaseInvocation): # context.services.latents.set(name, resized_latents) context.services.latents.save(name, blended_latents) return build_latents_output(latents_name=name, latents=blended_latents) + + +@invocation( + "lcrop", + title="Crop Latents", + tags=["latents", "crop"], + category="latents", + version="1.0.0", +) +class CropLatentsInvocation(BaseInvocation): + """Crops latents""" + + latents: LatentsField = InputField( + description=FieldDescriptions.latents, + input=Input.Connection, + ) + width: int = InputField( + ge=64, + multiple_of=_downsampling_factor, + description=FieldDescriptions.width, + ) + height: int = InputField( + ge=64, + multiple_of=_downsampling_factor, + description=FieldDescriptions.width, + ) + x_offset: int = InputField( + ge=0, + multiple_of=_downsampling_factor, + description="x-coordinate", + ) + y_offset: int = InputField( + ge=0, + multiple_of=_downsampling_factor, + description="y-coordinate", + ) + + def invoke(self, context: InvocationContext) -> LatentsOutput: + latents = context.services.latents.get(self.latents.latents_name) + + x1 = self.x_offset // _downsampling_factor + y1 = self.y_offset // _downsampling_factor + x2 = x1 + (self.width // _downsampling_factor) + y2 = y1 + (self.height // _downsampling_factor) + + cropped_latents = latents[:, :, y1:y2, x1:x2] + + # resized_latents = resized_latents.to("cpu") + + name = f"{context.graph_execution_state_id}__{self.id}" + context.services.latents.save(name, cropped_latents) + + return build_latents_output(latents_name=name, latents=cropped_latents) From 18c6ff427ec2eabf7721ad078cd4179edf795d00 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:12:15 -0500 Subject: [PATCH 15/45] Use LATENT_SCALE_FACTOR = 8 constant in CropLatentsInvocation. --- invokeai/app/invocations/latent.py | 34 ++++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 26294ed7f7..49ffa1f7e9 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -79,6 +79,12 @@ DEFAULT_PRECISION = choose_precision(choose_torch_device()) SAMPLER_NAME_VALUES = Literal[tuple(SCHEDULER_MAP.keys())] +# HACK: Many nodes are currently hard-coded to use a fixed latent scale factor of 8. This is fragile, and will need to +# be addressed if future models use a different latent scale factor. Also, note that there may be places where the scale +# factor is hard-coded to a literal '8' rather than using this constant. +# The ratio of image:latent dimensions is LATENT_SCALE_FACTOR:1, or 8:1. 
+LATENT_SCALE_FACTOR = 8 + @invocation_output("scheduler_output") class SchedulerOutput(BaseInvocationOutput): @@ -394,9 +400,9 @@ class DenoiseLatentsInvocation(BaseInvocation): exit_stack: ExitStack, do_classifier_free_guidance: bool = True, ) -> List[ControlNetData]: - # assuming fixed dimensional scaling of 8:1 for image:latents - control_height_resize = latents_shape[2] * 8 - control_width_resize = latents_shape[3] * 8 + # Assuming fixed dimensional scaling of LATENT_SCALE_FACTOR. + control_height_resize = latents_shape[2] * LATENT_SCALE_FACTOR + control_width_resize = latents_shape[3] * LATENT_SCALE_FACTOR if control_input is None: control_list = None elif isinstance(control_input, list) and len(control_input) == 0: @@ -909,12 +915,12 @@ class ResizeLatentsInvocation(BaseInvocation): ) width: int = InputField( ge=64, - multiple_of=8, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) height: int = InputField( ge=64, - multiple_of=8, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) mode: LATENTS_INTERPOLATION_MODE = InputField(default="bilinear", description=FieldDescriptions.interp_mode) @@ -928,7 +934,7 @@ class ResizeLatentsInvocation(BaseInvocation): resized_latents = torch.nn.functional.interpolate( latents.to(device), - size=(self.height // 8, self.width // 8), + size=(self.height // LATENT_SCALE_FACTOR, self.width // LATENT_SCALE_FACTOR), mode=self.mode, antialias=self.antialias if self.mode in ["bilinear", "bicubic"] else False, ) @@ -1184,32 +1190,32 @@ class CropLatentsInvocation(BaseInvocation): ) width: int = InputField( ge=64, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) height: int = InputField( ge=64, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description=FieldDescriptions.width, ) x_offset: int = InputField( ge=0, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description="x-coordinate", ) y_offset: int = InputField( ge=0, - multiple_of=_downsampling_factor, + multiple_of=LATENT_SCALE_FACTOR, description="y-coordinate", ) def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) - x1 = self.x_offset // _downsampling_factor - y1 = self.y_offset // _downsampling_factor - x2 = x1 + (self.width // _downsampling_factor) - y2 = y1 + (self.height // _downsampling_factor) + x1 = self.x_offset // LATENT_SCALE_FACTOR + y1 = self.y_offset // LATENT_SCALE_FACTOR + x2 = x1 + (self.width // LATENT_SCALE_FACTOR) + y2 = y1 + (self.height // LATENT_SCALE_FACTOR) cropped_latents = latents[:, :, y1:y2, x1:x2] From 7cab51745b8684e9737ccda334b8191c27272419 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 11:30:00 -0500 Subject: [PATCH 16/45] Improve documentation of CropLatentsInvocation. --- invokeai/app/invocations/latent.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 49ffa1f7e9..ad2de0d9ae 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1182,31 +1182,33 @@ class BlendLatentsInvocation(BaseInvocation): version="1.0.0", ) class CropLatentsInvocation(BaseInvocation): - """Crops latents""" + """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be + divisible by the latent scale factor of 8. 
+ """ latents: LatentsField = InputField( description=FieldDescriptions.latents, input=Input.Connection, ) width: int = InputField( - ge=64, + ge=1, multiple_of=LATENT_SCALE_FACTOR, - description=FieldDescriptions.width, + description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) height: int = InputField( - ge=64, + ge=1, multiple_of=LATENT_SCALE_FACTOR, - description=FieldDescriptions.width, + description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) x_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, - description="x-coordinate", + description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) y_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, - description="y-coordinate", + description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) def invoke(self, context: InvocationContext) -> LatentsOutput: From 9b863fb9bcd5038324ae40ac0afdab5f8926570e Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 12:05:55 -0500 Subject: [PATCH 17/45] Rename CropLatentsInvocation -> CropLatentsCoreInvocation to prevent conflict with custom node. And other minor tidying. --- invokeai/app/invocations/latent.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index ad2de0d9ae..e48d7458d4 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1174,14 +1174,18 @@ class BlendLatentsInvocation(BaseInvocation): return build_latents_output(latents_name=name, latents=blended_latents) +# The Crop Latents node was copied from @skunkworxdark's implementation here: +# https://github.com/skunkworxdark/XYGrid_nodes/blob/74647fa9c1fa57d317a94bd43ca689af7f0aae5e/images_to_grids.py#L1117C1-L1167C80 @invocation( - "lcrop", + "crop_latents", title="Crop Latents", tags=["latents", "crop"], category="latents", version="1.0.0", ) -class CropLatentsInvocation(BaseInvocation): +# TODO(ryand): Named `CropLatentsCoreInvocation` to prevent a conflict with custom node `CropLatentsInvocation`. +# Currently, if the class names conflict then 'GET /openapi.json' fails. +class CropLatentsCoreInvocation(BaseInvocation): """Crops a latent-space tensor to a box specified in image-space. The box dimensions and coordinates must be divisible by the latent scale factor of 8. """ @@ -1219,9 +1223,7 @@ class CropLatentsInvocation(BaseInvocation): x2 = x1 + (self.width // LATENT_SCALE_FACTOR) y2 = y1 + (self.height // LATENT_SCALE_FACTOR) - cropped_latents = latents[:, :, y1:y2, x1:x2] - - # resized_latents = resized_latents.to("cpu") + cropped_latents = latents[..., y1:y2, x1:x2] name = f"{context.graph_execution_state_id}__{self.id}" context.services.latents.save(name, cropped_latents) From e5a212b5c877691cf04cbb1af2f98f3b8e1f477f Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 12:30:10 -0500 Subject: [PATCH 18/45] Update tiling nodes to use width-before-height field ordering convention. 
--- invokeai/app/invocations/tiles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 927e99be64..350141a2f3 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -33,12 +33,12 @@ class CalculateImageTilesOutput(BaseInvocationOutput): class CalculateImageTilesInvocation(BaseInvocation): """Calculate the coordinates and overlaps of tiles that cover a target image shape.""" + image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") image_height: int = InputField( ge=1, default=1024, description="The image height, in pixels, to calculate tiles for." ) - image_width: int = InputField(ge=1, default=1024, description="The image width, in pixels, to calculate tiles for.") - tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") tile_width: int = InputField(ge=1, default=576, description="The tile width, in pixels.") + tile_height: int = InputField(ge=1, default=576, description="The tile height, in pixels.") overlap: int = InputField( ge=0, default=128, @@ -117,8 +117,8 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # Inputs - image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") + image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") blend_amount: int = InputField( ge=0, From b19ed36b43b9a261b77defc0589f796725a458f5 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 13:49:33 -0500 Subject: [PATCH 19/45] Add width and height fields to TileToPropertiesInvocation output to avoid having to calculate them with math nodes. --- invokeai/app/invocations/tiles.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 350141a2f3..934861f008 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -63,6 +63,14 @@ class TileToPropertiesOutput(BaseInvocationOutput): coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") + # HACK: The width and height fields are 'meta' fields that can easily be calculated from the other fields on this + # object. Including redundant fields that can cheaply/easily be re-calculated goes against conventional API design + # principles. These fields are included, because 1) they are often useful in tiled workflows, and 2) they are + # difficult to calculate in a workflow (even though it's just a couple of subtraction nodes the graph gets + # surprisingly complicated). + width: int = OutputField(description="The width of the tile. Equal to coords_right - coords_left.") + height: int = OutputField(description="The height of the tile. 
Equal to coords_bottom - coords_top.") + overlap_top: int = OutputField(description="Overlap between this tile and its top neighbor.") overlap_bottom: int = OutputField(description="Overlap between this tile and its bottom neighbor.") overlap_left: int = OutputField(description="Overlap between this tile and its left neighbor.") @@ -81,6 +89,8 @@ class TileToPropertiesInvocation(BaseInvocation): coords_bottom=self.tile.coords.bottom, coords_left=self.tile.coords.left, coords_right=self.tile.coords.right, + width=self.tile.coords.right - self.tile.coords.left, + height=self.tile.coords.bottom - self.tile.coords.top, overlap_top=self.tile.overlap.top, overlap_bottom=self.tile.overlap.bottom, overlap_left=self.tile.overlap.left, From 32da359ba5e6ca86a398bf896dfa54f528c583aa Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 14:07:38 -0500 Subject: [PATCH 20/45] Infer a tight-fitting output image size from the passed tiles in MergeTilesToImageInvocation. --- invokeai/app/invocations/tiles.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index 934861f008..d1b51a43f0 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -127,8 +127,6 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): """Merge multiple tile images into a single image.""" # Inputs - image_width: int = InputField(ge=1, description="The width of the output image, in pixels.") - image_height: int = InputField(ge=1, description="The height of the output image, in pixels.") tiles_with_images: list[TileWithImage] = InputField(description="A list of tile images with tile properties.") blend_amount: int = InputField( ge=0, @@ -139,6 +137,13 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): images = [twi.image for twi in self.tiles_with_images] tiles = [twi.tile for twi in self.tiles_with_images] + # Infer the output image dimensions from the max/min tile limits. + height = 0 + width = 0 + for tile in tiles: + height = max(height, tile.coords.bottom) + width = max(width, tile.coords.right) + # Get all tile images for processing. # TODO(ryand): It pains me that we spend time PNG decoding each tile from disk when they almost certainly # existed in memory at an earlier point in the graph. @@ -152,7 +157,7 @@ class MergeTilesToImageInvocation(BaseInvocation, WithMetadata, WithWorkflow): # Check the first tile to determine how many image channels are expected in the output. channels = tile_np_images[0].shape[-1] dtype = tile_np_images[0].dtype - np_image = np.zeros(shape=(self.image_height, self.image_width, channels), dtype=dtype) + np_image = np.zeros(shape=(height, width, channels), dtype=dtype) merge_tiles_with_linear_blending( dst_image=np_image, tiles=tiles, tile_images=tile_np_images, blend_amount=self.blend_amount From bfdef120d1ef253448ddc7a2a99a99967332f26a Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Mon, 27 Nov 2023 23:34:45 -0500 Subject: [PATCH 21/45] Re-organize merge_tiles_with_linear_blending(...) to merge rows horizontally first and then vertically. This change achieves slightly more natural blending on the corners where 4 tiles overlap. 
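To make the change easier to review, here is the per-tile left-edge blend mask in isolation, with example
numbers. It is the same arithmetic as the hunk below, just written outside the diff.

    import numpy as np

    tile_width, overlap_left, blend_amount = 512, 64, 32

    # Build a 1-row slice of the mask: a linear 0 -> 1 ramp of width blend_amount, centered in the overlap.
    mask = np.ones((1, tile_width), dtype=np.float64)
    blend_start = overlap_left // 2 - blend_amount // 2   # = 16
    mask[:, :blend_start] = 0.0                           # left of the ramp: keep the already-pasted neighbor
    mask[:, blend_start : blend_start + blend_amount] = np.linspace(0.0, 1.0, num=blend_amount)
    # Right of the ramp the mask stays 1.0, i.e. the current tile fully replaces the destination there.

The same ramp is applied along the top edge of each assembled row when the rows are stacked, which is what
gives the more natural corner blending described above.
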
--- invokeai/backend/tiles/tiles.py | 101 +++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/invokeai/backend/tiles/tiles.py b/invokeai/backend/tiles/tiles.py index 3d64e3e145..3a678d825e 100644 --- a/invokeai/backend/tiles/tiles.py +++ b/invokeai/backend/tiles/tiles.py @@ -114,6 +114,24 @@ def merge_tiles_with_linear_blending( tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.left) tiles_and_images = sorted(tiles_and_images, key=lambda x: x[0].coords.top) + # Organize tiles into rows. + tile_and_image_rows: list[list[tuple[Tile, np.ndarray]]] = [] + cur_tile_and_image_row: list[tuple[Tile, np.ndarray]] = [] + first_tile_in_cur_row, _ = tiles_and_images[0] + for tile_and_image in tiles_and_images: + tile, _ = tile_and_image + if not ( + tile.coords.top == first_tile_in_cur_row.coords.top + and tile.coords.bottom == first_tile_in_cur_row.coords.bottom + ): + # Store the previous row, and start a new one. + tile_and_image_rows.append(cur_tile_and_image_row) + cur_tile_and_image_row = [] + first_tile_in_cur_row, _ = tile_and_image + + cur_tile_and_image_row.append(tile_and_image) + tile_and_image_rows.append(cur_tile_and_image_row) + # Prepare 1D linear gradients for blending. gradient_left_x = np.linspace(start=0.0, stop=1.0, num=blend_amount) gradient_top_y = np.linspace(start=0.0, stop=1.0, num=blend_amount) @@ -122,33 +140,62 @@ def merge_tiles_with_linear_blending( # broadcasting to work correctly. gradient_top_y = np.expand_dims(gradient_top_y, axis=1) - for tile, tile_image in tiles_and_images: - # We expect tiles to be written left-to-right, top-to-bottom. We construct a mask that applies linear blending - # to the top and to the left of the current tile. The inverse linear blending is automatically applied to the - # bottom/right of the tiles that have already been pasted by the paste(...) operation. - tile_height, tile_width, _ = tile_image.shape - mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64) + for tile_and_image_row in tile_and_image_rows: + first_tile_in_row, _ = tile_and_image_row[0] + row_height = first_tile_in_row.coords.bottom - first_tile_in_row.coords.top + row_image = np.zeros((row_height, dst_image.shape[1], dst_image.shape[2]), dtype=dst_image.dtype) + + # Blend the tiles in the row horizontally. + for tile, tile_image in tile_and_image_row: + # We expect the tiles to be ordered left-to-right. For each tile, we construct a mask that applies linear + # blending to the left of the current tile. The inverse linear blending is automatically applied to the + # right of the tiles that have already been pasted by the paste(...) operation. + tile_height, tile_width, _ = tile_image.shape + mask = np.ones(shape=(tile_height, tile_width), dtype=np.float64) + + # Left blending: + if tile.overlap.left > 0: + assert tile.overlap.left >= blend_amount + # Center the blending gradient in the middle of the overlap. + blend_start_left = tile.overlap.left // 2 - blend_amount // 2 + # The region left of the blending region is masked completely. + mask[:, :blend_start_left] = 0.0 + # Apply the blend gradient to the mask. 
+ mask[:, blend_start_left : blend_start_left + blend_amount] = gradient_left_x + # For visual debugging: + # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 + + paste( + dst_image=row_image, + src_image=tile_image, + box=TBLR( + top=0, bottom=tile.coords.bottom - tile.coords.top, left=tile.coords.left, right=tile.coords.right + ), + mask=mask, + ) + + # Blend the row into the dst_image vertically. + # We construct a mask that applies linear blending to the top of the current row. The inverse linear blending is + # automatically applied to the bottom of the tiles that have already been pasted by the paste(...) operation. + mask = np.ones(shape=(row_image.shape[0], row_image.shape[1]), dtype=np.float64) # Top blending: - if tile.overlap.top > 0: - assert tile.overlap.top >= blend_amount - # Center the blending gradient in the middle of the overlap. - blend_start_top = tile.overlap.top // 2 - blend_amount // 2 - # The region above the blending region is masked completely. + # (See comments under 'Left blending' for an explanation of the logic.) + # We assume that the entire row has the same vertical overlaps as the first_tile_in_row. + if first_tile_in_row.overlap.top > 0: + assert first_tile_in_row.overlap.top >= blend_amount + blend_start_top = first_tile_in_row.overlap.top // 2 - blend_amount // 2 mask[:blend_start_top, :] = 0.0 - # Apply the blend gradient to the mask. Note that we use `*=` rather than `=` to achieve more natural - # behavior on the corners where vertical and horizontal blending gradients overlap. - mask[blend_start_top : blend_start_top + blend_amount, :] *= gradient_top_y + mask[blend_start_top : blend_start_top + blend_amount, :] = gradient_top_y # For visual debugging: - # tile_image[blend_start_top : blend_start_top + blend_amount, :] = 0 - - # Left blending: - # (See comments under 'top blending' for an explanation of the logic.) - if tile.overlap.left > 0: - assert tile.overlap.left >= blend_amount - blend_start_left = tile.overlap.left // 2 - blend_amount // 2 - mask[:, :blend_start_left] = 0.0 - mask[:, blend_start_left : blend_start_left + blend_amount] *= gradient_left_x - # For visual debugging: - # tile_image[:, blend_start_left : blend_start_left + blend_amount] = 0 - - paste(dst_image=dst_image, src_image=tile_image, box=tile.coords, mask=mask) + # row_image[blend_start_top : blend_start_top + blend_amount, :] = 0 + paste( + dst_image=dst_image, + src_image=row_image, + box=TBLR( + top=first_tile_in_row.coords.top, + bottom=first_tile_in_row.coords.bottom, + left=0, + right=row_image.shape[1], + ), + mask=mask, + ) From 57e70aaf5006316cda784dd5d32ba9180b32c6da Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Wed, 29 Nov 2023 10:23:55 -0500 Subject: [PATCH 22/45] Change input field ordering of CropLatentsCoreInvocation to match ImageCropInvocation. --- invokeai/app/invocations/latent.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index e48d7458d4..34ef3421f8 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1194,16 +1194,6 @@ class CropLatentsCoreInvocation(BaseInvocation): description=FieldDescriptions.latents, input=Input.Connection, ) - width: int = InputField( - ge=1, - multiple_of=LATENT_SCALE_FACTOR, - description="The width (in px) of the crop rectangle in image space. 
This value will be converted to a dimension in latent space.", - ) - height: int = InputField( - ge=1, - multiple_of=LATENT_SCALE_FACTOR, - description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", - ) x_offset: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, @@ -1214,6 +1204,16 @@ class CropLatentsCoreInvocation(BaseInvocation): multiple_of=LATENT_SCALE_FACTOR, description="The top y coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) + width: int = InputField( + ge=1, + multiple_of=LATENT_SCALE_FACTOR, + description="The width (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", + ) + height: int = InputField( + ge=1, + multiple_of=LATENT_SCALE_FACTOR, + description="The height (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", + ) def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) From 984e609c61525fae4ff2cc205ac19aa38286d542 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Thu, 30 Nov 2023 10:44:21 -0500 Subject: [PATCH 23/45] (minor) Tweak field ordering and field names for tiling nodes. --- invokeai/app/invocations/latent.py | 8 ++++---- invokeai/app/invocations/tiles.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/invokeai/app/invocations/latent.py b/invokeai/app/invocations/latent.py index 34ef3421f8..218e05a986 100644 --- a/invokeai/app/invocations/latent.py +++ b/invokeai/app/invocations/latent.py @@ -1194,12 +1194,12 @@ class CropLatentsCoreInvocation(BaseInvocation): description=FieldDescriptions.latents, input=Input.Connection, ) - x_offset: int = InputField( + x: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, description="The left x coordinate (in px) of the crop rectangle in image space. This value will be converted to a dimension in latent space.", ) - y_offset: int = InputField( + y: int = InputField( ge=0, multiple_of=LATENT_SCALE_FACTOR, description="The top y coordinate (in px) of the crop rectangle in image space. 
This value will be converted to a dimension in latent space.", @@ -1218,8 +1218,8 @@ class CropLatentsCoreInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> LatentsOutput: latents = context.services.latents.get(self.latents.latents_name) - x1 = self.x_offset // LATENT_SCALE_FACTOR - y1 = self.y_offset // LATENT_SCALE_FACTOR + x1 = self.x // LATENT_SCALE_FACTOR + y1 = self.y // LATENT_SCALE_FACTOR x2 = x1 + (self.width // LATENT_SCALE_FACTOR) y2 = y1 + (self.height // LATENT_SCALE_FACTOR) diff --git a/invokeai/app/invocations/tiles.py b/invokeai/app/invocations/tiles.py index d1b51a43f0..3055c1baae 100644 --- a/invokeai/app/invocations/tiles.py +++ b/invokeai/app/invocations/tiles.py @@ -58,10 +58,10 @@ class CalculateImageTilesInvocation(BaseInvocation): @invocation_output("tile_to_properties_output") class TileToPropertiesOutput(BaseInvocationOutput): - coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") - coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") coords_left: int = OutputField(description="Left coordinate of the tile relative to its parent image.") coords_right: int = OutputField(description="Right coordinate of the tile relative to its parent image.") + coords_top: int = OutputField(description="Top coordinate of the tile relative to its parent image.") + coords_bottom: int = OutputField(description="Bottom coordinate of the tile relative to its parent image.") # HACK: The width and height fields are 'meta' fields that can easily be calculated from the other fields on this # object. Including redundant fields that can cheaply/easily be re-calculated goes against conventional API design @@ -85,10 +85,10 @@ class TileToPropertiesInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> TileToPropertiesOutput: return TileToPropertiesOutput( - coords_top=self.tile.coords.top, - coords_bottom=self.tile.coords.bottom, coords_left=self.tile.coords.left, coords_right=self.tile.coords.right, + coords_top=self.tile.coords.top, + coords_bottom=self.tile.coords.bottom, width=self.tile.coords.right - self.tile.coords.left, height=self.tile.coords.bottom - self.tile.coords.top, overlap_top=self.tile.overlap.top, From aadcde3edd12b8b81d22d9b6cc8205bf1e460a44 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Mon, 13 Nov 2023 18:08:17 +1100 Subject: [PATCH 24/45] feat(ui): use IndexedDB for persistence IndexedDB has a much larger storage limit than LocalStorage, and is widely supported. Implemented as a custom storage driver for `redux-remember` via `idb-keyval`. `idb-keyval` is a simple wrapper for IndexedDB that allows it to be used easily as a key-value store. The logic to clear persisted storage has been updated throughout the app. 
--- invokeai/frontend/web/package.json | 1 + .../frontend/web/src/app/components/App.tsx | 6 ++++-- .../web/src/app/components/InvokeAIUI.tsx | 6 +++--- .../src/app/components/ThemeLocaleProvider.tsx | 2 +- .../frontend/web/src/app/store/constants.ts | 9 +-------- invokeai/frontend/web/src/app/store/store.ts | 18 ++++++++++++++---- .../web/src/common/hooks/useClearStorage.ts | 12 ++++++++++++ .../components/SettingsModal/SettingsModal.tsx | 16 +++++----------- invokeai/frontend/web/yarn.lock | 5 +++++ 9 files changed, 46 insertions(+), 29 deletions(-) create mode 100644 invokeai/frontend/web/src/common/hooks/useClearStorage.ts diff --git a/invokeai/frontend/web/package.json b/invokeai/frontend/web/package.json index 6f160bae46..6a6b79c3b7 100644 --- a/invokeai/frontend/web/package.json +++ b/invokeai/frontend/web/package.json @@ -75,6 +75,7 @@ "framer-motion": "^10.16.4", "i18next": "^23.6.0", "i18next-http-backend": "^2.3.1", + "idb-keyval": "^6.2.1", "konva": "^9.2.3", "lodash-es": "^4.17.21", "nanostores": "^0.9.4", diff --git a/invokeai/frontend/web/src/app/components/App.tsx b/invokeai/frontend/web/src/app/components/App.tsx index 63533aee0d..73bd92ffab 100644 --- a/invokeai/frontend/web/src/app/components/App.tsx +++ b/invokeai/frontend/web/src/app/components/App.tsx @@ -21,6 +21,7 @@ import GlobalHotkeys from './GlobalHotkeys'; import PreselectedImage from './PreselectedImage'; import Toaster from './Toaster'; import { useSocketIO } from 'app/hooks/useSocketIO'; +import { useClearStorage } from 'common/hooks/useClearStorage'; const DEFAULT_CONFIG = {}; @@ -36,15 +37,16 @@ const App = ({ config = DEFAULT_CONFIG, selectedImage }: Props) => { const language = useAppSelector(languageSelector); const logger = useLogger('system'); const dispatch = useAppDispatch(); + const clearStorage = useClearStorage(); // singleton! 
useSocketIO(); const handleReset = useCallback(() => { - localStorage.clear(); + clearStorage(); location.reload(); return false; - }, []); + }, [clearStorage]); useEffect(() => { i18n.changeLanguage(language); diff --git a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx index 459ac65635..64d0d8d3ab 100644 --- a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx +++ b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx @@ -9,6 +9,9 @@ import { $projectId } from 'app/store/nanostores/projectId'; import { $queueId, DEFAULT_QUEUE_ID } from 'app/store/nanostores/queueId'; import { store } from 'app/store/store'; import { PartialAppConfig } from 'app/types/invokeai'; +import Loading from 'common/components/Loading/Loading'; +import AppDndContext from 'features/dnd/components/AppDndContext'; +import 'i18n'; import React, { PropsWithChildren, ReactNode, @@ -19,9 +22,6 @@ import React, { import { Provider } from 'react-redux'; import { addMiddleware, resetMiddlewares } from 'redux-dynamic-middlewares'; import { ManagerOptions, SocketOptions } from 'socket.io-client'; -import Loading from 'common/components/Loading/Loading'; -import AppDndContext from 'features/dnd/components/AppDndContext'; -import 'i18n'; const App = lazy(() => import('./App')); const ThemeLocaleProvider = lazy(() => import('./ThemeLocaleProvider')); diff --git a/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx b/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx index a9d56a7f16..ba0aaa5823 100644 --- a/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx +++ b/invokeai/frontend/web/src/app/components/ThemeLocaleProvider.tsx @@ -9,9 +9,9 @@ import { TOAST_OPTIONS, theme as invokeAITheme } from 'theme/theme'; import '@fontsource-variable/inter'; import { MantineProvider } from '@mantine/core'; +import { useMantineTheme } from 'mantine-theme/theme'; import 'overlayscrollbars/overlayscrollbars.css'; import 'theme/css/overlayscrollbars.css'; -import { useMantineTheme } from 'mantine-theme/theme'; type ThemeLocaleProviderProps = { children: ReactNode; diff --git a/invokeai/frontend/web/src/app/store/constants.ts b/invokeai/frontend/web/src/app/store/constants.ts index 6d48762bef..c2f3a5e10b 100644 --- a/invokeai/frontend/web/src/app/store/constants.ts +++ b/invokeai/frontend/web/src/app/store/constants.ts @@ -1,8 +1 @@ -export const LOCALSTORAGE_KEYS = [ - 'chakra-ui-color-mode', - 'i18nextLng', - 'ROARR_FILTER', - 'ROARR_LOG', -]; - -export const LOCALSTORAGE_PREFIX = '@@invokeai-'; +export const STORAGE_PREFIX = '@@invokeai-'; diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index d9bc7b085d..a0230c2807 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -23,9 +23,9 @@ import systemReducer from 'features/system/store/systemSlice'; import hotkeysReducer from 'features/ui/store/hotkeysSlice'; import uiReducer from 'features/ui/store/uiSlice'; import dynamicMiddlewares from 'redux-dynamic-middlewares'; -import { rememberEnhancer, rememberReducer } from 'redux-remember'; +import { Driver, rememberEnhancer, rememberReducer } from 'redux-remember'; import { api } from 'services/api'; -import { LOCALSTORAGE_PREFIX } from './constants'; +import { STORAGE_PREFIX } from './constants'; import { serialize } from './enhancers/reduxRemember/serialize'; import { unserialize } from 
'./enhancers/reduxRemember/unserialize'; import { actionSanitizer } from './middleware/devtools/actionSanitizer'; @@ -33,6 +33,7 @@ import { actionsDenylist } from './middleware/devtools/actionsDenylist'; import { stateSanitizer } from './middleware/devtools/stateSanitizer'; import { listenerMiddleware } from './middleware/listenerMiddleware'; import { $store } from './nanostores/store'; +import { createStore as createIDBKeyValStore, get, set } from 'idb-keyval'; const allReducers = { canvas: canvasReducer, @@ -74,16 +75,25 @@ const rememberedKeys: (keyof typeof allReducers)[] = [ 'modelmanager', ]; +// Create a custom idb-keyval store (just needed to customize the name) +export const idbKeyValStore = createIDBKeyValStore('invoke', 'invoke-store'); + +// Create redux-remember driver, wrapping idb-keyval +const idbKeyValDriver: Driver = { + getItem: (key) => get(key, idbKeyValStore), + setItem: (key, value) => set(key, value, idbKeyValStore), +}; + export const store = configureStore({ reducer: rememberedRootReducer, enhancers: (existingEnhancers) => { return existingEnhancers .concat( - rememberEnhancer(window.localStorage, rememberedKeys, { + rememberEnhancer(idbKeyValDriver, rememberedKeys, { persistDebounce: 300, serialize, unserialize, - prefix: LOCALSTORAGE_PREFIX, + prefix: STORAGE_PREFIX, }) ) .concat(autoBatchEnhancer()); diff --git a/invokeai/frontend/web/src/common/hooks/useClearStorage.ts b/invokeai/frontend/web/src/common/hooks/useClearStorage.ts new file mode 100644 index 0000000000..0ab4936d72 --- /dev/null +++ b/invokeai/frontend/web/src/common/hooks/useClearStorage.ts @@ -0,0 +1,12 @@ +import { idbKeyValStore } from 'app/store/store'; +import { clear } from 'idb-keyval'; +import { useCallback } from 'react'; + +export const useClearStorage = () => { + const clearStorage = useCallback(() => { + clear(idbKeyValStore); + localStorage.clear(); + }, []); + + return clearStorage; +}; diff --git a/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx b/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx index e1eeb19df3..7841a94d3f 100644 --- a/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx +++ b/invokeai/frontend/web/src/features/system/components/SettingsModal/SettingsModal.tsx @@ -14,11 +14,11 @@ import { } from '@chakra-ui/react'; import { createSelector } from '@reduxjs/toolkit'; import { VALID_LOG_LEVELS } from 'app/logging/logger'; -import { LOCALSTORAGE_KEYS, LOCALSTORAGE_PREFIX } from 'app/store/constants'; import { stateSelector } from 'app/store/store'; import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; import IAIButton from 'common/components/IAIButton'; import IAIMantineSelect from 'common/components/IAIMantineSelect'; +import { useClearStorage } from 'common/hooks/useClearStorage'; import { consoleLogLevelChanged, setEnableImageDebugging, @@ -164,20 +164,14 @@ const SettingsModal = ({ children, config }: SettingsModalProps) => { shouldEnableInformationalPopovers, } = useAppSelector(selector); + const clearStorage = useClearStorage(); + const handleClickResetWebUI = useCallback(() => { - // Only remove our keys - Object.keys(window.localStorage).forEach((key) => { - if ( - LOCALSTORAGE_KEYS.includes(key) || - key.startsWith(LOCALSTORAGE_PREFIX) - ) { - localStorage.removeItem(key); - } - }); + clearStorage(); onSettingsModalClose(); onRefreshModalOpen(); setInterval(() => setCountdown((prev) => prev - 1), 1000); - }, [onSettingsModalClose, 
onRefreshModalOpen]); + }, [clearStorage, onSettingsModalClose, onRefreshModalOpen]); useEffect(() => { if (countdown <= 0) { diff --git a/invokeai/frontend/web/yarn.lock b/invokeai/frontend/web/yarn.lock index e0a9db1c5e..6c661af24b 100644 --- a/invokeai/frontend/web/yarn.lock +++ b/invokeai/frontend/web/yarn.lock @@ -4158,6 +4158,11 @@ i18next@^23.6.0: dependencies: "@babel/runtime" "^7.22.5" +idb-keyval@^6.2.1: + version "6.2.1" + resolved "https://registry.yarnpkg.com/idb-keyval/-/idb-keyval-6.2.1.tgz#94516d625346d16f56f3b33855da11bfded2db33" + integrity sha512-8Sb3veuYCyrZL+VBt9LJfZjLUPWVvqn8tG28VqYNFCo43KHcKuq+b4EiXGeuaLAQWL2YmyDgMp2aSpH9JHsEQg== + ieee754@^1.1.13: version "1.2.1" resolved "https://registry.yarnpkg.com/ieee754/-/ieee754-1.2.1.tgz#8eb7a10a63fff25d15a57b001586d177d1b0d352" From e6fe2540b81cf5be4d403d6adc53d0c75b3922d1 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Thu, 30 Nov 2023 11:47:27 -0500 Subject: [PATCH 25/45] dynamically create indexedDB store using unique store key if available --- .../web/src/app/components/InvokeAIUI.tsx | 12 ++- invokeai/frontend/web/src/app/store/store.ts | 100 +++++++++--------- 2 files changed, 61 insertions(+), 51 deletions(-) diff --git a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx index 64d0d8d3ab..b190a36f06 100644 --- a/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx +++ b/invokeai/frontend/web/src/app/components/InvokeAIUI.tsx @@ -7,7 +7,8 @@ import { $headerComponent } from 'app/store/nanostores/headerComponent'; import { $isDebugging } from 'app/store/nanostores/isDebugging'; import { $projectId } from 'app/store/nanostores/projectId'; import { $queueId, DEFAULT_QUEUE_ID } from 'app/store/nanostores/queueId'; -import { store } from 'app/store/store'; +import { $store } from 'app/store/nanostores/store'; +import { createStore } from 'app/store/store'; import { PartialAppConfig } from 'app/types/invokeai'; import Loading from 'common/components/Loading/Loading'; import AppDndContext from 'features/dnd/components/AppDndContext'; @@ -18,6 +19,7 @@ import React, { lazy, memo, useEffect, + useMemo, } from 'react'; import { Provider } from 'react-redux'; import { addMiddleware, resetMiddlewares } from 'redux-dynamic-middlewares'; @@ -137,6 +139,14 @@ const InvokeAIUI = ({ }; }, [isDebugging]); + const store = useMemo(() => { + return createStore(projectId); + }, [projectId]); + + useEffect(() => { + $store.set(store); + }, [store]); + return ( diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index a0230c2807..87edba56e0 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -32,7 +32,6 @@ import { actionSanitizer } from './middleware/devtools/actionSanitizer'; import { actionsDenylist } from './middleware/devtools/actionsDenylist'; import { stateSanitizer } from './middleware/devtools/stateSanitizer'; import { listenerMiddleware } from './middleware/listenerMiddleware'; -import { $store } from './nanostores/store'; import { createStore as createIDBKeyValStore, get, set } from 'idb-keyval'; const allReducers = { @@ -84,57 +83,58 @@ const idbKeyValDriver: Driver = { setItem: (key, value) => set(key, value, idbKeyValStore), }; -export const store = configureStore({ - reducer: rememberedRootReducer, - enhancers: (existingEnhancers) => { - return existingEnhancers - .concat( - rememberEnhancer(idbKeyValDriver, rememberedKeys, { - persistDebounce: 300, - 
serialize, - unserialize, - prefix: STORAGE_PREFIX, - }) - ) - .concat(autoBatchEnhancer()); - }, - middleware: (getDefaultMiddleware) => - getDefaultMiddleware({ - serializableCheck: false, - immutableCheck: false, - }) - .concat(api.middleware) - .concat(dynamicMiddlewares) - .prepend(listenerMiddleware.middleware), - devTools: { - actionSanitizer, - stateSanitizer, - trace: true, - predicate: (state, action) => { - // TODO: hook up to the log level param in system slice - // manually type state, cannot type the arg - // const typedState = state as ReturnType; - - // TODO: doing this breaks the rtk query devtools, commenting out for now - // if (action.type.startsWith('api/')) { - // // don't log api actions, with manual cache updates they are extremely noisy - // return false; - // } - - if (actionsDenylist.includes(action.type)) { - // don't log other noisy actions - return false; - } - - return true; +export const createStore = (uniqueStoreKey?: string) => + configureStore({ + reducer: rememberedRootReducer, + enhancers: (existingEnhancers) => { + return existingEnhancers + .concat( + rememberEnhancer(idbKeyValDriver, rememberedKeys, { + persistDebounce: 300, + serialize, + unserialize, + prefix: uniqueStoreKey + ? `${STORAGE_PREFIX}-${uniqueStoreKey}-` + : STORAGE_PREFIX, + }) + ) + .concat(autoBatchEnhancer()); }, - }, -}); + middleware: (getDefaultMiddleware) => + getDefaultMiddleware({ + serializableCheck: false, + immutableCheck: false, + }) + .concat(api.middleware) + .concat(dynamicMiddlewares) + .prepend(listenerMiddleware.middleware), + devTools: { + actionSanitizer, + stateSanitizer, + trace: true, + predicate: (state, action) => { + // TODO: hook up to the log level param in system slice + // manually type state, cannot type the arg + // const typedState = state as ReturnType; -export type AppGetState = typeof store.getState; -export type RootState = ReturnType; + // TODO: doing this breaks the rtk query devtools, commenting out for now + // if (action.type.startsWith('api/')) { + // // don't log api actions, with manual cache updates they are extremely noisy + // return false; + // } + + if (actionsDenylist.includes(action.type)) { + // don't log other noisy actions + return false; + } + + return true; + }, + }, + }); + +export type RootState = ReturnType; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type AppThunkDispatch = ThunkDispatch; -export type AppDispatch = typeof store.dispatch; +export type AppDispatch = ReturnType['dispatch']; export const stateSelector = (state: RootState) => state; -$store.set(store); From a8ef4e5be847cb16528b3f1d5759dd086d1d00e3 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:02:02 +1100 Subject: [PATCH 26/45] fix(ui): fix types and storage prefix --- invokeai/frontend/web/src/app/store/store.ts | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/invokeai/frontend/web/src/app/store/store.ts b/invokeai/frontend/web/src/app/store/store.ts index 87edba56e0..0e3634468b 100644 --- a/invokeai/frontend/web/src/app/store/store.ts +++ b/invokeai/frontend/web/src/app/store/store.ts @@ -94,7 +94,7 @@ export const createStore = (uniqueStoreKey?: string) => serialize, unserialize, prefix: uniqueStoreKey - ? `${STORAGE_PREFIX}-${uniqueStoreKey}-` + ? 
`${STORAGE_PREFIX}${uniqueStoreKey}-` : STORAGE_PREFIX, }) ) @@ -133,8 +133,11 @@ export const createStore = (uniqueStoreKey?: string) => }, }); -export type RootState = ReturnType; +export type AppGetState = ReturnType< + ReturnType['getState'] +>; +export type RootState = ReturnType['getState']>; // eslint-disable-next-line @typescript-eslint/no-explicit-any export type AppThunkDispatch = ThunkDispatch; -export type AppDispatch = ReturnType['dispatch']; +export type AppDispatch = ReturnType['dispatch']; export const stateSelector = (state: RootState) => state; From 0719a46372bde45bd0b60d251030e8a47b0d994b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 1 Dec 2023 01:28:28 -0500 Subject: [PATCH 27/45] add support for SDXL textual inversion/embeddings --- invokeai/backend/model_management/lora.py | 52 +++++++++++++++---- .../backend/model_management/model_probe.py | 4 ++ 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/invokeai/backend/model_management/lora.py b/invokeai/backend/model_management/lora.py index 4389cacacc..f9c40f8386 100644 --- a/invokeai/backend/model_management/lora.py +++ b/invokeai/backend/model_management/lora.py @@ -192,10 +192,19 @@ class ModelPatcher: trigger += f"-!pad-{i}" return f"<{trigger}>" + def _get_ti_embedding(model_embeddings, ti): + # for SDXL models, select the embedding that matches the text encoder's dimensions + if ti.embedding_2 is not None: + return ti.embedding_2 if ti.embedding_2.shape[1] == model_embeddings.weight.data[0].shape[0] else ti.embedding + else: + return ti.embedding + # modify tokenizer new_tokens_added = 0 for ti_name, ti in ti_list: - for i in range(ti.embedding.shape[0]): + ti_embedding = _get_ti_embedding(text_encoder.get_input_embeddings(), ti) + + for i in range(ti_embedding.shape[0]): new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti_name, i)) # modify text_encoder @@ -203,9 +212,10 @@ class ModelPatcher: model_embeddings = text_encoder.get_input_embeddings() for ti_name, ti in ti_list: + ti_tokens = [] - for i in range(ti.embedding.shape[0]): - embedding = ti.embedding[i] + for i in range(ti_embedding.shape[0]): + embedding = ti_embedding[i] trigger = _get_trigger(ti_name, i) token_id = ti_tokenizer.convert_tokens_to_ids(trigger) @@ -272,7 +282,8 @@ class ModelPatcher: class TextualInversionModel: - embedding: torch.Tensor # [n, 768]|[n, 1280] + embedding: torch.Tensor # [n, 768]|[n, 1280] + embedding_2: Optional[torch.Tensor] = None # [n, 768]|[n, 1280] - for SDXL models @classmethod def from_checkpoint( @@ -296,7 +307,7 @@ class TextualInversionModel: if "string_to_param" in state_dict: if len(state_dict["string_to_param"]) > 1: print( - f'Warn: Embedding "{file_path.name}" contains multiple tokens, which is not supported. The first' + f'Warn: Embedding "{file_path.name}" contains multiple tokens, which is not supported. The first', " token will be used." 
) @@ -306,6 +317,11 @@ class TextualInversionModel: elif "emb_params" in state_dict: result.embedding = state_dict["emb_params"] + # v5(sdxl safetensors file) + elif "clip_g" in state_dict and "clip_l" in state_dict: + result.embedding = state_dict["clip_g"] + result.embedding_2 = state_dict["clip_l"] + # v4(diffusers bin files) else: result.embedding = next(iter(state_dict.values())) @@ -316,6 +332,7 @@ if not isinstance(result.embedding, torch.Tensor): raise ValueError(f"Invalid embeddings file: {file_path.name}") + return result @@ -342,6 +359,13 @@ class TextualInversionManager(BaseTextualInversionManager): if token_id in self.pad_tokens: new_token_ids.extend(self.pad_tokens[token_id]) + # Do not exceed the max model input size + # The -2 here is compensating for compel.embeddings_provider.get_token_ids(), + # which first removes and then adds back the start and end tokens. + max_length = list(self.tokenizer.max_model_input_sizes.values())[0] - 2 + if len(new_token_ids) > max_length: + new_token_ids = new_token_ids[0:max_length] + return new_token_ids @@ -490,14 +514,20 @@ class ONNXModelPatcher: trigger += f"-!pad-{i}" return f"<{trigger}>" + # modify text_encoder + orig_embeddings = text_encoder.tensors["text_model.embeddings.token_embedding.weight"] + # modify tokenizer new_tokens_added = 0 for ti_name, ti in ti_list: - for i in range(ti.embedding.shape[0]): - new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti_name, i)) - # modify text_encoder - orig_embeddings = text_encoder.tensors["text_model.embeddings.token_embedding.weight"] + if ti.embedding_2 is not None: + ti_embedding = ti.embedding_2 if ti.embedding_2.shape[1] == orig_embeddings.shape[0] else ti.embedding + else: + ti_embedding = ti.embedding + + for i in range(ti_embedding.shape[0]): + new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti_name, i)) embeddings = np.concatenate( (np.copy(orig_embeddings), np.zeros((new_tokens_added, orig_embeddings.shape[1]))), @@ -506,8 +536,8 @@ for ti_name, ti in ti_list: ti_tokens = [] - for i in range(ti.embedding.shape[0]): - embedding = ti.embedding[i].detach().numpy() + for i in range(ti_embedding.shape[0]): + embedding = ti_embedding[i].detach().numpy() trigger = _get_trigger(ti_name, i) token_id = ti_tokenizer.convert_tokens_to_ids(trigger) diff --git a/invokeai/backend/model_management/model_probe.py b/invokeai/backend/model_management/model_probe.py index aebe30f116..af4f3f2a62 100644 --- a/invokeai/backend/model_management/model_probe.py +++ b/invokeai/backend/model_management/model_probe.py @@ -373,12 +373,16 @@ class TextualInversionCheckpointProbe(CheckpointProbeBase): token_dim = list(checkpoint["string_to_param"].values())[0].shape[-1] elif "emb_params" in checkpoint: token_dim = checkpoint["emb_params"].shape[-1] + elif "clip_g" in checkpoint: + token_dim = checkpoint["clip_g"].shape[-1] else: token_dim = list(checkpoint.values())[0].shape[0] if token_dim == 768: return BaseModelType.StableDiffusion1 elif token_dim == 1024: return BaseModelType.StableDiffusion2 + elif token_dim == 1280: + return BaseModelType.StableDiffusionXL else: return None From f95ce1870c03381999c637b46c991e47dd4aa520 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 1 Dec 2023 01:46:12 -0500 Subject: [PATCH 28/45] fix ruff format check --- invokeai/backend/model_management/lora.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/invokeai/backend/model_management/lora.py 
b/invokeai/backend/model_management/lora.py index f9c40f8386..acd1f6bab6 100644 --- a/invokeai/backend/model_management/lora.py +++ b/invokeai/backend/model_management/lora.py @@ -195,7 +195,11 @@ class ModelPatcher: def _get_ti_embedding(model_embeddings, ti): # for SDXL models, select the embedding that matches the text encoder's dimensions if ti.embedding_2 is not None: - return ti.embedding_2 if ti.embedding_2.shape[1] == model_embeddings.weight.data[0].shape[0] else ti.embedding + return ( + ti.embedding_2 + if ti.embedding_2.shape[1] == model_embeddings.weight.data[0].shape[0] + else ti.embedding + ) else: return ti.embedding @@ -212,7 +216,6 @@ class ModelPatcher: model_embeddings = text_encoder.get_input_embeddings() for ti_name, ti in ti_list: - ti_tokens = [] for i in range(ti_embedding.shape[0]): embedding = ti_embedding[i] @@ -282,7 +285,7 @@ class ModelPatcher: class TextualInversionModel: - embedding: torch.Tensor # [n, 768]|[n, 1280] + embedding: torch.Tensor # [n, 768]|[n, 1280] embedding_2: Optional[torch.Tensor] = None # [n, 768]|[n, 1280] - for SDXL models @classmethod @@ -308,7 +311,7 @@ class TextualInversionModel: if len(state_dict["string_to_param"]) > 1: print( f'Warn: Embedding "{file_path.name}" contains multiple tokens, which is not supported. The first', - " token will be used." + " token will be used.", ) result.embedding = next(iter(state_dict["string_to_param"].values())) @@ -319,8 +322,8 @@ class TextualInversionModel: # v5(sdxl safetensors file) elif "clip_g" in state_dict and "clip_l" in state_dict: - result.embedding = state_dict["clip_g"] - result.embedding_2 = state_dict["clip_l"] + result.embedding = state_dict["clip_g"] + result.embedding_2 = state_dict["clip_l"] # v4(diffusers bin files) else: @@ -332,7 +335,6 @@ class TextualInversionModel: if not isinstance(result.embedding, torch.Tensor): raise ValueError(f"Invalid embeddings file: {file_path.name}") - return result @@ -520,9 +522,10 @@ class ONNXModelPatcher: # modify tokenizer new_tokens_added = 0 for ti_name, ti in ti_list: - if ti.embedding_2 is not None: - ti_embedding = ti.embedding_2 if ti.embedding_2.shape[1] == orig_embeddings.shape[0] else ti.embedding + ti_embedding = ( + ti.embedding_2 if ti.embedding_2.shape[1] == orig_embeddings.shape[0] else ti.embedding + ) else: ti_embedding = ti.embedding From 5a3f1f2b2228d6f8d4f09f95fec93e738d89f5d8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 1 Dec 2023 01:59:26 -0500 Subject: [PATCH 29/45] fix ruff github format errors --- invokeai/backend/model_management/lora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/invokeai/backend/model_management/lora.py b/invokeai/backend/model_management/lora.py index acd1f6bab6..3d2136659f 100644 --- a/invokeai/backend/model_management/lora.py +++ b/invokeai/backend/model_management/lora.py @@ -215,7 +215,7 @@ class ModelPatcher: text_encoder.resize_token_embeddings(init_tokens_count + new_tokens_added, pad_to_multiple_of) model_embeddings = text_encoder.get_input_embeddings() - for ti_name, ti in ti_list: + for ti_name, _ in ti_list: ti_tokens = [] for i in range(ti_embedding.shape[0]): embedding = ti_embedding[i] @@ -537,7 +537,7 @@ class ONNXModelPatcher: axis=0, ) - for ti_name, ti in ti_list: + for ti_name, _ in ti_list: ti_tokens = [] for i in range(ti_embedding.shape[0]): embedding = ti_embedding[i].detach().numpy() From 04ddcf53f31d1a5545d6911a76eb378b54b3d2a9 Mon Sep 17 00:00:00 2001 From: Ryan Dick Date: Fri, 1 Dec 2023 10:09:39 -0500 Subject: [PATCH 30/45] Set 
minimum numpy version to ensure that np.testing.assert_array_equal() supports the 'strict' argument. --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 08858059fe..961a8335e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,8 @@ dependencies = [ "invisible-watermark~=0.2.0", # needed to install SDXL base and refiner using their repo_ids "matplotlib", # needed for plotting of Penner easing functions "mediapipe", # needed for "mediapipeface" controlnet model - "numpy", + # Minimum numpy version of 1.24.0 is needed to use the 'strict' argument to np.testing.assert_array_equal(). + "numpy>=1.24.0", "npyscreen", "omegaconf", "onnx", From cff6600ded6860476b607e304d92660de37a690b Mon Sep 17 00:00:00 2001 From: Riccardo Giovanetti Date: Fri, 1 Dec 2023 18:04:03 +0100 Subject: [PATCH 31/45] translationBot(ui): update translation (Italian) Currently translated at 94.4% (1248 of 1321 strings) Co-authored-by: Riccardo Giovanetti Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/ Translation: InvokeAI/Web UI --- invokeai/frontend/web/public/locales/it.json | 56 ++++++++++++++++---- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/invokeai/frontend/web/public/locales/it.json b/invokeai/frontend/web/public/locales/it.json index 5123e8b07c..3f76e80a52 100644 --- a/invokeai/frontend/web/public/locales/it.json +++ b/invokeai/frontend/web/public/locales/it.json @@ -91,7 +91,19 @@ "controlNet": "ControlNet", "auto": "Automatico", "simple": "Semplice", - "details": "Dettagli" + "details": "Dettagli", + "format": "formato", + "unknown": "Sconosciuto", + "folder": "Cartella", + "error": "Errore", + "installed": "Installato", + "template": "Schema", + "outputs": "Uscite", + "data": "Dati", + "somethingWentWrong": "Qualcosa è andato storto", + "copyError": "$t(gallery.copy) Errore", + "input": "Ingresso", + "notInstalled": "Non $t(common.installed)" }, "gallery": { "generations": "Generazioni", @@ -122,7 +134,14 @@ "preparingDownload": "Preparazione del download", "preparingDownloadFailed": "Problema durante la preparazione del download", "downloadSelection": "Scarica gli elementi selezionati", - "noImageSelected": "Nessuna immagine selezionata" + "noImageSelected": "Nessuna immagine selezionata", + "deleteSelection": "Elimina la selezione", + "image": "immagine", + "drop": "Rilascia", + "unstarImage": "Rimuovi preferenza immagine", + "dropOrUpload": "$t(gallery.drop) o carica", + "starImage": "Immagine preferita", + "dropToUpload": "$t(gallery.drop) per aggiornare" }, "hotkeys": { "keyboardShortcuts": "Tasti rapidi", @@ -477,7 +496,8 @@ "modelType": "Tipo di modello", "customConfigFileLocation": "Posizione del file di configurazione personalizzato", "vaePrecision": "Precisione VAE", - "noModelSelected": "Nessun modello selezionato" + "noModelSelected": "Nessun modello selezionato", + "conversionNotSupported": "Conversione non supportata" }, "parameters": { "images": "Immagini", @@ -838,7 +858,8 @@ "menu": "Menu", "showGalleryPanel": "Mostra il pannello Galleria", "loadMore": "Carica altro", - "mode": "Modalità" + "mode": "Modalità", + "resetUI": "$t(accessibility.reset) l'Interfaccia Utente" }, "ui": { "hideProgressImages": "Nascondi avanzamento immagini", @@ -1040,7 +1061,15 @@ "updateAllNodes": "Aggiorna tutti i nodi", "unableToUpdateNodes_one": "Impossibile aggiornare {{count}} nodo", "unableToUpdateNodes_many": "Impossibile aggiornare {{count}} nodi", - "unableToUpdateNodes_other": 
"Impossibile aggiornare {{count}} nodi" + "unableToUpdateNodes_other": "Impossibile aggiornare {{count}} nodi", + "addLinearView": "Aggiungi alla vista Lineare", + "outputFieldInInput": "Campo di uscita in ingresso", + "unableToMigrateWorkflow": "Impossibile migrare il flusso di lavoro", + "unableToUpdateNode": "Impossibile aggiornare nodo", + "unknownErrorValidatingWorkflow": "Errore sconosciuto durante la convalida del flusso di lavoro", + "collectionFieldType": "{{name}} Raccolta", + "collectionOrScalarFieldType": "{{name}} Raccolta|Scalare", + "nodeVersion": "Versione Nodo" }, "boards": { "autoAddBoard": "Aggiungi automaticamente bacheca", @@ -1062,7 +1091,10 @@ "deleteBoardOnly": "Elimina solo la Bacheca", "deleteBoard": "Elimina Bacheca", "deleteBoardAndImages": "Elimina Bacheca e Immagini", - "deletedBoardsCannotbeRestored": "Le bacheche eliminate non possono essere ripristinate" + "deletedBoardsCannotbeRestored": "Le bacheche eliminate non possono essere ripristinate", + "movingImagesToBoard_one": "Spostare {{count}} immagine nella bacheca:", + "movingImagesToBoard_many": "Spostare {{count}} immagini nella bacheca:", + "movingImagesToBoard_other": "Spostare {{count}} immagini nella bacheca:" }, "controlnet": { "contentShuffleDescription": "Rimescola il contenuto di un'immagine", @@ -1136,7 +1168,8 @@ "megaControl": "Mega ControlNet", "minConfidence": "Confidenza minima", "scribble": "Scribble", - "amult": "Angolo di illuminazione" + "amult": "Angolo di illuminazione", + "coarse": "Approssimativo" }, "queue": { "queueFront": "Aggiungi all'inizio della coda", @@ -1204,7 +1237,8 @@ "embedding": { "noMatchingEmbedding": "Nessun Incorporamento corrispondente", "addEmbedding": "Aggiungi Incorporamento", - "incompatibleModel": "Modello base incompatibile:" + "incompatibleModel": "Modello base incompatibile:", + "noEmbeddingsLoaded": "Nessun incorporamento caricato" }, "models": { "noMatchingModels": "Nessun modello corrispondente", @@ -1217,7 +1251,8 @@ "noRefinerModelsInstalled": "Nessun modello SDXL Refiner installato", "noLoRAsInstalled": "Nessun LoRA installato", "esrganModel": "Modello ESRGAN", - "addLora": "Aggiungi LoRA" + "addLora": "Aggiungi LoRA", + "noLoRAsLoaded": "Nessuna LoRA caricata" }, "invocationCache": { "disable": "Disabilita", @@ -1233,7 +1268,8 @@ "enable": "Abilita", "clear": "Svuota", "maxCacheSize": "Dimensione max cache", - "cacheSize": "Dimensione cache" + "cacheSize": "Dimensione cache", + "useCache": "Usa Cache" }, "dynamicPrompts": { "seedBehaviour": { From 1fd6666682017bf4737025d58003746bcab09d48 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 19:57:29 +1100 Subject: [PATCH 32/45] feat(backend): clear latents files on startup Adds logic to `DiskLatentsStorage.start()` to empty the latents folder on startup. Adds start and stop methods to `ForwardCacheLatentsStorage`. This is required for `DiskLatentsStorage.start()` to be called, due to how this particular service breaks the direct DI pattern, wrapping the underlying storage with a cache. 
--- .../latents_storage/latents_storage_disk.py | 19 +++++++++++++++++++ .../latents_storage_forward_cache.py | 14 ++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/invokeai/app/services/latents_storage/latents_storage_disk.py b/invokeai/app/services/latents_storage/latents_storage_disk.py index 6e7010bae0..58d1df5081 100644 --- a/invokeai/app/services/latents_storage/latents_storage_disk.py +++ b/invokeai/app/services/latents_storage/latents_storage_disk.py @@ -5,6 +5,8 @@ from typing import Union import torch +from invokeai.app.services.invoker import Invoker + from .latents_storage_base import LatentsStorageBase @@ -17,6 +19,23 @@ class DiskLatentsStorage(LatentsStorageBase): self.__output_folder = output_folder if isinstance(output_folder, Path) else Path(output_folder) self.__output_folder.mkdir(parents=True, exist_ok=True) + def start(self, invoker: Invoker) -> None: + self._invoker = invoker + + # Delete all latents files on startup + deleted_latents_count = 0 + freed_space = 0 + for latents_file in Path(self.__output_folder).glob("*"): + if latents_file.is_file(): + freed_space += latents_file.stat().st_size + deleted_latents_count += 1 + latents_file.unlink() + if deleted_latents_count > 0: + freed_space_in_mb = round(freed_space / 1024 / 1024, 2) + self._invoker.services.logger.info( + f"Deleted {deleted_latents_count} latents files, freeing {freed_space_in_mb}MB" + ) + def get(self, name: str) -> torch.Tensor: latent_path = self.get_path(name) return torch.load(latent_path) diff --git a/invokeai/app/services/latents_storage/latents_storage_forward_cache.py b/invokeai/app/services/latents_storage/latents_storage_forward_cache.py index da82b5904d..6232b76a27 100644 --- a/invokeai/app/services/latents_storage/latents_storage_forward_cache.py +++ b/invokeai/app/services/latents_storage/latents_storage_forward_cache.py @@ -5,6 +5,8 @@ from typing import Dict, Optional import torch +from invokeai.app.services.invoker import Invoker + from .latents_storage_base import LatentsStorageBase @@ -23,6 +25,18 @@ class ForwardCacheLatentsStorage(LatentsStorageBase): self.__cache_ids = Queue() self.__max_cache_size = max_cache_size + def start(self, invoker: Invoker) -> None: + self._invoker = invoker + start_op = getattr(self.__underlying_storage, "start", None) + if callable(start_op): + start_op(invoker) + + def stop(self, invoker: Invoker) -> None: + self._invoker = invoker + stop_op = getattr(self.__underlying_storage, "stop", None) + if callable(stop_op): + stop_op(invoker) + def get(self, name: str) -> torch.Tensor: cache_item = self.__get_cache(name) if cache_item is not None: From 0228aba06f38321456aa57f07e554629712d785c Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Thu, 30 Nov 2023 20:07:53 +1100 Subject: [PATCH 33/45] feat(backend): display freed space when cleaning DB --- .../latents_storage/latents_storage_disk.py | 2 +- invokeai/app/services/shared/sqlite.py | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/invokeai/app/services/latents_storage/latents_storage_disk.py b/invokeai/app/services/latents_storage/latents_storage_disk.py index 58d1df5081..f59292bb91 100644 --- a/invokeai/app/services/latents_storage/latents_storage_disk.py +++ b/invokeai/app/services/latents_storage/latents_storage_disk.py @@ -33,7 +33,7 @@ class DiskLatentsStorage(LatentsStorageBase): if deleted_latents_count > 0: freed_space_in_mb = round(freed_space / 1024 / 1024, 2) 
self._invoker.services.logger.info( - f"Deleted {deleted_latents_count} latents files, freeing {freed_space_in_mb}MB" + f"Deleted {deleted_latents_count} latents files (freed {freed_space_in_mb}MB)" ) def get(self, name: str) -> torch.Tensor: diff --git a/invokeai/app/services/shared/sqlite.py b/invokeai/app/services/shared/sqlite.py index 3c75c3d6a7..511023bd8a 100644 --- a/invokeai/app/services/shared/sqlite.py +++ b/invokeai/app/services/shared/sqlite.py @@ -1,6 +1,7 @@ import sqlite3 import threading from logging import Logger +from pathlib import Path from invokeai.app.services.config import InvokeAIAppConfig @@ -8,25 +9,20 @@ sqlite_memory = ":memory:" class SqliteDatabase: - conn: sqlite3.Connection - lock: threading.RLock - _logger: Logger - _config: InvokeAIAppConfig - def __init__(self, config: InvokeAIAppConfig, logger: Logger): self._logger = logger self._config = config if self._config.use_memory_db: - location = sqlite_memory + self.db_path = sqlite_memory logger.info("Using in-memory database") else: db_path = self._config.db_path db_path.parent.mkdir(parents=True, exist_ok=True) - location = str(db_path) - self._logger.info(f"Using database at {location}") + self.db_path = str(db_path) + self._logger.info(f"Using database at {self.db_path}") - self.conn = sqlite3.connect(location, check_same_thread=False) + self.conn = sqlite3.connect(self.db_path, check_same_thread=False) self.lock = threading.RLock() self.conn.row_factory = sqlite3.Row @@ -37,10 +33,15 @@ class SqliteDatabase: def clean(self) -> None: try: + if self.db_path == sqlite_memory: + return + initial_db_size = Path(self.db_path).stat().st_size self.lock.acquire() self.conn.execute("VACUUM;") self.conn.commit() - self._logger.info("Cleaned database") + final_db_size = Path(self.db_path).stat().st_size + freed_space_in_mb = round((initial_db_size - final_db_size) / 1024 / 1024, 2) + self._logger.info(f"Cleaned database (freed {freed_space_in_mb}MB)") except Exception as e: self._logger.error(f"Error cleaning database: {e}") raise e From 3f0e0af177f73c9ed5053459619fe5744b3b4715 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:26:44 +1100 Subject: [PATCH 34/45] feat(backend): only log pruned queue items / db freed space if > 0 --- invokeai/app/services/session_queue/session_queue_sqlite.py | 3 ++- invokeai/app/services/shared/sqlite.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/invokeai/app/services/session_queue/session_queue_sqlite.py b/invokeai/app/services/session_queue/session_queue_sqlite.py index 7259a7bd0c..58d9d461ec 100644 --- a/invokeai/app/services/session_queue/session_queue_sqlite.py +++ b/invokeai/app/services/session_queue/session_queue_sqlite.py @@ -42,7 +42,8 @@ class SqliteSessionQueue(SessionQueueBase): self._set_in_progress_to_canceled() prune_result = self.prune(DEFAULT_QUEUE_ID) local_handler.register(event_name=EventServiceBase.queue_event, _func=self._on_session_event) - self.__invoker.services.logger.info(f"Pruned {prune_result.deleted} finished queue items") + if prune_result.deleted > 0: + self.__invoker.services.logger.info(f"Pruned {prune_result.deleted} finished queue items") def __init__(self, db: SqliteDatabase) -> None: super().__init__() diff --git a/invokeai/app/services/shared/sqlite.py b/invokeai/app/services/shared/sqlite.py index 511023bd8a..9cddb2b926 100644 --- a/invokeai/app/services/shared/sqlite.py +++ b/invokeai/app/services/shared/sqlite.py @@ -41,7 +41,8 @@ class 
SqliteDatabase: self.conn.commit() final_db_size = Path(self.db_path).stat().st_size freed_space_in_mb = round((initial_db_size - final_db_size) / 1024 / 1024, 2) - self._logger.info(f"Cleaned database (freed {freed_space_in_mb}MB)") + if freed_space_in_mb > 0: + self._logger.info(f"Cleaned database (freed {freed_space_in_mb}MB)") except Exception as e: self._logger.error(f"Error cleaning database: {e}") raise e From fb9b471150e9b38aafcad7f47b6b0c9ffaf179e6 Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Sat, 2 Dec 2023 11:45:39 +1100 Subject: [PATCH 35/45] feat(backend): move logic to clear latents to method --- .../latents_storage/latents_storage_disk.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/invokeai/app/services/latents_storage/latents_storage_disk.py b/invokeai/app/services/latents_storage/latents_storage_disk.py index f59292bb91..9192b9147f 100644 --- a/invokeai/app/services/latents_storage/latents_storage_disk.py +++ b/invokeai/app/services/latents_storage/latents_storage_disk.py @@ -21,20 +21,7 @@ class DiskLatentsStorage(LatentsStorageBase): def start(self, invoker: Invoker) -> None: self._invoker = invoker - - # Delete all latents files on startup - deleted_latents_count = 0 - freed_space = 0 - for latents_file in Path(self.__output_folder).glob("*"): - if latents_file.is_file(): - freed_space += latents_file.stat().st_size - deleted_latents_count += 1 - latents_file.unlink() - if deleted_latents_count > 0: - freed_space_in_mb = round(freed_space / 1024 / 1024, 2) - self._invoker.services.logger.info( - f"Deleted {deleted_latents_count} latents files (freed {freed_space_in_mb}MB)" - ) + self._delete_all_latents() def get(self, name: str) -> torch.Tensor: latent_path = self.get_path(name) @@ -51,3 +38,21 @@ class DiskLatentsStorage(LatentsStorageBase): def get_path(self, name: str) -> Path: return self.__output_folder / name + + def _delete_all_latents(self) -> None: + """ + Deletes all latents from disk. + Must be called after we have access to `self._invoker` (e.g. in `start()`). 
+ """ + deleted_latents_count = 0 + freed_space = 0 + for latents_file in Path(self.__output_folder).glob("*"): + if latents_file.is_file(): + freed_space += latents_file.stat().st_size + deleted_latents_count += 1 + latents_file.unlink() + if deleted_latents_count > 0: + freed_space_in_mb = round(freed_space / 1024 / 1024, 2) + self._invoker.services.logger.info( + f"Deleted {deleted_latents_count} latents files (freed {freed_space_in_mb}MB)" + ) From 2d2ef5d72ca9db798a7518870109ac07ec0dc5be Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 2 Dec 2023 11:48:51 -0500 Subject: [PATCH 36/45] ensure that setting loglevel on one logger doesn't change others --- invokeai/backend/util/logging.py | 11 +++--- tests/backend/util/test_logging.py | 57 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 6 deletions(-) create mode 100644 tests/backend/util/test_logging.py diff --git a/invokeai/backend/util/logging.py b/invokeai/backend/util/logging.py index be8aeec9c9..67419bf834 100644 --- a/invokeai/backend/util/logging.py +++ b/invokeai/backend/util/logging.py @@ -342,8 +342,7 @@ class InvokeAILogger(object): # noqa D102 cls, name: str = "InvokeAI", config: InvokeAIAppConfig = InvokeAIAppConfig.get_config() ) -> logging.Logger: # noqa D102 if name in cls.loggers: - logger = cls.loggers[name] - logger.handlers.clear() + return cls.loggers[name] else: logger = logging.getLogger(name) logger.setLevel(config.log_level.upper()) # yes, strings work here @@ -358,7 +357,7 @@ class InvokeAILogger(object): # noqa D102 handlers = [] for handler in handler_strs: handler_name, *args = handler.split("=", 2) - args = args[0] if len(args) > 0 else None + arg = args[0] if len(args) > 0 else None # console and file get the fancy formatter. # syslog gets a simple one @@ -370,16 +369,16 @@ class InvokeAILogger(object): # noqa D102 handlers.append(ch) elif handler_name == "syslog": - ch = cls._parse_syslog_args(args) + ch = cls._parse_syslog_args(arg) handlers.append(ch) elif handler_name == "file": - ch = cls._parse_file_args(args) + ch = cls._parse_file_args(arg) ch.setFormatter(formatter()) handlers.append(ch) elif handler_name == "http": - ch = cls._parse_http_args(args) + ch = cls._parse_http_args(arg) handlers.append(ch) return handlers diff --git a/tests/backend/util/test_logging.py b/tests/backend/util/test_logging.py new file mode 100644 index 0000000000..dd3cee35bc --- /dev/null +++ b/tests/backend/util/test_logging.py @@ -0,0 +1,57 @@ +""" +Test interaction of logging with configuration system. 
+""" +import io +import logging +import re + +from invokeai.app.services.config import InvokeAIAppConfig +from invokeai.backend.util.logging import InvokeAILogger, LOG_FORMATTERS + + +# test formatting +# Would prefer to use the capfd/capsys fixture here, but it is broken +# when used with the logging module: https://github.com/pytest-dev/pytest/issue +def test_formatting(): + logger = InvokeAILogger.get_logger() + stream = io.StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(LOG_FORMATTERS["plain"]()) + logger.addHandler(handler) + logger.info("test1") + output = stream.getvalue() + assert re.search(r"\[InvokeAI\]::INFO --> test1$", output) + + handler.setFormatter(LOG_FORMATTERS["legacy"]()) + logger.info("test2") + output = stream.getvalue() + assert re.search(r">> test2$", output) + + +# test independence of two loggers with different names +def test_independence(): + logger1 = InvokeAILogger.get_logger() + logger2 = InvokeAILogger.get_logger("Test") + assert logger1.name == "InvokeAI" + assert logger2.name == "Test" + assert logger1.level == logging.INFO + assert logger2.level == logging.INFO + logger2.setLevel(logging.DEBUG) + assert logger1.level == logging.INFO + assert logger2.level == logging.DEBUG + + +# test that the logger is returned from two similar get_logger() calls +def test_retrieval(): + logger1 = InvokeAILogger.get_logger() + logger2 = InvokeAILogger.get_logger() + logger3 = InvokeAILogger.get_logger("Test") + assert logger1 == logger2 + assert logger1 != logger3 + + +# test that the configuration is used to set the initial logging level +def test_config(): + config = InvokeAIAppConfig(log_level="debug") + logger1 = InvokeAILogger.get_logger("DebugTest", config=config) + assert logger1.level == logging.DEBUG From bdb0d13a2d0d8dbb4c89de9bfc5f0d32bc83de59 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 2 Dec 2023 11:56:41 -0500 Subject: [PATCH 37/45] fix import order --- tests/backend/util/test_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/backend/util/test_logging.py b/tests/backend/util/test_logging.py index dd3cee35bc..0d229df7c1 100644 --- a/tests/backend/util/test_logging.py +++ b/tests/backend/util/test_logging.py @@ -6,7 +6,7 @@ import logging import re from invokeai.app.services.config import InvokeAIAppConfig -from invokeai.backend.util.logging import InvokeAILogger, LOG_FORMATTERS +from invokeai.backend.util.logging import LOG_FORMATTERS, InvokeAILogger # test formatting From d0464a57930f8a2d2e37f56ea58b75a258573a50 Mon Sep 17 00:00:00 2001 From: Alfie John Date: Sun, 3 Dec 2023 02:31:17 +1100 Subject: [PATCH 38/45] Tiny grammar fix --- docs/features/PROMPTS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/features/PROMPTS.md b/docs/features/PROMPTS.md index be11e4cce6..07b942177a 100644 --- a/docs/features/PROMPTS.md +++ b/docs/features/PROMPTS.md @@ -120,7 +120,7 @@ Generate an image with a given prompt, record the seed of the image, and then use the `prompt2prompt` syntax to substitute words in the original prompt for words in a new prompt. This works for `img2img` as well. -For example, consider the prompt `a cat.swap(dog) playing with a ball in the forest`. Normally, because of the word words interact with each other when doing a stable diffusion image generation, these two prompts would generate different compositions: +For example, consider the prompt `a cat.swap(dog) playing with a ball in the forest`. 
Normally, because the words interact with each other when doing a stable diffusion image generation, these two prompts would generate different compositions: - `a cat playing with a ball in the forest` - `a dog playing with a ball in the forest` From 4fc2ed71950a6af7862e96280e933d60ef14d2b6 Mon Sep 17 00:00:00 2001 From: Anthony Monthe Date: Mon, 4 Dec 2023 02:57:39 +0000 Subject: [PATCH 39/45] Added full-version endpoint (#5206) * Added get_app_deps endpoint * Use importlib.version & added deps --- invokeai/app/api/routers/app_info.py | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/invokeai/app/api/routers/app_info.py b/invokeai/app/api/routers/app_info.py index 39d570ec99..2137aa9be7 100644 --- a/invokeai/app/api/routers/app_info.py +++ b/invokeai/app/api/routers/app_info.py @@ -1,7 +1,11 @@ import typing from enum import Enum +from importlib.metadata import PackageNotFoundError, version from pathlib import Path +from platform import python_version +from typing import Optional +import torch from fastapi import Body from fastapi.routing import APIRouter from pydantic import BaseModel, Field @@ -40,6 +44,24 @@ class AppVersion(BaseModel): version: str = Field(description="App version") +class AppDependencyVersions(BaseModel): + """App dependency Versions Response""" + + accelerate: str = Field(description="accelerate version") + compel: str = Field(description="compel version") + cuda: Optional[str] = Field(description="CUDA version") + diffusers: str = Field(description="diffusers version") + numpy: str = Field(description="Numpy version") + opencv: str = Field(description="OpenCV version") + onnx: str = Field(description="ONNX version") + pillow: str = Field(description="Pillow (PIL) version") + python: str = Field(description="Python version") + torch: str = Field(description="PyTorch version") + torchvision: str = Field(description="PyTorch Vision version") + transformers: str = Field(description="transformers version") + xformers: Optional[str] = Field(description="xformers version") + + class AppConfig(BaseModel): """App Config Response""" @@ -54,6 +76,29 @@ async def get_version() -> AppVersion: return AppVersion(version=__version__) +@app_router.get("/app_deps", operation_id="get_app_deps", status_code=200, response_model=AppDependencyVersions) +async def get_app_deps() -> AppDependencyVersions: + try: + xformers = version("xformers") + except PackageNotFoundError: + xformers = None + return AppDependencyVersions( + accelerate=version("accelerate"), + compel=version("compel"), + cuda=torch.version.cuda, + diffusers=version("diffusers"), + numpy=version("numpy"), + opencv=version("opencv-python"), + onnx=version("onnx"), + pillow=version("pillow"), + python=python_version(), + torch=torch.version.__version__, + torchvision=version("torchvision"), + transformers=version("transformers"), + xformers=xformers, + ) + + @app_router.get("/config", operation_id="get_config", status_code=200, response_model=AppConfig) async def get_config() -> AppConfig: infill_methods = ["tile", "lama", "cv2"] From 0fdcc0af658a5dbbed292f8e5658f4c93dfc9afc Mon Sep 17 00:00:00 2001 From: psychedelicious <4822129+psychedelicious@users.noreply.github.com> Date: Fri, 1 Dec 2023 07:41:01 +1100 Subject: [PATCH 40/45] feat(nodes): add index and total to iterate output --- invokeai/app/services/shared/graph.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/invokeai/app/services/shared/graph.py b/invokeai/app/services/shared/graph.py index 
c825a84011..854defc945 100644 --- a/invokeai/app/services/shared/graph.py +++ b/invokeai/app/services/shared/graph.py @@ -207,10 +207,12 @@ class IterateInvocationOutput(BaseInvocationOutput): item: Any = OutputField( description="The item being iterated over", title="Collection Item", ui_type=UIType._CollectionItem ) + index: int = OutputField(description="The index of the item", title="Index") + total: int = OutputField(description="The total number of items", title="Total") # TODO: Fill this out and move to invocations -@invocation("iterate", version="1.0.0") +@invocation("iterate", version="1.1.0") class IterateInvocation(BaseInvocation): """Iterates over a list of items""" @@ -221,7 +223,7 @@ class IterateInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> IterateInvocationOutput: """Produces the outputs as values""" - return IterateInvocationOutput(item=self.collection[self.index]) + return IterateInvocationOutput(item=self.collection[self.index], index=self.index, total=len(self.collection)) @invocation_output("collect_output") From e45704833e39fa1c872ef9e7dfc872c74e2845c0 Mon Sep 17 00:00:00 2001 From: Mary Hipp Date: Mon, 4 Dec 2023 13:55:13 -0500 Subject: [PATCH 41/45] if response for bulk download, dont close toast --- .../gallery/components/Boards/BoardContextMenu.tsx | 8 +++++++- .../ImageContextMenu/MultipleSelectionMenuItems.tsx | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardContextMenu.tsx b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardContextMenu.tsx index 092d4682f7..19c5f1a4e3 100644 --- a/invokeai/frontend/web/src/features/gallery/components/Boards/BoardContextMenu.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/Boards/BoardContextMenu.tsx @@ -73,7 +73,13 @@ const BoardContextMenu = ({ addToast({ title: t('gallery.preparingDownload'), status: 'success', - ...(response.response ? { description: response.response } : {}), + ...(response.response + ? { + description: response.response, + duration: null, + isClosable: true, + } + : {}), }) ); } catch { diff --git a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/MultipleSelectionMenuItems.tsx b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/MultipleSelectionMenuItems.tsx index bb6751dcc3..273fa1ea54 100644 --- a/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/MultipleSelectionMenuItems.tsx +++ b/invokeai/frontend/web/src/features/gallery/components/ImageContextMenu/MultipleSelectionMenuItems.tsx @@ -59,7 +59,13 @@ const MultipleSelectionMenuItems = () => { addToast({ title: t('gallery.preparingDownload'), status: 'success', - ...(response.response ? { description: response.response } : {}), + ...(response.response + ? 
{ + description: response.response, + duration: null, + isClosable: true, + } + : {}), }) ); } catch { From 0463541d9922d887dd19cd672628902fd3f4b44e Mon Sep 17 00:00:00 2001 From: Mary Hipp Rogers Date: Mon, 4 Dec 2023 16:01:49 -0500 Subject: [PATCH 42/45] dont set socketURL until socket is initialized (#5229) * dont set socketURL until socket is initialized * cleanup * feat(ui): simplify `socketUrl` memo no need to mutate the string; just return early if using baseUrl --------- Co-authored-by: Mary Hipp Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com> --- .../frontend/web/src/app/hooks/useSocketIO.ts | 89 +++++++------------ 1 file changed, 34 insertions(+), 55 deletions(-) diff --git a/invokeai/frontend/web/src/app/hooks/useSocketIO.ts b/invokeai/frontend/web/src/app/hooks/useSocketIO.ts index 91048fa63c..b2f08b2815 100644 --- a/invokeai/frontend/web/src/app/hooks/useSocketIO.ts +++ b/invokeai/frontend/web/src/app/hooks/useSocketIO.ts @@ -3,8 +3,8 @@ import { $authToken } from 'app/store/nanostores/authToken'; import { $baseUrl } from 'app/store/nanostores/baseUrl'; import { $isDebugging } from 'app/store/nanostores/isDebugging'; import { useAppDispatch } from 'app/store/storeHooks'; -import { MapStore, WritableAtom, atom, map } from 'nanostores'; -import { useEffect } from 'react'; +import { MapStore, atom, map } from 'nanostores'; +import { useEffect, useMemo } from 'react'; import { ClientToServerEvents, ServerToClientEvents, @@ -16,57 +16,10 @@ import { ManagerOptions, Socket, SocketOptions, io } from 'socket.io-client'; declare global { interface Window { $socketOptions?: MapStore>; - $socketUrl?: WritableAtom; } } -const makeSocketOptions = (): Partial => { - const socketOptions: Parameters[0] = { - timeout: 60000, - path: '/ws/socket.io', - autoConnect: false, // achtung! removing this breaks the dynamic middleware - forceNew: true, - }; - - // if building in package mode, replace socket url with open api base url minus the http protocol - if (['nodes', 'package'].includes(import.meta.env.MODE)) { - const authToken = $authToken.get(); - if (authToken) { - // TODO: handle providing jwt to socket.io - socketOptions.auth = { token: authToken }; - } - - socketOptions.transports = ['websocket', 'polling']; - } - - return socketOptions; -}; - -const makeSocketUrl = (): string => { - const wsProtocol = window.location.protocol === 'https:' ? 
'wss' : 'ws'; - let socketUrl = `${wsProtocol}://${window.location.host}`; - if (['nodes', 'package'].includes(import.meta.env.MODE)) { - const baseUrl = $baseUrl.get(); - if (baseUrl) { - //eslint-disable-next-line - socketUrl = baseUrl.replace(/^https?\:\/\//i, ''); - } - } - return socketUrl; -}; - -const makeSocket = (): Socket => { - const socketOptions = makeSocketOptions(); - const socketUrl = $socketUrl.get(); - const socket: Socket = io( - socketUrl, - { ...socketOptions, ...$socketOptions.get() } - ); - return socket; -}; - export const $socketOptions = map>({}); -export const $socketUrl = atom(makeSocketUrl()); export const $isSocketInitialized = atom(false); /** @@ -74,23 +27,50 @@ export const $isSocketInitialized = atom(false); */ export const useSocketIO = () => { const dispatch = useAppDispatch(); - const socketOptions = useStore($socketOptions); - const socketUrl = useStore($socketUrl); const baseUrl = useStore($baseUrl); const authToken = useStore($authToken); + const addlSocketOptions = useStore($socketOptions); + + const socketUrl = useMemo(() => { + const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws'; + if (baseUrl) { + return baseUrl.replace(/^https?:\/\//i, ''); + } + + return `${wsProtocol}://${window.location.host}`; + }, [baseUrl]); + + const socketOptions = useMemo(() => { + const options: Parameters[0] = { + timeout: 60000, + path: '/ws/socket.io', + autoConnect: false, // achtung! removing this breaks the dynamic middleware + forceNew: true, + }; + + if (authToken) { + options.auth = { token: authToken }; + options.transports = ['websocket', 'polling']; + } + + return { ...options, ...addlSocketOptions }; + }, [authToken, addlSocketOptions]); useEffect(() => { if ($isSocketInitialized.get()) { // Singleton! return; } - const socket = makeSocket(); + + const socket: Socket = io( + socketUrl, + socketOptions + ); setEventListeners({ dispatch, socket }); socket.connect(); if ($isDebugging.get()) { window.$socketOptions = $socketOptions; - window.$socketUrl = $socketUrl; console.log('Socket initialized', socket); } @@ -99,11 +79,10 @@ export const useSocketIO = () => { return () => { if ($isDebugging.get()) { window.$socketOptions = undefined; - window.$socketUrl = undefined; console.log('Socket teardown', socket); } socket.disconnect(); $isSocketInitialized.set(false); }; - }, [dispatch, socketOptions, socketUrl, baseUrl, authToken]); + }, [dispatch, socketOptions, socketUrl]); }; From 3c7d1fcd323a3283a64784f584e73932f4cb8693 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 4 Dec 2023 22:41:59 -0500 Subject: [PATCH 43/45] clean up get_logger() call --- invokeai/app/api/routers/model_records.py | 2 +- invokeai/backend/util/logging.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/invokeai/app/api/routers/model_records.py b/invokeai/app/api/routers/model_records.py index cffca25d9f..87d8211d12 100644 --- a/invokeai/app/api/routers/model_records.py +++ b/invokeai/app/api/routers/model_records.py @@ -141,7 +141,7 @@ async def del_model_record( status_code=201, ) async def add_model_record( - config: Annotated[AnyModelConfig, Body(description="Model config", discriminator="type")] + config: Annotated[AnyModelConfig, Body(description="Model config", discriminator="type")], ) -> AnyModelConfig: """ Add a model using the configuration information appropriate for its type. 
diff --git a/invokeai/backend/util/logging.py b/invokeai/backend/util/logging.py index 67419bf834..c607e91835 100644 --- a/invokeai/backend/util/logging.py +++ b/invokeai/backend/util/logging.py @@ -343,12 +343,12 @@ class InvokeAILogger(object): # noqa D102 ) -> logging.Logger: # noqa D102 if name in cls.loggers: return cls.loggers[name] - else: - logger = logging.getLogger(name) + + logger = logging.getLogger(name) logger.setLevel(config.log_level.upper()) # yes, strings work here for ch in cls.get_loggers(config): logger.addHandler(ch) - cls.loggers[name] = logger + cls.loggers[name] = logger return cls.loggers[name] @classmethod From 5f122186bd5f45671ff0e30618ae7dea76eeed4e Mon Sep 17 00:00:00 2001 From: Surisen Date: Tue, 5 Dec 2023 04:06:36 +0000 Subject: [PATCH 44/45] translationBot(ui): update translation (Chinese (Simplified)) Currently translated at 99.8% (1317 of 1319 strings) Co-authored-by: Surisen Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/zh_Hans/ Translation: InvokeAI/Web UI --- .../frontend/web/public/locales/zh_CN.json | 103 ++++++++++++++---- 1 file changed, 83 insertions(+), 20 deletions(-) diff --git a/invokeai/frontend/web/public/locales/zh_CN.json b/invokeai/frontend/web/public/locales/zh_CN.json index 24105f2b40..40d4630861 100644 --- a/invokeai/frontend/web/public/locales/zh_CN.json +++ b/invokeai/frontend/web/public/locales/zh_CN.json @@ -99,7 +99,17 @@ "data": "数据", "safetensors": "Safetensors", "outpaint": "外扩绘制", - "details": "详情" + "details": "详情", + "format": "格式", + "unknown": "未知", + "folder": "文件夹", + "error": "错误", + "installed": "已安装", + "file": "文件", + "somethingWentWrong": "出了点问题", + "copyError": "$t(gallery.copy) 错误", + "input": "输入", + "notInstalled": "非 $t(common.installed)" }, "gallery": { "generations": "生成的图像", @@ -130,7 +140,12 @@ "preparingDownload": "准备下载", "preparingDownloadFailed": "准备下载时出现问题", "downloadSelection": "下载所选内容", - "noImageSelected": "无选中的图像" + "noImageSelected": "无选中的图像", + "deleteSelection": "删除所选内容", + "image": "图像", + "drop": "弃用", + "dropOrUpload": "$t(gallery.drop) 或上传", + "dropToUpload": "$t(gallery.drop) 以上传" }, "hotkeys": { "keyboardShortcuts": "键盘快捷键", @@ -486,7 +501,8 @@ "alpha": "Alpha", "vaePrecision": "VAE 精度", "checkpointOrSafetensors": "$t(common.checkpoint) / $t(common.safetensors)", - "noModelSelected": "无选中的模型" + "noModelSelected": "无选中的模型", + "conversionNotSupported": "转换尚未支持" }, "parameters": { "images": "图像", @@ -615,7 +631,10 @@ "seamlessX": "无缝 X", "seamlessY": "无缝 Y", "maskEdge": "遮罩边缘", - "unmasked": "取消遮罩" + "unmasked": "取消遮罩", + "cfgRescaleMultiplier": "CFG 重缩放倍数", + "cfgRescale": "CFG 重缩放", + "useSize": "使用尺寸" }, "settings": { "models": "模型", @@ -655,7 +674,8 @@ "clearIntermediatesDisabled": "队列为空才能清理中间产物", "enableNSFWChecker": "启用成人内容检测器", "enableInvisibleWatermark": "启用不可见水印", - "enableInformationalPopovers": "启用信息弹窗" + "enableInformationalPopovers": "启用信息弹窗", + "reloadingIn": "重新加载中" }, "toast": { "tempFoldersEmptied": "临时文件夹已清空", @@ -739,7 +759,8 @@ "imageUploadFailed": "图像上传失败", "problemImportingMask": "导入遮罩时出现问题", "baseModelChangedCleared_other": "基础模型已更改, 已清除或禁用 {{count}} 个不兼容的子模型", - "setAsCanvasInitialImage": "设为画布初始图像" + "setAsCanvasInitialImage": "设为画布初始图像", + "invalidUpload": "无效的上传" }, "unifiedCanvas": { "layer": "图层", @@ -748,7 +769,7 @@ "maskingOptions": "遮罩选项", "enableMask": "启用遮罩", "preserveMaskedArea": "保留遮罩区域", - "clearMask": "清除遮罩", + "clearMask": "清除遮罩 (Shift+C)", "brush": "刷子", "eraser": "橡皮擦", "fillBoundingBox": "填充选择区域", @@ -801,7 +822,8 @@ 
"betaPreserveMasked": "保留遮罩层", "antialiasing": "抗锯齿", "showResultsOn": "显示结果 (开)", - "showResultsOff": "显示结果 (关)" + "showResultsOff": "显示结果 (关)", + "saveMask": "保存 $t(unifiedCanvas.mask)" }, "accessibility": { "modelSelect": "模型选择", @@ -826,7 +848,9 @@ "menu": "菜单", "showGalleryPanel": "显示图库浮窗", "loadMore": "加载更多", - "mode": "模式" + "mode": "模式", + "resetUI": "$t(accessibility.reset) UI", + "createIssue": "创建问题" }, "ui": { "showProgressImages": "显示处理中的图片", @@ -877,7 +901,7 @@ "animatedEdges": "边缘动效", "nodeTemplate": "节点模板", "pickOne": "选择一个", - "unableToLoadWorkflow": "无法验证工作流", + "unableToLoadWorkflow": "无法加载工作流", "snapToGrid": "对齐网格", "noFieldsLinearview": "线性视图中未添加任何字段", "nodeSearch": "检索节点", @@ -929,7 +953,7 @@ "skippingUnknownOutputType": "跳过未知类型的输出", "latentsFieldDescription": "Latents 可以在节点间传递。", "denoiseMaskFieldDescription": "去噪遮罩可以在节点间传递", - "missingTemplate": "缺失模板", + "missingTemplate": "无效的节点:类型为 {{type}} 的节点 {{node}} 缺失模板(无已安装模板?)", "outputSchemaNotFound": "未找到输出模式", "latentsPolymorphicDescription": "Latents 可以在节点间传递。", "colorFieldDescription": "一种 RGBA 颜色。", @@ -957,7 +981,7 @@ "collectionItem": "项目合集", "controlCollectionDescription": "节点间传递的控制信息。", "skippedReservedInput": "跳过保留的输入", - "outputFields": "输出", + "outputFields": "输出区域", "edge": "边缘", "inputNode": "输入节点", "enumDescription": "枚举 (Enums) 可能是多个选项的一个数值。", @@ -992,7 +1016,7 @@ "string": "字符串", "inputFields": "输入", "uNetFieldDescription": "UNet 子模型。", - "mismatchedVersion": "不匹配的版本", + "mismatchedVersion": "无效的节点:类型为 {{type}} 的节点 {{node}} 版本不匹配(是否尝试更新?)", "vaeFieldDescription": "Vae 子模型。", "imageFieldDescription": "图像可以在节点间传递。", "outputNode": "输出节点", @@ -1050,8 +1074,36 @@ "latentsPolymorphic": "Latents 多态", "conditioningField": "条件", "latentsField": "Latents", - "updateAllNodes": "更新所有节点", - "unableToUpdateNodes_other": "{{count}} 个节点无法完成更新" + "updateAllNodes": "更新节点", + "unableToUpdateNodes_other": "{{count}} 个节点无法完成更新", + "inputFieldTypeParseError": "无法解析 {{node}} 的输入类型 {{field}}。({{message}})", + "unsupportedArrayItemType": "不支持的数组类型 \"{{type}}\"", + "addLinearView": "添加到线性视图", + "targetNodeFieldDoesNotExist": "无效的边缘:{{node}} 的目标/输入区域 {{field}} 不存在", + "unsupportedMismatchedUnion": "合集或标量类型与基类 {{firstType}} 和 {{secondType}} 不匹配", + "allNodesUpdated": "已更新所有节点", + "sourceNodeDoesNotExist": "无效的边缘:{{node}} 的源/输出节点不存在", + "unableToExtractEnumOptions": "无法提取枚举选项", + "unableToParseFieldType": "无法解析类型", + "outputFieldInInput": "输入中的输出区域", + "unrecognizedWorkflowVersion": "无法识别的工作流架构版本:{{version}}", + "outputFieldTypeParseError": "无法解析 {{node}} 的输出类型 {{field}}。({{message}})", + "sourceNodeFieldDoesNotExist": "无效的边缘:{{node}} 的源/输出区域 {{field}} 不存在", + "unableToGetWorkflowVersion": "无法获取工作流架构版本", + "nodePack": "节点包", + "unableToExtractSchemaNameFromRef": "无法从参考中提取架构名", + "unableToMigrateWorkflow": "无法迁移工作流", + "unknownOutput": "未知输出:{{name}}", + "unableToUpdateNode": "无法更新节点", + "unknownErrorValidatingWorkflow": "验证工作流时出现未知错误", + "collectionFieldType": "{{name}} 合集", + "unknownNodeType": "未知节点类型", + "targetNodeDoesNotExist": "无效的边缘:{{node}} 的目标/输入节点不存在", + "unknownFieldType": "$t(nodes.unknownField) 类型:{{type}}", + "collectionOrScalarFieldType": "{{name}} 合集 | 标量", + "nodeVersion": "节点版本", + "deletedInvalidEdge": "已删除无效的边缘 {{source}} -> {{target}}", + "unknownInput": "未知输入:{{name}}" }, "controlnet": { "resize": "直接缩放", @@ -1245,7 +1297,8 @@ "fit": "图生图匹配", "recallParameters": "召回参数", "noRecallParameters": "未找到要召回的参数", - "vae": "VAE" + "vae": "VAE", + "cfgRescaleMultiplier": "$t(parameters.cfgRescaleMultiplier)" }, "models": { 
"noMatchingModels": "无相匹配的模型", @@ -1258,7 +1311,8 @@ "noRefinerModelsInstalled": "无已安装的 SDXL Refiner 模型", "noLoRAsInstalled": "无已安装的 LoRA", "esrganModel": "ESRGAN 模型", - "addLora": "添加 LoRA" + "addLora": "添加 LoRA", + "noLoRAsLoaded": "无已加载的 LoRA" }, "boards": { "autoAddBoard": "自动添加面板", @@ -1280,12 +1334,14 @@ "deleteBoardOnly": "仅删除面板", "deleteBoard": "删除面板", "deleteBoardAndImages": "删除面板和图像", - "deletedBoardsCannotbeRestored": "已删除的面板无法被恢复" + "deletedBoardsCannotbeRestored": "已删除的面板无法被恢复", + "movingImagesToBoard_other": "移动 {{count}} 张图像到面板:" }, "embedding": { "noMatchingEmbedding": "不匹配的 Embedding", "addEmbedding": "添加 Embedding", - "incompatibleModel": "不兼容的基础模型:" + "incompatibleModel": "不兼容的基础模型:", + "noEmbeddingsLoaded": "无已加载的 Embedding" }, "dynamicPrompts": { "seedBehaviour": { @@ -1514,6 +1570,12 @@ "ControlNet 为生成过程提供引导,为生成具有受控构图、结构、样式的图像提供帮助,具体的功能由所选的模型决定。" ], "heading": "ControlNet" + }, + "paramCFGRescaleMultiplier": { + "heading": "CFG 重缩放倍数", + "paragraphs": [ + "CFG 引导的重缩放倍率,用于通过 zero-terminal SNR (ztsnr) 训练的模型。推荐设为 0.7。" + ] } }, "invocationCache": { @@ -1530,7 +1592,8 @@ "enable": "启用", "clear": "清除", "maxCacheSize": "最大缓存大小", - "cacheSize": "缓存大小" + "cacheSize": "缓存大小", + "useCache": "使用缓存" }, "hrf": { "enableHrf": "启用高分辨率修复", From e990235d32a386c40f479e232899abe9d03e0a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EC=8A=B9=EC=84=9D?= Date: Tue, 5 Dec 2023 04:06:38 +0000 Subject: [PATCH 45/45] translationBot(ui): update translation (Korean) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently translated at 5.2% (70 of 1321 strings) Co-authored-by: 이승석 Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ko/ Translation: InvokeAI/Web UI --- invokeai/frontend/web/public/locales/ko.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/invokeai/frontend/web/public/locales/ko.json b/invokeai/frontend/web/public/locales/ko.json index 8baab54ac9..9bee147c3e 100644 --- a/invokeai/frontend/web/public/locales/ko.json +++ b/invokeai/frontend/web/public/locales/ko.json @@ -72,5 +72,13 @@ }, "unifiedCanvas": { "betaPreserveMasked": "마스크 레이어 유지" + }, + "accessibility": { + "previousImage": "이전 이미지", + "modifyConfig": "Config 수정", + "nextImage": "다음 이미지", + "mode": "모드", + "menu": "메뉴", + "modelSelect": "모델 선택" } }