Compare commits

..

628 Commits

Author SHA1 Message Date
59327e827b Create new data structures for captioned images, and a list of captioned images. Create auto_caption_image node which can take a single image or list of images to caption 2024-05-17 14:31:33 -04:00
a18d7adad4 fix(ui): allow image dims multiple of 32 with SDXL and T2I adapter
See https://github.com/invoke-ai/InvokeAI/pull/6342#issuecomment-2109912452 for discussion.
2024-05-17 23:38:54 +10:00
32dff2c4e3 feat(ui): copy/paste input edges when copying node
- Copy edges to selected nodes on copy
- If pasted with `ctrl/meta-shift-v`, also paste the input edges
2024-05-17 23:12:29 +10:00
575ecb4028 feat(ui): prevent connections to direct-only inputs 2024-05-17 22:08:40 +10:00
ad8778df6c feat(ui): extract node execution state from nodesSlice
This state is ephemeral and not undoable.
2024-05-17 13:24:23 +10:00
d2f5103f9f fix(ui): ignore actions from other slices in nodesSlice history 2024-05-17 13:24:23 +10:00
dd42a56084 tests(ui): fix parseSchema test fixture
The schema fixture wasn't formatted quite right - it doesn't affect the test, but worth fixing anyway.
2024-05-17 13:24:23 +10:00
23ac340a3f tests(ui): add test for parseSchema 2024-05-17 13:24:23 +10:00
6791b4eaa8 chore(ui): lint 2024-05-17 13:24:23 +10:00
a8b042177d feat(ui): connection validation for collection items types 2024-05-17 13:24:23 +10:00
76825f4261 fix(ui): allow collect node inputs to connect to multiple fields when using lazy connect 2024-05-17 13:24:23 +10:00
78cb4d75ad fix(ui): use elevateEdgesOnSelect so last-selected edge is the interactable one when updating edges 2024-05-17 13:24:23 +10:00
a18bbac262 fix(ui): jank interaction between edge update and autoconnect 2024-05-17 13:24:23 +10:00
9ff5596963 feat(ui): hide values for connected fields 2024-05-17 13:24:23 +10:00
8ea596b1e9 fix(ui): janky editable field title
- Do not allow whitespace-only field titles
- Make only preview text trigger editable
- Tooltip over the preview, not the whole "row"
2024-05-17 13:24:23 +10:00
e3a143eaed fix(ui): fix jank w/ stale connections 2024-05-17 13:24:23 +10:00
c359ab6d9b fix(ui): fix dependency tracking for copy/paste hotkeys 2024-05-17 13:24:23 +10:00
dbfaa07e03 feat(ui): add checks for undo/redo actions 2024-05-17 13:24:23 +10:00
7f78fe7a36 feat(ui): move viewport state to nanostores 2024-05-17 13:24:23 +10:00
6cf5b402c6 feat(ui): remove extraneous selectedEdges and selectedNodes state 2024-05-17 13:24:23 +10:00
b0c7c7cb47 feat(ui): remove remaining extraneous state from nodes slice 2024-05-17 13:24:23 +10:00
4d68cd8dbb feat(ui): recreate edge auto-add-node logic 2024-05-17 13:24:23 +10:00
2c1fa30639 feat(ui): recreate edge autoconnect logic 2024-05-17 13:24:23 +10:00
708c68413d tidy(ui): add type for templates 2024-05-17 13:24:23 +10:00
1d884fb794 feat(ui): move invocation templates out of redux
Templates are stored in nanostores. All hooks, selectors, etc are reworked to reference the nanostore.
2024-05-17 13:24:23 +10:00
f6a44681a8 feat(ui): move invocation templates out of redux (wip) 2024-05-17 13:24:23 +10:00
d4df312300 feat(ui): move nodes copy/paste out of slice 2024-05-17 13:24:23 +10:00
9c0d44b412 feat(ui): split workflow editor settings to separate slice
We need the undoable slice to be only undoable state - settings are not undoable.
2024-05-17 13:24:23 +10:00
27826369f0 feat(ui): make nodesSlice undoable 2024-05-17 13:24:23 +10:00
31d8b50276 [Refactor] Update min and max values for LoRACard weight input 2024-05-17 10:38:26 +10:00
40b4fa7238 feat(ui): SDXL clip skip
Uses the same CLIP Skip value for both CLIP1 and CLIP2.

Adjusted SDXL CLIP Skip min/max/markers to be within the valid range (0 to 11).

Closes #4583
2024-05-16 07:49:30 -04:00
3b1743b7c2 docs: fix install reqs link 2024-05-16 10:37:42 +10:00
f489c818f1 docs(ui): add comments to nsfw & watermarker helpers 2024-05-15 14:09:44 +10:00
af477fa295 tidy(ui): remove unused modelLoader from refiner helper 2024-05-15 14:09:44 +10:00
0ff0290735 tidy(ui): use Invocation<> helper type in canvas graph builders, elsewhere 2024-05-15 14:09:44 +10:00
67dbe6d949 tidy(ui): use Invocation<> helper type in OG control adapters 2024-05-15 14:09:44 +10:00
4c3c2297b9 tidy(ui): organise graph builder files 2024-05-15 14:09:44 +10:00
cadea55521 tidy(ui): organise graph builder files 2024-05-15 14:09:44 +10:00
c8f30b1392 tidy(ui): move testing-only types to test file 2024-05-15 14:09:44 +10:00
3d14a98abf tidy(ui): use Invocation<> type in control layers types 2024-05-15 14:09:44 +10:00
77024bfca7 fix(ui): fix sdxl generation mode metadata 2024-05-15 14:09:44 +10:00
4a1c3786a1 tidy(ui): organise CL graph builder 2024-05-15 14:09:44 +10:00
b239891986 tidy(ui): clean up base model handling in graph builder 2024-05-15 14:09:44 +10:00
9fb03d43ff tests(ui): get coverage to 100% for graph builder 2024-05-15 14:09:44 +10:00
bdc59786bd tidy(ui): clean up graph builder helper functions 2024-05-15 14:09:44 +10:00
fb6e926500 tidy(ui): remove extraneous graph validate calls 2024-05-15 14:09:44 +10:00
48ccd63dba feat(ui): use integrated metadata helper 2024-05-15 14:09:44 +10:00
ee647a05dc feat(ui): move metadata util to graph class
No good reason to have it be separate. A bit cleaner this way.
2024-05-15 14:09:44 +10:00
154b52ca4d docs(ui): update docstrings for Graph builder 2024-05-15 14:09:44 +10:00
5dd460c3ce chore(ui): knip 2024-05-15 14:09:44 +10:00
4897ce2a13 tidy(ui): remove unused files 2024-05-15 14:09:44 +10:00
5425526d50 feat(ui): use graph builder for generation tab sdxl 2024-05-15 14:09:44 +10:00
5a4b050e66 feat(ui): use asserts in graph builder 2024-05-15 14:09:44 +10:00
8d39520232 feat(ui): port NSFW and watermark nodes to graph builder 2024-05-15 14:09:44 +10:00
04d12a1e98 feat(ui): add HRF graph builder helper 2024-05-15 14:09:44 +10:00
39aa70963b docs(ui): update docstrings for addGenerationTabSeamless 2024-05-15 14:09:44 +10:00
5743254a41 fix(ui): use arrays for edge methods 2024-05-15 14:09:44 +10:00
c538ffea26 tidy(ui): remove console.log 2024-05-15 14:09:44 +10:00
e8d3a7c870 feat(ui): support multiple fields for getEdgesTo, getEdgesFrom, deleteEdgesTo, deleteEdgesFrom 2024-05-15 14:09:44 +10:00
2be66b1546 feat(ui): add deleteNode and getEdges to graph util 2024-05-15 14:09:44 +10:00
76e181fd44 build(ui): add eslint no-console rule 2024-05-15 14:09:44 +10:00
b5d42fbc66 tidy(ui): remove unused graph helper 2024-05-15 14:09:44 +10:00
b463cd763e tidy(ui): remove extraneous is_intermediate node fields 2024-05-15 14:09:44 +10:00
eb320df41d feat(ui): use new lora loaders, simplify VAE loader, seamless 2024-05-15 14:09:44 +10:00
de1869773f chore(ui): typegen 2024-05-15 14:09:44 +10:00
ef89c7e537 feat(nodes): add LoRASelectorInvocation, LoRACollectionLoader, SDXLLoRACollectionLoader
These simplify loading multiple LoRAs. Instead of requiring chained lora loader nodes, configure each LoRA (model & weight) with a selector, collect them, then send the collection to the collection loader to apply all of the LoRAs to the UNet/CLIP models.

The collection loaders accept a single lora or collection of loras.
2024-05-15 14:09:44 +10:00
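To illustrate the pattern described in the commit above, here is a rough sketch of how such a graph might be wired, using hypothetical node IDs and field names (the actual invocation and field names may differ):

```ts
// Hypothetical sketch only: two LoRA selectors feed a collector, which feeds
// a collection loader, instead of chaining individual LoRA loader nodes.
type Edge = { from: [node: string, field: string]; to: [node: string, field: string] };

const nodes = {
  lora_a: { type: 'lora_selector', lora: 'my-lora-a', weight: 0.75 },
  lora_b: { type: 'lora_selector', lora: 'my-lora-b', weight: 0.5 },
  lora_collect: { type: 'collect' },
  lora_loader: { type: 'lora_collection_loader' },
};

const edges: Edge[] = [
  { from: ['lora_a', 'lora'], to: ['lora_collect', 'item'] },
  { from: ['lora_b', 'lora'], to: ['lora_collect', 'item'] },
  { from: ['lora_collect', 'collection'], to: ['lora_loader', 'loras'] },
  // The collection loader's UNet/CLIP outputs would then connect onward to the
  // denoise/prompt nodes as usual.
];
```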
008645d386 fix(ui): work through merge conflicts (wip) 2024-05-15 14:09:44 +10:00
f8042ffb41 WIP, sd1.5 works 2024-05-15 14:09:44 +10:00
dbe22be598 feat(ui): use graph utils in builders (wip) 2024-05-15 14:09:44 +10:00
8f6078d007 feat(ui): refine graph building util
Simpler types and API surface.
2024-05-15 14:09:44 +10:00
4020bf47e2 feat(ui): add MetadataUtil class
Provides methods for manipulating a graph's metadata.
2024-05-15 14:09:44 +10:00
9d685da759 feat(ui): add stateful Graph class
This stateful class provides abstractions for building a graph. It exposes graph methods like adding and removing nodes and edges.

The methods are documented, tested, and strongly typed.
2024-05-15 14:09:44 +10:00
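As a rough illustration of the kind of abstraction described above (a sketch only; the real class's API, types, and validation differ):

```ts
// Minimal stateful graph-builder sketch: nodes and edges live in the instance,
// and helper methods keep them consistent.
type GraphNode = { id: string; type: string };
type GraphEdge = {
  source: { node: string; field: string };
  destination: { node: string; field: string };
};

class GraphSketch {
  private nodes = new Map<string, GraphNode>();
  private edges: GraphEdge[] = [];

  addNode<T extends GraphNode>(node: T): T {
    if (this.nodes.has(node.id)) {
      throw new Error(`Node ${node.id} already exists`);
    }
    this.nodes.set(node.id, node);
    return node;
  }

  addEdge(fromNode: string, fromField: string, toNode: string, toField: string): GraphEdge {
    const edge: GraphEdge = {
      source: { node: fromNode, field: fromField },
      destination: { node: toNode, field: toField },
    };
    this.edges.push(edge);
    return edge;
  }

  deleteNode(id: string): void {
    // Deleting a node also drops any edges touching it.
    this.nodes.delete(id);
    this.edges = this.edges.filter(
      (e) => e.source.node !== id && e.destination.node !== id
    );
  }
}
```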
e3289856c0 feat(ui): add and use type helpers for invocations and invocation outputs 2024-05-15 14:09:44 +10:00
47b8153728 build(ui): enable TS strictPropertyInitialization
https://www.typescriptlang.org/tsconfig/#strictPropertyInitialization
2024-05-15 14:09:44 +10:00
7901e4c082 chore(ui): typegen 2024-05-15 14:09:44 +10:00
18b0977a31 feat(api): add InvocationOutputMap to OpenAPI schema
This dynamically generated schema object maps node types to their pydantic schemas. This makes it much simpler to infer node types in the UI.
2024-05-15 14:09:44 +10:00
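A toy example of how such a map enables output-type inference in TypeScript; the node types and shapes below are placeholders, not the generated schema:

```ts
// Placeholder stand-in for the generated map of node type -> output shape.
type InvocationOutputMap = {
  add: { value: number };
  string_join: { value: string };
};

// Infer a node's output shape from its type string.
type OutputOf<T extends keyof InvocationOutputMap> = InvocationOutputMap[T];

const sum: OutputOf<'add'> = { value: 3 };
const joined: OutputOf<'string_join'> = { value: 'a' };
```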
fc6b214470 tests(ui): set up vitest coverage 2024-05-15 14:09:44 +10:00
e22211dac0 fix: Fix Outpaint not applying the expanded mask correctly
In unscaled situations
2024-05-15 13:59:01 +10:00
e222484663 chore: v4.2.1 (#6362)
## Summary

Bump to v4.2.1

## Related Issues / Discussions

n/a

## QA Instructions

n/a

## Merge Plan

Do the release after merging.

## Checklist

- [x] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-05-14 03:17:03 +05:30
2a9cea6689 Update invokeai_version.py
Bump to v4.2.1
2024-05-14 07:37:02 +10:00
93da75209c feat(nodes): use new blur_if_nsfw method 2024-05-14 07:23:38 +10:00
9c819f0fd8 fix(nodes): fix nsfw checker model download 2024-05-14 07:23:38 +10:00
eef6fcf286 translationBot(ui): update translation (Russian)
Currently translated at 100.0% (1210 of 1210 strings)

Co-authored-by: Васянатор <ilabulanov339@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
e375d9f787 translationBot(ui): update translation (Italian)
Currently translated at 98.5% (1192 of 1210 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.5% (1192 of 1210 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.5% (1192 of 1210 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.5% (1192 of 1210 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
ab18174774 translationBot(ui): update translation (Spanish)
Currently translated at 31.3% (379 of 1208 strings)

Co-authored-by: gallegonovato <fran-carro@hotmail.es>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/es/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
9265841384 translationBot(ui): update translation files
Updated by "Cleanup translation files" hook in Weblate.

translationBot(ui): update translation files

Updated by "Cleanup translation files" hook in Weblate.

Co-authored-by: Hosted Weblate <hosted@weblate.org>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
c5fd08125d translationBot(ui): update translation (Italian)
Currently translated at 98.5% (1192 of 1210 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
11d88dae7f translationBot(ui): update translation (Russian)
Currently translated at 100.0% (1210 of 1210 strings)

Co-authored-by: Васянатор <ilabulanov339@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
3b495659b0 translationBot(ui): update translation files
Updated by "Cleanup translation files" hook in Weblate.

Co-authored-by: Hosted Weblate <hosted@weblate.org>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
15c9a3a4b6 translationBot(ui): update translation (Italian)
Currently translated at 98.3% (1189 of 1209 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.3% (1189 of 1209 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
60e77e4ed6 translationBot(ui): update translation (Chinese (Simplified))
Currently translated at 77.8% (922 of 1185 strings)

Co-authored-by: flower_elf <miaoju2005@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/zh_Hans/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
fa832a8ac6 translationBot(ui): update translation (Russian)
Currently translated at 100.0% (1209 of 1209 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1209 of 1209 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1188 of 1188 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1185 of 1185 strings)

Co-authored-by: Васянатор <ilabulanov339@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
f7834d7d59 translationBot(ui): update translation files
Updated by "Cleanup translation files" hook in Weblate.

translationBot(ui): update translation files

Updated by "Cleanup translation files" hook in Weblate.

translationBot(ui): update translation files

Updated by "Cleanup translation files" hook in Weblate.

Co-authored-by: Hosted Weblate <hosted@weblate.org>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
63d7461510 translationBot(ui): update translation (German)
Currently translated at 71.9% (839 of 1166 strings)

Co-authored-by: Alexander Eichhorn <pfannkuchensack@einfach-doof.de>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/de/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
1de704160e translationBot(ui): update translation (Russian)
Currently translated at 97.3% (1154 of 1185 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1174 of 1174 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1173 of 1173 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1166 of 1166 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1165 of 1165 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1149 of 1149 strings)

translationBot(ui): update translation (Russian)

Currently translated at 100.0% (1147 of 1147 strings)

Co-authored-by: Васянатор <ilabulanov339@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
b118a2565c translationBot(ui): update translation (Italian)
Currently translated at 96.0% (1138 of 1185 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.4% (1156 of 1174 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.3% (1155 of 1174 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.4% (1129 of 1147 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-05-14 07:15:12 +10:00
eb166baafe fix(ui): invoke button shows loading while queueing
Make the Invoke button show a loading spinner while queueing.

The queue mutations need to be awaited else the `isLoading` state doesn't work as expected. I feel like I should understand why, but I don't...
2024-05-13 11:53:29 +10:00
818d37f304 fix(api): retain cover image when converting model to diffusers
We need to retrieve and re-save the image, because a conversion to diffusers creates a new model record, with a new key.

See: https://old.reddit.com/r/StableDiffusion/comments/1cnx40d/invoke_42_control_layers_regional_guidance_w_text/l3bv152/
2024-05-13 08:46:07 +10:00
9cdb801c1c fix(api): add cover image to update model response
Fixes a bug where the image _appears_ to be reset when editing a model.

See: https://old.reddit.com/r/StableDiffusion/comments/1cnx40d/invoke_42_control_layers_regional_guidance_w_text/l3asdej/
2024-05-13 08:46:07 +10:00
5da8cde4fc fix(ui): disable listening on CA and II layers (#6332)
## Summary

Do not listen for mouse events on CA and II layers (which are not
interact-able).

## Related Issues / Discussions

Closes #6331

## QA Instructions

Move a CA or II layer above a regional guidance layer. The move tool
should now work.

## Merge Plan

n/a

## Checklist

- [x] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-05-13 04:07:27 +05:30
6ec3dc0c0d Merge branch 'main' into psyche/fix/ui/cl-listening-layers 2024-05-13 04:05:35 +05:30
6050dffb25 fix(ui): use translations for canvas layer select (#6357)
## Summary

Use translations instead of plain strings.

## Related Issues / Discussions


https://discord.com/channels/1020123559063990373/1054129386447716433/1239181243078279208

## QA Instructions

The layer select should still work.

## Merge Plan

n/a

## Checklist

- [x] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-05-13 04:04:13 +05:30
93efeafe30 Merge branch 'main' into psyche/fix/ui/canvas-layer-translations 2024-05-13 04:02:23 +05:30
f167e8a8d3 fix(ui): jank in depthanything model size select (#6335)
## Summary

The select had a default search value, which meant it only showed
"small" as an option on first load.

## Related Issues / Discussions

n/a

## QA Instructions

- Add a CA layer
- Expand advanced
- Set processor to depth anything
- Click the model size dropdown, it should show all 3 sizes

## Merge Plan

n/a

## Checklist

- [x] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-05-13 04:01:58 +05:30
124d49f35e fix(ui): use translations for canvas layer select 2024-05-13 08:30:18 +10:00
52d8efa892 Merge branch 'main' into psyche/fix/ui/depth-anything-select 2024-05-13 04:00:07 +05:30
4ea8416c68 fix(ui): use pluralization for invoke button tooltip 2024-05-13 08:29:31 +10:00
8dd0bfb068 feat(ui): use new model type grouping for control adapters in control layers 2024-05-13 08:29:31 +10:00
6ff1c7d541 feat(ui): add group by base & type to useGroupedModelCombobox hook
This allows comboboxes for models to have more granular groupings. For example, Control Adapter models can be grouped by base model & model type.

Before:
- `SD-1`
- `SDXL`

After:
- `SD-1 / ControlNet`
- `SD-1 / T2I Adapter`
- `SDXL / ControlNet`
- `SDXL / T2I Adapter`
2024-05-13 08:29:31 +10:00
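A simplified sketch of that grouping logic, assuming a pared-down model shape (the real hook works on full model config objects):

```ts
type ModelStub = { name: string; base: string; type: string };

// Group models into "<base> / <type>" buckets for the combobox.
const groupByBaseAndType = (models: ModelStub[]): Record<string, ModelStub[]> =>
  models.reduce<Record<string, ModelStub[]>>((groups, model) => {
    const key = `${model.base} / ${model.type}`;
    (groups[key] ??= []).push(model);
    return groups;
  }, {});
```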
19f5a9c3a9 feat(ui): better invoke button checks
- Improved/more thorough checking before invoking for control layers
- Improved styling for the tooltip
2024-05-13 08:29:31 +10:00
d9ce9c62ac feat(ui): disable invoke button when t2i adapter used w/ image dims that are not multiples of 64 2024-05-13 08:29:31 +10:00
cdc468a38c Merge branch 'main' into psyche/fix/ui/depth-anything-select 2024-05-13 03:57:47 +05:30
2656f13a4a fix(ui): CA processor cancellation
When a control adapter processor config is changed, if we were already processing an image, that batch is immediately canceled. This prevents the processed image from getting stuck in a weird state if you change or reset the processor at the right (err, wrong?) moment.

- Update internal state for control adapters to track processor batches, instead of just having a flag indicating if the image is processing. Add a slice migration to not break the user's existing app state.
- Update preprocessor listener with more sophisticated logic to handle canceling the batch and resetting the processed image when the config changes or is reset.
- Fixed error handling that erroneously showed "failed to queue graph" errors when an active listener instance is canceled; we need to check the abort signal.
2024-05-13 08:23:02 +10:00
da61396b1c cleanup: seamless unused older code cleanup 2024-05-13 08:11:08 +10:00
6c9fb617dc fix: fix seamless 2024-05-13 08:11:08 +10:00
5dd73fe53e fix(ui): jank in depthanything model size select 2024-05-10 09:52:30 +10:00
e6793be465 fix(ui): disable listening on CA and II layers
Closes #6331
2024-05-10 06:42:53 +10:00
63e62c5720 Update INSTALL_REQUIREMENTS.md - 'linux only' under AMD for SDXL.
Moved 'Linux only.' back from under NVIDIA to under AMD for the SDXL hardware requirements.
2024-05-09 10:56:23 -04:00
0848cb8ebd Update invokeai_version.py 2024-05-09 08:01:40 -04:00
1b777bb972 Revert "feat(ui): negative prompt boxes are italicized"
This reverts commit 49c4704379.
2024-05-09 07:52:52 -04:00
029ee90351 docs(ui): add comment & TODO for konva bug 2024-05-09 07:52:52 -04:00
2f9a064d48 feat(ui): ip adapter layers are selectable
This is largely an internal change, and it should have been this way from the start - less tip-toeing around layer types. The user-facing change is when you click an IP Adapter layer, it is highlighted. That's it.
2024-05-09 07:52:52 -04:00
b180666497 feat(ui): disable spellcheck on prompt boxes
These are almost guaranteed to have non-English words - disable the spellcheck to prevent red squigglies.
2024-05-09 07:52:52 -04:00
4740cd4f64 feat(ui): add "global" to global prompt placeholders 2024-05-09 07:52:52 -04:00
8b51298ba1 feat(ui): negative prompt boxes are italicized 2024-05-09 07:52:52 -04:00
1533429e54 feat(ui): optimized empty mask logic
Turns out, it's more efficient to just use the bbox logic for empty mask calculations. We already track whether the bbox needs updating, so this calculation does minimal work.

The dedicated calculation wasn't able to use the bbox tracking so it ran far more often than the bbox calculation.

Removed the "fast" bbox calculation logic, bc the new logic means we are continually updating the bbox in the background - not only when the user switches to the move tool and/or selects a layer.

The bbox calculation logic is split out from the bbox rendering logic to support this.

Result - better perf overall, with the empty mask handling retained.
2024-05-09 07:52:52 -04:00
fc000214a5 feat(ui): check for transparency and clear masks if no pixel data
Mask vector data includes additive (brush, rect) shapes and subtractive (eraser) shapes. A different composite operation is used to draw a shape, depending on whether it is additive or subtractive.

This means that a mask may have vector objects, but once rendered, is _visually_ empty (fully transparent). The only way to determine if a mask is visually empty is to render it and check every pixel.

When we generate and save layer metadata, these fully erased masks are still used. Generating with an empty mask is a no-op in the backend, so we want to avoid this and not pollute graphs/metadata.

Previously, we did that pixel-based check when calculating the bbox, which we only did when using the move tool, and only for the selected layer.

This change introduces a simpler function to check if a mask is transparent, and if so, deletes all its objects to reset it. This allows us to skip these no-op layers entirely.

This check is debounced to 300 ms, trailing edge only.
2024-05-09 07:52:52 -04:00
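A minimal sketch of such a pixel-level transparency check, assuming the mask has already been rendered to an offscreen canvas (the debounce and layer-reset wiring are omitted):

```ts
// Returns true if every pixel of the rendered mask is fully transparent.
const isCanvasFullyTransparent = (canvas: HTMLCanvasElement): boolean => {
  const ctx = canvas.getContext('2d');
  if (!ctx) {
    return true;
  }
  const { data } = ctx.getImageData(0, 0, canvas.width, canvas.height);
  // Alpha is every 4th byte; any non-zero alpha means the mask is not empty.
  for (let i = 3; i < data.length; i += 4) {
    if (data[i] > 0) {
      return false;
    }
  }
  return true;
};
```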
f631aea4ee fix(ui): skip RG layers with no mask
These do not need to be added to the graph or metadata, as they are no-ops on the backend.
2024-05-09 07:52:52 -04:00
32f4c1f966 fix(ui): memoize mouse event handlers
This prevents resetting the stage event handlers on every frame. Whoops!
2024-05-09 07:52:52 -04:00
adebe639e3 tidy(ui): remove errant console.logs 2024-05-09 07:52:52 -04:00
44280ed472 fix(ui): layer recall uses fresh ids
When layer metadata is stored, the layer IDs are included. When recalling the metadata, we need to assign fresh IDs, else we can end up with multiple layers with the same ID, which of course causes all sorts of issues.
2024-05-09 07:52:52 -04:00
cec8840038 fix(ui): handle disabled RG layers
Was missing a check for `layer.isEnabled`.
2024-05-09 07:52:52 -04:00
fc7f484935 feat(ui): add data-testids to control layers components:
- Add Layer Menu Button: `control-layers-add-layer-menu-button`
- Delete All Layers Button: `control-layers-delete-all-layers-button`
- CL Layer List: `control-layers-layer-list`
- CL Canvas: `control-layers-canvas`
- Toggle Metadata Button: `toggle-show-metadata-button`
- Toggle Progress Button: `toggle-show-progress-button`
- Toggle Viewer Menu Button: `toggle-viewer-menu-button`
- Settings Tab Button: `generation-tab-settings-tab-button`
- Control Layers Tab Button: `generation-tab-control-layers-tab-button`
2024-05-09 07:03:13 +10:00
1aa7cd57c2 feat(ui): add invert brush scroll checkbox to control layers settings 2024-05-09 07:03:13 +10:00
722a91aedb fix(ui): canvas toolbar centering 2024-05-09 07:03:13 +10:00
03c24ca9cb lint fix 2024-05-08 15:49:37 -04:00
5820579237 switch to generation tab when someone sends to img2img 2024-05-08 15:49:37 -04:00
6c768bfe7e fix(ui): viewer toggle prevents progress toggle interaction 2024-05-08 08:39:18 -04:00
5ca794b94f feat(ui): show progress toggle on control layers toolbar 2024-05-08 08:39:18 -04:00
d20695260d feat(ui): open viewer on enqueue from generation tab 2024-05-08 08:39:18 -04:00
d8557d573b Revert "feat(ui): extend zod with a is typeguard` method"
This reverts commit 0f45933791.
2024-05-08 08:39:18 -04:00
6c1fd584d2 feat(ui): pre-CL control adapter metadata recall 2024-05-08 08:39:18 -04:00
e8e764be20 feat(ui): revise image viewer
- Viewer only exists on Generation tab
- Viewer defaults to open
- When clicking the Control Layers tab on the left panel, close the viewer (i.e. open the CL editor)
- Do not switch to editor when adding layers (this is handled by clicking the Control Layers tab)
- Do not open viewer when single-clicking images in gallery
- _Do_ open viewer when _double_-clicking images in gallery
- Do not change viewer state when switching between app tabs (this no longer makes sense; the viewer only exists on generation tab)
- Change the button to a drop down menu that states what you are currently doing, e.g. Viewing vs Editing
2024-05-08 08:39:18 -04:00
e8023c44b0 chore(ui): lint 2024-05-08 08:39:18 -04:00
a3a6449786 feat(ui): versioned control layers metadata 2024-05-08 08:39:18 -04:00
e9d2ffe3d7 fix(ui): process control image on recall if no processed image 2024-05-08 08:39:18 -04:00
23ad6fb730 feat(ui): handle missing images/models when recalling control layers 2024-05-08 08:39:18 -04:00
00f36cb491 tidy(ui): clean up control layers graph builder 2024-05-08 08:39:18 -04:00
3f489c92c8 feat(ui): handle initial image layers in control layers helper 2024-05-08 08:39:18 -04:00
f147f99bef feat(ui): better metadata labels for layers 2024-05-08 08:39:18 -04:00
6107e3d281 fix(ui): fix zControlAdapterBase schema weight 2024-05-08 08:39:18 -04:00
de33d6e647 fix(ui): metadata "Layers" -> "Layer" 2024-05-08 08:39:18 -04:00
e36e5871a1 chore(ui): lint 2024-05-08 08:39:18 -04:00
8b25c1a62e tidy(ui): remove extraneous metadata handlers 2024-05-08 08:39:18 -04:00
dfbd7eb1cf feat(ui): individual layer recall 2024-05-08 08:39:18 -04:00
b43b2714cc feat(ui): add fracturedjsonjs to pretty-serialize objects
In use on the metadata viewer - makes it sooo much easier on the eyes.
2024-05-08 08:39:18 -04:00
e537de2f6d feat(ui): layers recall
This still needs some finessing - needs logic depending on the tab...
2024-05-08 08:39:18 -04:00
ccd399e277 feat(ui): add getIsVisible to metadata handlers 2024-05-08 08:39:18 -04:00
bfad814862 fix(ui): fix IPAdapterConfigV2 schema weight 2024-05-08 08:39:18 -04:00
6e8b7f9421 feat(ui): write layers to metadata 2024-05-08 08:39:18 -04:00
e47629cbe7 feat(ui): add zod schema for layers array 2024-05-08 08:39:18 -04:00
e840de27ed feat(ui): extend zod with an `is` typeguard method
Feels dangerous, but it's very handy.
2024-05-08 08:39:18 -04:00
8342f32f2e refactor(ui): rewrite all types as zod schemas
This change prepares for safe metadata recall.
2024-05-08 08:39:18 -04:00
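The general pattern looks roughly like this (the field names here are illustrative, not the actual layer schema):

```ts
import { z } from 'zod';

// Define the schema once, derive the TS type from it, and validate untrusted
// metadata with safeParse when recalling.
const zLayerSketch = z.object({
  id: z.string(),
  isEnabled: z.boolean(),
  opacity: z.number().min(0).max(1),
});
type LayerSketch = z.infer<typeof zLayerSketch>;

const recallLayer = (data: unknown): LayerSketch | null => {
  const result = zLayerSketch.safeParse(data);
  return result.success ? result.data : null;
};
```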
a7aa529b99 tidy(ui): "imageName" -> "name" 2024-05-08 08:39:18 -04:00
4adc592657 feat(ui): move strength to init image layer
This further splits the control layers state into its own thing.
2024-05-07 11:02:16 +10:00
e8d60e8d83 fix(ui): image metadata viewer stuck when spamming hotkey 2024-05-07 11:02:16 +10:00
886f5c90a3 feat(ui): move img2img strength out of advanced on canvas 2024-05-07 11:02:16 +10:00
5e684c11f1 Update invokeai_version.py 2024-05-07 09:09:10 +10:00
72ce239592 revert(ui): remove floating viewer
There are unresolved platform-specific issues with this component, and its utility is debatable.

Should be easy to just revert this commit to add it back in the future if desired.
2024-05-06 19:00:07 -04:00
a826f8f8c5 fix(ui): show total layer count in control layers tab 2024-05-06 19:00:07 -04:00
b6c19a8e47 feat(ui): close viewer when adding a RG layer 2024-05-06 19:00:07 -04:00
67d6cf19c6 fix(ui): switch to viewer if auto-switch is enabled 2024-05-06 19:00:07 -04:00
a9bf651c69 chore(ui): bump all deps 2024-05-06 19:00:07 -04:00
3bd5d9a8e4 fix(ui): memoize FloatingImageViewer
Maybe this will fix @JPPhoto's issue?
2024-05-06 19:00:07 -04:00
6249982d82 fix(ui): stuck viewer when spamming toggle
There are a number of bugs with `framer-motion` that can result in sync issues with AnimatePresence and the conditionally rendered component.

You can see this if you rapidly click an accordion; occasionally it gets out of sync and is closed when it should be open.

This is a bigger problem with the viewer where the user may hold down the `z` key. It's trivial to get it to lock up.

For now, just remove the animation entirely.

Upstream issues for reference:
https://github.com/framer/motion/issues/2023
https://github.com/framer/motion/issues/2618
https://github.com/framer/motion/issues/2554
2024-05-06 19:00:07 -04:00
6b98dba71d chore(ui): lint 2024-05-06 08:55:32 -04:00
c0065a65a0 feat(ui): floating viewer always shows progress, never shows metadata 2024-05-06 08:55:32 -04:00
cce3144c74 feat(ui): add floating image viewer 2024-05-06 08:55:32 -04:00
aab152a7e9 fix(ui): track mouse out flags correctly 2024-05-06 08:55:32 -04:00
c5b948bc3f feat(ui): fade layer selection color 2024-05-06 08:55:32 -04:00
44ecddae2e feat(ui): style Settings/Control Layers tabs like tabs 2024-05-06 08:55:32 -04:00
26847895b9 fix(ui): update hotkeys for viewer 2024-05-06 08:55:32 -04:00
e4a640f0a7 feat(ui): optimized rendering of selected layer
Instead of caching on every stroke, we can use a compositing rect when the layer is being drawn to improve performance.
2024-05-04 12:03:28 -04:00
b5b6a96d94 feat(ui): dynamic brush spacing
Scaled to 10% of brush size, clamped between 5px and 15px. This makes drawing feel a bit smoother, but maintains reasonable performance.
2024-05-04 12:03:28 -04:00
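In other words, roughly (a sketch of the stated formula, not the actual constant names):

```ts
// Spacing is 10% of the brush size, clamped to the 5-15 px range.
const getBrushSpacing = (brushSize: number): number =>
  Math.min(15, Math.max(5, brushSize * 0.1));
```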
806a8f69c5 perf(ui): rerender of opacity sliders 2024-05-04 12:03:28 -04:00
ac0b9ba290 tidy(ui): $cursorPosition -> $lastCursorPos 2024-05-04 12:03:28 -04:00
7ca613d41c feat(ui): snap cursor pos when drawing rects
- Rects snap to stage edge when within a threshold (10 screen pixels)
- When mouse leaves stage, set last mousedown pos to null, preventing nonfunctional rect outlines

Partially addresses #6306.

There's a technical challenge to fully address the issue - mouse events are not fired when the mouse is outside the stage. While we could draw the rect even if the mouse leaves, we cannot update the rect's dimensions on mouse move, or complete the drawing on mouse up.

To fully address the issue, we'd need a way to forward window events back to the stage, or at least handle window events. We can explore this later.
2024-05-04 12:03:28 -04:00
5cb1ff8679 fix(ui): open viewer on image click, not select 2024-05-04 12:03:28 -04:00
8794b99d51 fix(ui): save upscaled images to gallery on canvas tab 2024-05-03 23:15:10 -04:00
6bdded85da fix(ui): do not auto-hide next/prev image buttons 2024-05-03 23:15:10 -04:00
26613f10c7 feat(ui): close viewer when user switches tabs 2024-05-03 23:15:10 -04:00
6d2fe3b691 tidy(ui): clean up layer reset logic 2024-05-03 23:15:10 -04:00
2888845f7c fix(ui): invalidate mask cache when moving layer 2024-05-03 23:15:10 -04:00
4beccea6e7 fix(ui): do not run HRO if using an initial image 2024-05-03 23:15:10 -04:00
68d1458c83 fix(ui): address feedback 2024-05-04 08:40:12 +10:00
f4dde883ca feat: improve the switch states of the control layers / viewer area 2024-05-04 08:40:12 +10:00
be7eeb576b fix(ui): fix viewer getting stuck when spamming toggle 2024-05-03 20:57:18 +10:00
af9f0e0963 feat(ui): cache control layer mask images
When invoking with control layers, we were creating and uploading the mask images on every enqueue, even when the mask didn't change. The mask image can be cached to greatly reduce the number of uploads.

With this change, we are a bit smarter about the mask images:
- Check if there is an uploaded mask image name
- If so, attempt to retrieve its DTO. Typically it will be in the RTKQ cache, so there is no network request, but it will make a network request if not cached to confirm the image actually exists on the server.
- If we don't have an uploaded mask image name, or the request fails, we go ahead and upload the generated blob
- Update the layer's state with a reference to this uploaded image for next time
- Continue as before

Any time we modify the mask (drawing/erasing, resetting the layer), we invalidate that cached image name (set it to null).

We now only upload images when we need to and generation starts faster.
2024-05-03 20:57:18 +10:00
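A sketch of the cache-then-upload flow described above, with hypothetical helpers (`fetchImageDTO`, `uploadBlob`) standing in for the real RTKQ utilities:

```ts
type MaskLayer = { uploadedMaskImageName: string | null };

const getOrUploadMaskImage = async (
  layer: MaskLayer,
  renderMaskBlob: () => Promise<Blob>,
  fetchImageDTO: (name: string) => Promise<{ image_name: string } | null>,
  uploadBlob: (blob: Blob) => Promise<{ image_name: string }>
): Promise<string> => {
  // Reuse the previously uploaded mask image if it still exists on the server.
  if (layer.uploadedMaskImageName) {
    const dto = await fetchImageDTO(layer.uploadedMaskImageName).catch(() => null);
    if (dto) {
      return dto.image_name;
    }
  }
  // Otherwise render and upload a fresh mask, then remember it for next time.
  const uploaded = await uploadBlob(await renderMaskBlob());
  layer.uploadedMaskImageName = uploaded.image_name;
  return uploaded.image_name;
};
```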
3cba53533d Update README.md 2024-05-03 17:31:50 +10:00
ab87511a03 Update INSTALLATION.md 2024-05-03 17:31:50 +10:00
af868b0ea6 Update 010_INSTALL_AUTOMATED.md 2024-05-03 17:31:50 +10:00
960eae8255 Update TRAINING.md 2024-05-03 17:30:42 +10:00
0787c6c746 Update invokeai_version.py 2024-05-03 13:23:19 +10:00
579d436934 fix(ui): floating param/gallery buttons 2024-05-02 23:09:26 -04:00
36f01988e8 chore(ui): lint 2024-05-02 23:09:26 -04:00
d9b92d19f9 feat(ui): clearer viewer/editor context switching 2024-05-02 23:09:26 -04:00
fdfc379a84 fix(ui): layer counts 2024-05-02 23:09:26 -04:00
2062cfe84a fix(ui): cursor when no renderable layers added 2024-05-02 23:09:26 -04:00
eb36e834b2 feat(ui): add fallback when no layers exist 2024-05-02 23:09:26 -04:00
2baa33730a fix(ui): fix control layer list layout 2024-05-02 23:09:26 -04:00
c30df7ce79 feat(ui): style settings/control layers tabs 2024-05-02 23:09:26 -04:00
f05ac5a7a5 chore(ui): bump @invoke-ai/ui-library 2024-05-02 23:09:26 -04:00
85dd78b8df fix(ui): handle deleting images in use in generation tab 2024-05-02 23:09:26 -04:00
4c7be03702 tidy(ui): rename generation tab graph builders 2024-05-02 23:09:26 -04:00
e354fee4f4 fix(ui): add img2img metadata to graphs 2024-05-02 23:09:26 -04:00
20e628297c fix(ui): smoother animations in current image preview 2024-05-02 23:09:26 -04:00
98664fc46f fix(ui): gallery prev/next buttons animations 2024-05-02 23:09:26 -04:00
33617fc06a feat(ui): rework image viewer
- Rework styling
- Replace "CurrentImageDisplay" entirely
- Add a super short fade to reduce jarring transition
- Make the viewer a singleton component, overlaid on everything else - reduces change when switching tabs
2024-05-02 23:09:26 -04:00
c05e52ebae fix(ui): do not delete all layers when using image as initial image 2024-05-02 23:09:26 -04:00
5734a97c55 fix(ui): do not attempt drawing when invalid layer type selected 2024-05-02 23:09:26 -04:00
94a73d5377 feat(ui): update mm-related translations 2024-05-02 23:09:26 -04:00
0f7fdabe9b feat(ui): rename tab identifiers
- "txt2img" -> "generation"
- "unifiedCanvas" -> "canvas"
- "modelManager" -> "models"
- "nodes" -> "workflows"
- Add UI slice migration setting the active tab to "generation"
2024-05-02 23:09:26 -04:00
7c1f1076b4 feat(ui): rename tabs
- "Text to Image" -> "Generation"
- "Unified Canvas" -> "Canvas"
- "Model Manager" -> "Models"
2024-05-02 23:09:26 -04:00
a6ac184211 tidy(ui): excise img2img tab 2024-05-02 23:09:26 -04:00
7d58908e32 fix(ui): fix img2img graphs w/ control layers 2024-05-02 23:09:26 -04:00
26d3ec3fce fix(ui): destroy initial image layer after deleting 2024-05-02 23:09:26 -04:00
dc81357152 feat(ui): add img2img via control layers to graph builders 2024-05-02 23:09:26 -04:00
c9886796f6 feat(ui): add image viewer overlay
- Works on txt2img, canvas and workflows tabs, img2img has its own side-by-side view
- In the workflow editor, the viewer is closeable only if you are in edit mode, else it's always there
- Press `i` to open
- Press `esc` to close
- Selecting an image or changing image selection opens the viewer
- When generating, if auto-switch to new image is enabled, the viewer opens when an image comes in

To support this change, I organized and restructured some tab stuff.
2024-05-02 23:09:26 -04:00
209ddc2037 fix(ui): do not toggle layers on double click of opacity popover 2024-05-02 23:09:26 -04:00
8b6a283eab feat(ui): add opacity to initial image layer 2024-05-02 23:09:26 -04:00
75be6814bb feat(ui): add renderer for initial image 2024-05-02 23:09:26 -04:00
1d213067e8 feat(ui): add initial image layer to CL 2024-05-02 23:09:26 -04:00
d67480d92c feat(ui): add layerwrapper component 2024-05-02 23:09:26 -04:00
d55ea318ec tidy(ui): remove unused gallery hotkeys 2024-05-02 23:09:26 -04:00
474eab6f8a fix(ui): clamp incoming w/h to ensure always a multiple of 8
When recalling metadata and/or using control image dimensions, it was possible to set a width or height that was not a multiple of 8, resulting in generation failures.

Added a `clamp` option to the w/h actions to fix this. The option is used for all untrusted sources - everything except for the w/h number inputs, which clamp the values themselves.
2024-05-02 23:09:26 -04:00
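A sketch of what such a clamp might look like (the bounds here are illustrative; the real option lives on the width/height actions):

```ts
// Clamp an untrusted dimension into sane bounds, then snap to a multiple of 8.
// Assumes min and max are themselves multiples of 8.
const clampToMultipleOf8 = (value: number, min = 64, max = 4096): number => {
  const clamped = Math.min(Math.max(value, min), max);
  return Math.round(clamped / 8) * 8;
};

clampToMultipleOf8(517); // -> 520
```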
1b13fee256 fix(ui): firefox drawing lag
Firefox v125.0.3 and below has a bug where `mouseenter` events are fired continually during mouse moves. The issue isn't present on FF v126.0b6 Developer Edition. It's not clear if the issue is present on FF nightly, and we're not sure if it will actually be fixed in the stable v126 release.

The control layers drawing logic relied on `mouseenter` events to create new lines, and `mousemove` to extend existing lines. On the affected version of FF, all line extensions are turned into new lines, resulting in very poor performance, noncontiguous lines, and way-too-big internal state.

To resolve this, the drawing handling was updated to not use `mouseenter` at all. As a bonus, resolving this issue has resulted in simpler logic for drawing on the canvas.
2024-05-02 23:09:26 -04:00
6363095b29 feat(ui): control adapter recall for control layers
- Add set of metadata handlers for the control layers CAs
- Use these conditionally depending on the active tab - when recalling on txt2img, the CAs go to control layers, else they go to the old CA area.
2024-05-02 23:09:26 -04:00
4cd78b9478 feat(ui): add getImageDTO imperative RTKQ helper 2024-05-02 23:09:26 -04:00
2cde8a643e tidy(ui): suffix a control adapter types/objects with V2
Prevent mixing up the old and new implementations
2024-05-02 23:09:26 -04:00
f9555f03f5 tidy(ui): "CONTROLNET_PROCESSORS" -> "CA_PROCESSOR_DATA" 2024-05-02 23:09:26 -04:00
b1d8f3a3f9 tidy(ui): revert changes to old CA implementation
These changes were left over from the previous attempt to handle control adapters in control layers with the same logic. Control Layers are now handled totally separately, so these changes may be reverted.
2024-05-02 23:09:26 -04:00
33a9f9a4dc fix(nodes): fix constraints in cnet processors
There were some invalid constraints with the processors - minimum of 0 for resolution or multiple of 64 for resolution.

Made the minimum 1px and removed the multiple-of constraints.
2024-05-02 12:24:04 +10:00
c35625eb44 feat(ui): processor layout changes 2024-05-01 21:48:47 -04:00
6f572e1cce fix(ui): convert t2i to cnet and vice-versa when model changes 2024-05-01 21:48:47 -04:00
54acd3f2b1 ci(ui): restore error status for circular deps 2024-05-01 21:48:47 -04:00
6e966909ab chore(ui): lint 2024-05-01 21:48:47 -04:00
311ba8c04b fix(ui): ensure canvas size is correctly updated when model changed
Closes #6293
2024-05-01 21:48:47 -04:00
1b617768cf fix(ui): canvas infinite loop when setting bbox dims
When typing a number into the w/h number inputs, if the number is less than the step, it appears the value of 0 is used. This is unexpected; it means Chakra isn't clamping the value correctly (or maybe our wrapper isn't clamping it).

Add checks to never bail if the width or height value from the number input component is 0.
2024-05-01 21:48:47 -04:00
8ceb94497e fix(ui): fix canvas rendering of control images 2024-05-01 21:48:47 -04:00
efb571401c feat(ui): tweak control adapter layout 2024-05-01 21:48:47 -04:00
ffba4871d0 tidy(ui): "scribble" -> "Scribble" 2024-05-01 21:48:47 -04:00
9437d701b2 fix(ui): disable clear processor when no processor selected 2024-05-01 21:48:47 -04:00
6effa19626 fix(ui): edge cases in auto-process 2024-05-01 21:48:47 -04:00
45c2ac41d5 feat(ui): processor layout/styling 2024-05-01 21:48:47 -04:00
ca1c3c0873 fix(ui): do not re-process if processor config hasn't changed 2024-05-01 21:48:47 -04:00
47ee08db91 fix(ui): processor select styling 2024-05-01 21:48:47 -04:00
c96b98fc9e feat(ui): auto-process for control layer CAs 2024-05-01 21:48:47 -04:00
905baf2787 refactor(ui): continue wiring up CA logic across (wip)
It works!
2024-05-01 21:48:47 -04:00
0e55488ff6 refactor(ui): wire up CA logic across (wip) 2024-05-01 21:48:47 -04:00
424a27eeda refactor(ui): add CA processor config components (wip) 2024-05-01 21:48:47 -04:00
6007218a51 refactor(ui): add CA config components (wip) 2024-05-01 21:48:47 -04:00
811e8a5a8b refactor(ui): rename & export actions from CL slice 2024-05-01 21:48:47 -04:00
121918352a refactor(ui): add control layers separate control adapter implementation (wip)
- Revise control adapter config types
- Recreate all control adapter mutations in control layers slice
- Bit of renaming along the way - typing 'RegionalGuidanceLayer' over and over again was getting tedious
2024-05-01 21:48:47 -04:00
3717321480 tidy(ui): organize layer components 2024-05-01 21:48:47 -04:00
4a250bdf9c Add TCD scheduler (#6086)
Adds the TCD scheduler to better support https://huggingface.co/h1t/TCD-SDXL-LoRA or checkpoints that have been made with TCD

Example:
TCD Lora with Euler A

![b0ad6174-cd2b-49fe-ae42-3a83bc6ae571](https://github.com/invoke-ai/InvokeAI/assets/82827604/d823cb2f-4d9c-4f93-9fc2-e63773a378b6)

TCD Lora with TCD scheduler

![74495a51-eeac-45e6-9983-fb6551a5bdef](https://github.com/invoke-ai/InvokeAI/assets/82827604/c87604d8-a44e-4fb9-a7be-ef2600784727)
2024-05-01 12:57:01 +05:30
dce8b88aaf fix: change eta only for TCD Scheduler 2024-05-01 12:47:46 +05:30
1bdcbe3284 cleanup: use dict update to actually update the scheduler keyword args 2024-05-01 12:22:39 +05:30
88ac3bc7f0 Merge branch 'main' into main 2024-04-30 16:51:44 -04:00
abb3bb9f7e Update invokeai_version.py 2024-05-01 06:30:28 +10:00
2ddb82200c fix: Manually update eta(gamma) to 1.0 for TCDScheduler
seems to work best with invoke at 4 steps
2024-05-01 01:20:53 +05:30
38880cde5c chore: update schema 2024-05-01 01:20:22 +05:30
39ab4dd83e Merge branch 'main' into pr/6086 2024-05-01 00:37:06 +05:30
631878b212 feat(ui): border radius on canvas 2024-04-30 08:10:59 -04:00
7a5399e83c feat(ui): display message when no layers are added 2024-04-30 08:10:59 -04:00
e90775731d fix(ui): layer layout orientation 2024-04-30 08:10:59 -04:00
3f26880493 fix(ui): "Global Settings" -> "Settings" 2024-04-30 08:10:59 -04:00
21cf1004db fix(ui): layers default to expanded 2024-04-30 08:10:59 -04:00
d74cd12aa6 feat(ui): collapsible layers 2024-04-30 08:10:59 -04:00
cf1883585d chore(ui): lint 2024-04-30 08:10:59 -04:00
8a791d4f16 feat(ui): make control image opacity filter toggleable 2024-04-30 08:10:59 -04:00
1212698059 tidy(ui): more renaming of components 2024-04-30 08:10:59 -04:00
ba6db33b39 tidy(ui): more renaming of components 2024-04-30 08:10:59 -04:00
b3dbfdaa02 tidy(ui): more renaming of components 2024-04-30 08:10:59 -04:00
3441187c23 tidy(ui): "regional prompts" -> "control layers" 2024-04-30 08:10:59 -04:00
8de56fd77c tidy(ui): move regionalPrompts files to controlLayers 2024-04-30 08:10:59 -04:00
22bd33b7c6 chore(ui): lint 2024-04-30 08:10:59 -04:00
2af5c4be9f fix(ui): ip adapter layers are not selectable 2024-04-30 08:10:59 -04:00
415a41e21a perf(ui): reset maskobjects when layer has no bbox (all objects erased) 2024-04-30 08:10:59 -04:00
aa2ca03056 fix(ui): filter layers based on tab when disabling invoke button 2024-04-30 08:10:59 -04:00
a20faca20f feat(ui): layer layout tweaks 2024-04-30 08:10:59 -04:00
9d042baf48 fix(ui): ip adapter layers always at bottom of list 2024-04-30 08:10:59 -04:00
6195741814 feat(ui): move global mask opacity to settings popover 2024-04-30 08:10:59 -04:00
c2f8adf93e fix(ui): deselect other layers when new layer added 2024-04-30 08:10:59 -04:00
ace3955760 fix(ui): tool preview/cursor when non-interactable layer selected 2024-04-30 08:10:59 -04:00
720e16cea6 feat(ui): tweak layer list styling to better indicate selectablility 2024-04-30 08:10:59 -04:00
a357a1ac9d feat(ui): remove select layer on click in canvas
It's very easy to end up in a spot where you cannot select a layer at all to move it around. Too tricky to handle otherwise.
2024-04-30 08:10:59 -04:00
22f160bfcc fix(ui): unlink control adapter opacity from global mask opacity 2024-04-30 08:10:59 -04:00
fa637b5c59 fix(ui): add missed ca layer opacity logic
Didn't stage the right changes a few commits back.
2024-04-30 08:10:59 -04:00
1f68a60752 feat(ui): hold shift to use control image size w/o model constraints 2024-04-30 08:10:59 -04:00
048bd18e10 feat(ui): separate ca layer opacity 2024-04-30 08:10:59 -04:00
e5ec529f0f feat(ui): fix layer arranging 2024-04-30 08:10:59 -04:00
d884c15d0c feat(ui): update layer menus 2024-04-30 08:10:59 -04:00
9ee7cad613 feat(ui): make control layer ui exclusive to txt2img tab 2024-04-30 08:10:59 -04:00
629110784d fix(ui): delete control layers correctly 2024-04-30 08:10:59 -04:00
c1666a8b5a fix(ui): select default control/ip adapter models in control layers 2024-04-30 08:10:59 -04:00
d14b315bc6 fix(ui): use optimal size when using control image dims 2024-04-30 08:10:59 -04:00
fe459295ea fix(ui): exclude disabled control adapters on control layers 2024-04-30 08:10:59 -04:00
9d67ec9efe fix(ui): toggle control adapter layer vis 2024-04-30 08:10:59 -04:00
5bf4d37949 perf(ui): reduce control image processing to when it is needed
Should only reprocess if the processor settings or the image has changed.
2024-04-30 08:10:59 -04:00
387ab9cee7 feat(ui): reset controlnet model to null instead of disabling when base model changes 2024-04-30 08:10:59 -04:00
56050f7887 fix(ui): fix canvas scaling when window is zoomed
Konva doesn't react to changes to window zoom/scale. If you open the tab at, say, 90%, then bump to 100%, the pixel ratio of the canvas doesn't change. This results in lower-quality renders on the canvas (generation is unaffected).
2024-04-30 08:10:59 -04:00
c354470cd1 perf(ui): do not cache controlnet images unless required 2024-04-30 08:10:59 -04:00
ded8267505 WIP control adapters in regional 2024-04-30 08:10:59 -04:00
e822897b1c feat(nodes): add prototype heuristic image resize node
Uses the fancy cnet resize that retains edges.
2024-04-30 08:10:59 -04:00
2d7b8c2a1b fix(backend): do not round image dims to 64 in controlnet processor resize
Rounding the dims results in control images that are subtly different from the input. We round to the nearest 8px later; there's no need to round now.
2024-04-30 08:10:59 -04:00
ebeae41cb2 tidy(ui): minor ca component tidy 2024-04-30 08:10:59 -04:00
6f5f3381f9 feat(ui): revise internal state for RCC 2024-04-30 08:10:59 -04:00
2f6fec8c6c chore(ui): lint 2024-04-30 08:10:59 -04:00
cc4bef4859 refactor(ui): move size state to regional 2024-04-30 08:10:59 -04:00
b6a45e53f1 refactor(ui): move positive2 and negative2 prompt to regional 2024-04-30 08:10:59 -04:00
1cf1e53a6c refactor(ui): move positive and negative prompt to regional 2024-04-30 08:10:59 -04:00
c686625076 feat(ui): add 'control_layer' type 2024-04-30 08:10:59 -04:00
d861bc690e feat(mm): handle PC_PATH_MAX on external drives on macOS
`PC_PATH_MAX` doesn't exist for (some?) external drives on macOS. We need error handling when retrieving this value.

Also added error handling for `PC_NAME_MAX` just in case. This does work for me for external drives on macOS, though.

Closes #6277
2024-04-30 07:57:03 -04:00
f262b9032d fix: changed validation to not error on connection 2024-04-28 12:48:56 -04:00
71c3197eab fix: denoise latents accepts CFG lists as input 2024-04-28 12:48:56 -04:00
241a1fdb57 feat(mm): support sdxl ckpt inpainting models
There are only a couple SDXL inpainting models, and my tests indicate they are not as good as SD1.5 inpainting, but at least we support them now.

- Add the config file. This matches what is used in A1111. The only difference from the non-inpainting SDXL config is the number of in-channels.
- Update the legacy config maps to use this config file.
2024-04-28 12:57:27 +10:00
3595beac1e docs: remove references to config script in CONFIGURATION.md 2024-04-25 17:49:32 -04:00
caa7c0f2bd docs: more pruning and tidying readme 2024-04-26 00:00:18 +10:00
d546823c4d docs: pruning and tidying readme 2024-04-26 00:00:18 +10:00
dac2d78da6 Update README.md 2024-04-26 00:00:18 +10:00
398f37c0ed tidy(backend): clean up controlnet_utils
- Use the our adaptation of the HWC3 function with better types
- Extraction some of the util functions, name them better, add comments
- Improve type annotations
- Remove unreachable codepaths
2024-04-25 13:20:09 +10:00
6b0bf59682 feat(backend): update nms util to make blur/thresholding optional 2024-04-25 13:20:09 +10:00
5b8f77f990 tidy(nodes): move cnet mode literals to utils
Now they can be used in type signatures without circular imports.
2024-04-25 13:20:09 +10:00
3207822738 Update invokeai_version.py 2024-04-25 12:31:59 +10:00
8d86fabf4b chore(ui): lint 2024-04-24 20:09:52 +10:00
af3e910ad3 fix(ui): fix layer arrangement 2024-04-24 20:09:52 +10:00
af25d00964 tidy(ui): use const for brush spacing 2024-04-24 20:09:52 +10:00
d4a30d08ef feat(ui): create new line when mouse held down, leaves canvas and comes back over 2024-04-24 20:09:52 +10:00
bd8a33e824 tidy(ui): clean up renderer functions
- Split logic to create layers/objects from the updating logic
- Organize and comment functions
2024-04-24 20:09:52 +10:00
b425646b7b chore(ui): lint 2024-04-24 20:09:52 +10:00
293e11cfa6 feat(ui): hide add prompt buttons when user has a prompt 2024-04-24 20:09:52 +10:00
c73aabdfbf feat(ui): regional control defaults to having a positive prompt 2024-04-24 20:09:52 +10:00
ca989c54b0 fix(ui): restore OG aspect ratio preview for non-t2i tabs 2024-04-24 20:09:52 +10:00
260e24733f fix: update SDXL IP Adapter starter model to be ViT-H 2024-04-24 00:08:21 -04:00
bb6e3e726d fix: update ip adapter starter models path (#6262)
## Summary

<!--A description of the changes in this PR. Include the kind of change
(fix, feature, docs, etc), the "why" and the "how". Screenshots or
videos are useful for frontend changes.-->

## Related Issues / Discussions

<!--WHEN APPLICABLE: List any related issues or discussions on github or
discord. If this PR closes an issue, please use the "Closes #1234"
format, so that the issue will be automatically closed when the PR
merges.-->

## QA Instructions

<!--WHEN APPLICABLE: Describe how we can test the changes in this PR.-->

## Merge Plan

<!--WHEN APPLICABLE: Large PRs, or PRs that touch sensitive things like
DB schemas, may need some care when merging. For example, a careful
rebase by the change author, timing to not interfere with a pending
release, or a message to contributors on discord after merging.-->

## Checklist

- [ ] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-04-24 08:58:15 +05:30
6b394554e2 fix: update ip adapter starter models path 2024-04-24 08:48:25 +05:30
ae1955a1a8 feat(ui): update canvas graphs to provide unet 2024-04-23 07:32:53 -04:00
1bef13db37 feat(nodes): restore unet check on CreateGradientMaskInvocation
Special handling for inpainting models
2024-04-23 07:32:53 -04:00
a461537087 chore: ruff 2024-04-23 07:32:53 -04:00
99e28da19b feat(ui): add variant to model edit
Also simplify the layouting for all model view/edit components.
2024-04-23 07:32:53 -04:00
42a159beaa chore(ui): typegen 2024-04-23 07:32:53 -04:00
0aa5aadfe8 fix(mm): move variant to MainConfigBase
should have been here all along
2024-04-23 07:32:53 -04:00
2537d260e3 tests: add test for probing diffusers model variant type 2024-04-23 07:32:53 -04:00
bbf919a933 chore: frontend check error 2024-04-23 07:32:53 -04:00
01897ec576 remove extra inputs 2024-04-23 07:32:53 -04:00
bc12d6654e chore: comments and ruff 2024-04-23 07:32:53 -04:00
6d7c8d5f57 remove unet test 2024-04-23 07:32:53 -04:00
38604aa408 update canvas graphs 2024-04-23 07:32:53 -04:00
781de914f4 fix threshold 2024-04-23 07:32:53 -04:00
c094bad233 add unet check in gradient mask node 2024-04-23 07:32:53 -04:00
0063014f2b gradient mask node test for inpaint 2024-04-23 07:32:53 -04:00
d7b5ad02e8 tests: add object serializer test for dangling folders
- Ensure they are deleted on init if ephemeral
- Ensure they are _not_ deleted on init if _not_ ephemeral
2024-04-23 17:12:14 +10:00
2cee436ecf tidy(app): remove unused class 2024-04-23 17:12:14 +10:00
e6386d969f fix(app): only clear tempdirs if ephemeral and before creating tempdir
Also, this needs to happen in init, else it deletes the temp dir created in init
2024-04-23 17:12:14 +10:00
4b2b983646 tidy(api): reverted unnecessary changes in dependencies.py 2024-04-23 17:12:14 +10:00
53808149fb moved cleanup routine into object_serializer_disk.py 2024-04-23 17:12:14 +10:00
21ba55d0a6 add an initialization function that removes dangling tmpdirs from outputs/tensors 2024-04-23 17:12:14 +10:00
28c28b2fc0 fix: 🐛 handle trigger phrase form submits 2024-04-23 16:42:40 +10:00
8b9c4c62a6 chore: v4.2.0a2 2024-04-23 13:08:26 +10:00
cf637ecaa6 fix(ui): disabled ip adapters applied to regional control 2024-04-23 13:08:26 +10:00
fca718bdd3 tidy(ui): remove extraneous cursor sync 2024-04-23 12:11:47 +10:00
5196a2efec fix(ui): minor canvas overflow 2024-04-23 12:11:47 +10:00
385e93443a feat(ui): rp hotkeys
- Shift+C: Reset selected layer mask (same as canvas)
- Shift+D: Delete selected layer (cannot be Del, that deletes an image in gallery)
- Shift+A: Add layer (cannot be Ctrl+Shift+N, that opens a new window)
- Ctrl/Cmd+Wheel: Brush size (same as canvas)
2024-04-23 12:11:47 +10:00
604217313a chore(ui): lint 2024-04-23 12:11:47 +10:00
229423b370 tidy(ui): memo aspectratiopreview 2024-04-23 12:11:47 +10:00
75a548e3eb perf(ui): debounce render wait = 300ms 2024-04-23 12:11:47 +10:00
24dbb65ebb perf(ui): add brush spacing
Only add a point to the line if the next point is 10 or more px from the last point
2024-04-23 12:11:47 +10:00
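Roughly, the gate looks like this (a sketch; the actual constant and renderer wiring live elsewhere):

```ts
const BRUSH_SPACING_PX = 10;

// Only extend the current line if the cursor moved far enough from the last point.
const shouldAddPoint = (
  last: { x: number; y: number },
  next: { x: number; y: number },
  spacing = BRUSH_SPACING_PX
): boolean => Math.hypot(next.x - last.x, next.y - last.y) >= spacing;
```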
c915220965 feat(ui): aspect ratio preview is regional prompts canvas 2024-04-23 12:11:47 +10:00
bb37e25ed0 feat(ui): rp ui layout 2024-04-23 12:11:47 +10:00
dda1111f20 Make it alpha 2024-04-22 10:54:21 -04:00
9d71b91b7f chore: v4.2.0b1 2024-04-22 10:54:21 -04:00
714126b832 build(ui): temp disable circular dependency check
I'll need to think about how to fix this properly. For now, disable the check as the UI can still build fine.
2024-04-22 23:09:39 +10:00
a10c66797d chore(ui): lint 2024-04-22 23:09:39 +10:00
6dcaf75b5f feat(ui): regional prompts spray n pray
Trying a lot of different things as I iterated, so this is smooshed into one big commit... too hard to split it now.

- Iterated on IP adapter handling and UI. Unfortunately there is a bug related to undo/redo. The IP adapter state is split across the `controlAdapters` slice and the `regionalPrompts` slice, but only the `regionalPrompts` slice supports undo/redo. If you delete the IP adapter and then undo/redo to a history state where it existed, you'll get an error. The fix is likely to merge the slices... Maybe there's a workaround.
- Iterated on UI. I think the layers are OK now.
- Removed ability to disable RP globally for now. It's enabled if you have enabled RP layers.
- Many minor tweaks and fixes.
2024-04-22 23:09:39 +10:00
018845cda0 tidy(ui): regional prompts kind -> type 2024-04-22 23:09:39 +10:00
8c0a061fa0 fix(ui): hotkeys dependency array 2024-04-20 11:32:08 -04:00
4895875ded feat(ui): rects on regional prompt UI 2024-04-20 11:32:08 -04:00
cfddbda578 tidy(ui): clean up action names 2024-04-20 11:32:08 -04:00
58d3a9e7d4 refactor(ui): revise regional prompts state to support prompt-less mask layers
This structure is more adaptable to future features like IP-Adapter-only regions, controlnet layers, image masks, etc.
2024-04-20 11:32:08 -04:00
a00e703144 feat(nodes): image mask to tensor invocation
Thanks @JPPhoto!
2024-04-20 11:32:08 -04:00
e4024bdeb9 fix(ui): floor all pixel coords
This prevents rendering objects with sub-pixel positioning, which looks soft
2024-04-20 11:32:08 -04:00
944690ac8e feat(ui): remove drag distance on layers 2024-04-20 11:32:08 -04:00
a7d69aa0a9 fix(ui): brush preview cursor jank 2024-04-20 11:32:08 -04:00
15018fdbc0 fix(ui): brush preview not visible after hotkey 2024-04-20 11:32:08 -04:00
31ace9aff8 feat(ui): tool hotkeys for rp 2024-04-20 11:32:08 -04:00
3f4ea30113 fix(ui): fix missing bbox when a layer is empty 2024-04-20 11:32:08 -04:00
7edcadb371 fix(ui): bbox rendered slightly too small 2024-04-20 11:32:08 -04:00
d582203c62 chore(ui): lint 2024-04-20 14:54:49 +10:00
148a6c08ca fix(ui): fix bbox caching 2024-04-20 14:54:49 +10:00
1e904d281a feat(ui): hook up sd1.5 t2i graph to regional prompts 2024-04-20 14:54:49 +10:00
03d9a75720 feat(ui): better rp colors 2024-04-20 14:54:49 +10:00
5edce0a4de perf(ui): caching efficiency 2024-04-20 14:54:49 +10:00
604bf4e9ec perf(ui): use efficient group caching instead of a compositing rect
Seems to be the same speed and it's less complex.
2024-04-20 14:54:49 +10:00
39d036bb37 feat(ui): update move tool to show all bboxes, mouseover bbox strokes 2024-04-20 14:54:49 +10:00
8a69fbd336 perf(ui): more bbox optimizations
- Keep track of whether the bbox needs to be recalculated (e.g. had lines/points added)
- Keep track of whether the bbox has eraser strokes - if yes, we need to do the full pixel-perfect bbox calculation, otherwise we can use the faster getClientRect
- Use comparison rather than Math.min/max in bbox calculation (slightly faster)
- Return `null` if no pixel data at all in bbox
2024-04-20 14:54:49 +10:00
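The comparison-based pass over pixel data might look roughly like this sketch (a pure-Python illustration over RGBA bytes, not the actual Konva implementation):

```python
def bbox_from_rgba(pixels: bytes, width: int, height: int):
    """Return (x, y, w, h) of non-transparent pixels, or None if fully empty."""
    min_x, min_y = width, height
    max_x, max_y = -1, -1
    for y in range(height):
        row = y * width * 4
        for x in range(width):
            alpha = pixels[row + x * 4 + 3]
            if alpha == 0:
                continue
            # Plain comparisons instead of min()/max() calls.
            if x < min_x:
                min_x = x
            if x > max_x:
                max_x = x
            if y < min_y:
                min_y = y
            if y > max_y:
                max_y = y
    if max_x < 0:
        return None  # no pixel data at all
    return (min_x, min_y, max_x - min_x + 1, max_y - min_y + 1)
```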
a71ed10b71 perf(ui): more efficient bbox method with smaller minimum offscreen canvas size 2024-04-20 14:54:49 +10:00
9d3978edcf fix(ui): give min dimensions to rp storybook 2024-04-20 14:54:49 +10:00
18e1d74917 fix(ui): group layer color change history 2024-04-20 14:54:49 +10:00
9276ecfd02 feat(ui): rp ui styling/layout 2024-04-19 09:32:56 -04:00
ea527f5fe1 feat(nodes): add beta classification to mask tensor nodes 2024-04-19 09:32:56 -04:00
d43f9732ab feat(ui): rp ui styling 2024-04-19 09:32:56 -04:00
c613839740 feat(ui): use translations for rp features 2024-04-19 09:32:56 -04:00
bb371cfeca feat(ui): minor styling rp 2024-04-19 09:32:56 -04:00
6a5510146c feat(ui): add default rp brush size 2024-04-19 09:32:56 -04:00
9667f77c41 feat(ui): rp editor styling 2024-04-19 09:32:56 -04:00
e93e0612af tidy(ui): selectedLayer -> selectedLayerId 2024-04-19 09:32:56 -04:00
9528287d56 feat(ui): move ephemeral tool state out of redux 2024-04-19 09:32:56 -04:00
14c722c265 tidy(ui): remove unused conditional 2024-04-19 09:32:56 -04:00
4b2cd2da9f feat(ui): remove special handling of main prompt
Until we have a good handle on what works best, leaving this to the user
2024-04-19 09:32:56 -04:00
3c5b728bee feat(ui): add enabled state for RP 2024-04-19 09:32:56 -04:00
9b5c47748d tidy(ui): isRegionalPromptLayer -> isRPLayer 2024-04-19 09:32:56 -04:00
eb781272f7 tidy(ui): organize rp layer components 2024-04-19 09:32:56 -04:00
642a0de3dd feat(ui): organize layer naming
prep for non-rp layer types
2024-04-19 09:32:56 -04:00
f3b4cecf2e feat(ui): invert tensor mask instead of loading mask image and converting to tensor second time
minor efficiency improvement
2024-04-19 09:32:56 -04:00
499e7a7b74 chore(ui): typegen 2024-04-19 09:32:56 -04:00
aace364677 feat(nodes): add InvertTensorMaskInvocation 2024-04-19 09:32:56 -04:00
c195094e91 fix(ui): do not open panels when collapsed and window resize 2024-04-19 09:32:56 -04:00
e6c57edf87 tidy(ui): shuffle graph builder logic 2024-04-19 09:32:56 -04:00
c217e052a8 tidy(ui): remove unused action 2024-04-19 09:32:56 -04:00
964e2236b9 feat(ui): do not add promptless conditioning nodes 2024-04-19 09:32:56 -04:00
a6e64423d9 feat(ui): per-layer autonegative 2024-04-19 09:32:56 -04:00
d3aa97ab99 feat(ui): add copy graph button to queue item detail view 2024-04-19 09:32:56 -04:00
0d8edd67ab fix(ui): group lines together in undo history 2024-04-19 09:32:56 -04:00
d9dd00ea20 feat(ui): undo/redo in regional prompts
using the `redux-undo` library
2024-04-19 09:32:56 -04:00
170763899a tidy(ui): tidy regional prompts graph helper, add comments 2024-04-19 09:32:56 -04:00
9e1a4a4a07 feat(ui): regional prompts default layer opacity 2024-04-19 09:32:56 -04:00
dcb4a40741 fix(ui): regional prompts brush preview wonkiness 2024-04-19 09:32:56 -04:00
f8bf985256 perf(ui): do not recreate map callback on every render 2024-04-19 09:32:56 -04:00
81f29b9624 tidy(ui): remove errant console.log 2024-04-19 09:32:56 -04:00
f2dde9a035 feat(ui): cleared selected layer styling 2024-04-19 09:32:56 -04:00
85f4a066fb feat(ui): use logger for stage renderer 2024-04-19 09:32:56 -04:00
b9e6b7ba48 feat(ui): restore layer arrange functionality 2024-04-19 09:32:56 -04:00
085f7bdbee feat(ui): add invert negative mode
Adds an additional negative conditioning using the inverted mask of the positive conditioning and the positive prompt. May be useful for mutually exclusive regions.
2024-04-19 09:32:56 -04:00
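At its core, the extra conditioning pairs the positive prompt with the complement of the positive region's mask; a minimal sketch of the mask inversion (names are illustrative):

```python
import torch


def inverted_negative_mask(positive_mask: torch.Tensor) -> torch.Tensor:
    """Mask covering everything *outside* the positive region."""
    return ~positive_mask.bool()


positive_mask = torch.zeros(1, 64, 64, dtype=torch.bool)
positive_mask[:, :32, :] = True          # top half is the positive region
negative_mask = inverted_negative_mask(positive_mask)
assert negative_mask[:, 32:, :].all()    # bottom half gets the inverted conditioning
```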
e4fcb6627a feat(ui): style regional prompt boxes 2024-04-19 09:32:56 -04:00
47aa6357d1 tidy(ui): organize regional prompts files 2024-04-19 09:32:56 -04:00
b81030fe27 tidy(ui): remove unused exports 2024-04-19 09:32:56 -04:00
a1a9f0da73 tidy(ui): remove more unused files 2024-04-19 09:32:56 -04:00
8f4f3b773c tidy(ui): remove unused files, code 2024-04-19 09:32:56 -04:00
00737efc31 tidy(ui): tidy naming of regional prompt utils 2024-04-19 09:32:56 -04:00
5924dc6ff6 feat(ui): transparency on regional prompts canvas 2024-04-19 09:32:56 -04:00
246fabf2a0 feat(ui): scaling regional prompt canvas 2024-04-19 09:32:56 -04:00
30e3e12513 feat(ui): layouting regional prompts 2024-04-19 09:32:56 -04:00
a5bfe2dccb feat(ui): support negative regional prompt 2024-04-19 09:32:56 -04:00
aa6bfc8645 fix(ui): wip misc regional prompting ui 2024-04-19 09:32:56 -04:00
20ccdb6c8f fix(ui): remove extra type in nodestate 2024-04-19 09:32:56 -04:00
8caa7bc2b1 feat(ui): abstract out bbox renderer 2024-04-19 09:32:56 -04:00
ede8826757 feat(ui): remove dep on stage in mouse handlers 2024-04-19 09:32:56 -04:00
ff7aa2558a feat(ui): display prompt when debugging regions 2024-04-19 09:32:56 -04:00
c9bf00b80b feat(ui): restore invoke button (wip) 2024-04-19 09:32:56 -04:00
1f8f429d55 feat(ui): abstract layer renderer 2024-04-19 09:32:56 -04:00
d34e431002 feat(ui): abstract brush preview logic 2024-04-19 09:32:56 -04:00
cdb481e836 feat(ui): use konva generics for types in selector functions 2024-04-19 09:32:56 -04:00
525e6d697c feat(ui): re-implement with imperative konva api (wip) 2024-04-19 09:32:56 -04:00
bbbb5479e8 feat(ui): re-implement with imperative konva api (wip) 2024-04-19 09:32:56 -04:00
ae7797f662 feat(ui): re-implement with imperative konva api (wip) 2024-04-19 09:32:56 -04:00
05deeb68fa feat(ui): draft of graph helper for regional prompts 2024-04-19 09:32:56 -04:00
602a59066e fix(nodes): handle invert in alpha_mask_to_tensor 2024-04-19 09:32:56 -04:00
d1db6198b5 perf(ui): memoize & otherwise optimize regional prompts ui 2024-04-19 09:32:56 -04:00
944fa1a847 chore(ui): lint 2024-04-19 09:32:56 -04:00
52e7daffe7 feat(ui): selected layer styling 2024-04-19 09:32:56 -04:00
cf4c1750cb fix(ui): caching broke layer rendering 2024-04-19 09:32:56 -04:00
de7ecc8e3e feat(ui): tweak bbox styling 2024-04-19 09:32:56 -04:00
6c0481ef51 fix(ui): do not reset layer position when toggling visibility 2024-04-19 09:32:56 -04:00
b9d0da44eb feat(ui): wip layer transparency 2024-04-19 09:32:56 -04:00
0a42d7d510 docs(ui): update docstrings for helper function 2024-04-19 09:32:56 -04:00
c1aae0815d feat(ui): regional prompting layout, styling 2024-04-19 09:32:56 -04:00
e7523bd1d9 fix(ui): fix layer debug 2024-04-19 09:32:56 -04:00
8911017bd1 feat(ui): selectable & draggable layers 2024-04-19 09:32:56 -04:00
fc26f3e430 feat(nodes): add alpha mask to tensor invocation 2024-04-19 09:32:56 -04:00
c89a24d1ea feat(ui): add util to get blobs from layers 2024-04-19 09:32:56 -04:00
52ba4966c9 feat(ui): wip regional prompting UI
- Add eraser tool, applies per layer
2024-04-19 09:32:56 -04:00
822dfa77fc feat(ui): wip regional prompting UI
- Arrange layers
- Layer visibility
- Layered brush preview
- Cleanup
2024-04-19 09:32:56 -04:00
83d359b681 feat(ui): wip regional prompting UI 2024-04-19 09:32:56 -04:00
f87eee810b feat(ui): rough out regional prompts components 2024-04-19 09:32:56 -04:00
1d1e4d02dc feat(ui): rough out regional prompts store 2024-04-19 09:32:56 -04:00
2b9f06dc4c Re-enable app shutdown actions (#6244)
* closes #6242

* only override sigINT during slow model scanning

* fix ruff formatting

---------

Co-authored-by: Lincoln Stein <lstein@gmail.com>
2024-04-19 06:45:42 -04:00
a35386f24c fix: IP Adapter Method having incorrect informational popover 2024-04-18 13:37:55 -04:00
ac1071a5e5 chore: v4.1.0 2024-04-18 07:19:22 +10:00
5295a398f3 translationBot(ui): update translation (Italian)
Currently translated at 98.4% (1122 of 1140 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-04-17 08:41:57 +10:00
0c7283c82d translationBot(ui): update translation (Turkish)
Currently translated at 50.8% (580 of 1140 strings)

translationBot(ui): update translation (Korean)

Currently translated at 43.3% (494 of 1140 strings)

translationBot(ui): update translation (Chinese (Simplified))

Currently translated at 80.9% (923 of 1140 strings)

translationBot(ui): update translation (Russian)

Currently translated at 98.8% (1127 of 1140 strings)

translationBot(ui): update translation (Dutch)

Currently translated at 63.7% (727 of 1140 strings)

translationBot(ui): update translation (Japanese)

Currently translated at 50.4% (575 of 1140 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.3% (1121 of 1140 strings)

translationBot(ui): update translation (Spanish)

Currently translated at 27.8% (317 of 1140 strings)

translationBot(ui): update translation (German)

Currently translated at 72.2% (824 of 1140 strings)

Co-authored-by: Anonymous <noreply@weblate.org>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/de/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/es/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ja/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ko/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/nl/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/tr/
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/zh_Hans/
Translation: InvokeAI/Web UI
2024-04-17 08:41:57 +10:00
73ad173c74 update labels for Style Only and Composition Only to be designated as beta 2024-04-17 08:29:10 +10:00
c828a4e59f Add IP Adapter Style & Composition Modes (#6213)
## Summary

Until now, IP Adapter had complete control over the contents of the output. With this PR, users are now able to select "Style Only" or "Composition Only" to draw just the style or layout of the reference image.

Based off: https://arxiv.org/abs/2404.02733

### New IP Method Option

- `Full` - Both style and layout of the reference image are used.
- `Style Only` - Only the style of the image is used.
- `Composition Only` - Only the composition of the image is used.


![opera_0BkqZTwObO](https://github.com/invoke-ai/InvokeAI/assets/54517381/1b2fbbba-44c9-4c25-87cb-3711a17d13e3)

### Example Result


![demo](https://github.com/invoke-ai/InvokeAI/assets/54517381/703f3de5-e685-4691-acda-9338a4c10796)

### Notes

- Supports both SDXL and SD1.5

### Testing

- Just check and test if it works as expected with all IP Adapter models
- both SDXL and SD1.5

## Merge Plan

Good to merge once tested for all edge cases.
2024-04-16 14:23:36 -04:00
6bab040d24 Merge branch 'main' into ip-adapter-style-comp 2024-04-16 21:14:06 +05:30
f46bbaf8c4 fix: make ip-adapter weights not be optional 2024-04-16 21:12:45 +05:30
fce6b3e44c maybe solve race issue 2024-04-16 13:09:26 +10:00
d27907cc6d fix: entire reshaping block needs to be skipped 2024-04-16 04:29:53 +05:30
7ee3fef2db cleanup: better var names for the ip adapter weight collection block 2024-04-16 04:23:50 +05:30
b39ce642b6 cleanup: raise ValueErrors when target_blocks don't match base model 2024-04-16 04:12:30 +05:30
a148c4322c fix: IP Adapter weights being incorrectly applied
They were being overwritten rather than being appended
2024-04-16 04:10:41 +05:30
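The gist of the bug and the fix, sketched with plain lists (the `adapter.weights` attribute here is hypothetical, standing in for the per-layer attention weight collection):

```python
# Buggy version: each adapter overwrites the weights of the previous one.
def collect_weights_overwrite(adapters):
    collected = []
    for adapter in adapters:
        collected = adapter.weights   # clobbers earlier adapters' weights
    return collected


# Fixed version: weights from every adapter are appended.
def collect_weights_append(adapters):
    collected = []
    for adapter in adapters:
        collected.extend(adapter.weights)
    return collected
```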
f6b7bc5d98 fix: Dynamically adapt height of control adapter opts 2024-04-16 01:18:43 +05:30
5f6c6abf9c chore: change IPAdapterAttentionWeights to a dataclass 2024-04-15 23:38:55 +05:30
cd76a31a8f fix: IP Adapter method not being recalled 2024-04-15 22:29:32 +05:30
e93f4d632d [util] Add generic torch device class (#6174)
* introduce new abstraction layer for GPU devices

* add unit test for device abstraction

* fix ruff

* convert TorchDeviceSelect into a stateless class

* move logic to select context-specific execution device into context API

* add mock hardware environments to pytest

* remove dangling mocker fixture

* fix unit test for running on non-CUDA systems

* remove unimplemented get_execution_device() call

* remove autocast precision

* Multiple changes:

1. Remove TorchDeviceSelect.get_execution_device(), as well as calls to
   context.models.get_execution_device().
2. Rename TorchDeviceSelect to TorchDevice
3. Added back the legacy public API defined in `invocation_api`, including
   choose_precision().
4. Added a config file migration script to accommodate removal of precision=autocast.

* add deprecation warnings to choose_torch_device() and choose_precision()

* fix test crash

* remove app_config argument from choose_torch_device() and choose_torch_dtype()

---------

Co-authored-by: Lincoln Stein <lstein@gmail.com>
2024-04-15 13:12:49 +00:00
5a8489bbfc perf(ui): memoize infill components 2024-04-15 22:50:54 +10:00
a24c9d0f7a perf(ui): optimize useFeatureStatus 2024-04-15 22:50:54 +10:00
7a92afc117 perf(ui): fix rerenders in nodes
Unmemoized selector tanking perf
2024-04-15 22:50:54 +10:00
b508945b11 feat(ui): edge labels
Add setting to render labels with format `Source Node label -> Target Node label` on edges.
2024-04-15 22:48:46 +10:00
7cf788e658 Update deps to their latest versions (#6178)
* Update deps to their latest versions

* missed huggingface_hub

* bump accelerate

---------

Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com>
2024-04-15 00:48:39 +00:00
06bc38d3f4 Remove tag excluder 2024-04-15 09:14:49 +10:00
d3b0212da5 Scope project files to src dir (enables --production) 2024-04-15 09:14:49 +10:00
c2b79ce14c Replace @knipignore with paths config 2024-04-15 09:14:49 +10:00
70185b0173 translationBot(ui): update translation (Russian)
Currently translated at 99.5% (1128 of 1133 strings)

Co-authored-by: Васянатор <ilabulanov339@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/ru/
Translation: InvokeAI/Web UI
2024-04-15 09:12:38 +10:00
a83a0c6146 translationBot(ui): update translation (Chinese (Simplified))
Currently translated at 81.5% (924 of 1133 strings)

Co-authored-by: 怀瑾 <symant233@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/zh_Hans/
Translation: InvokeAI/Web UI
2024-04-15 09:12:38 +10:00
12f41039cc translationBot(ui): update translation (Italian)
Currently translated at 98.4% (1122 of 1140 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.4% (1120 of 1138 strings)

translationBot(ui): update translation (Italian)

Currently translated at 98.4% (1115 of 1133 strings)

Co-authored-by: Riccardo Giovanetti <riccardo.giovanetti@gmail.com>
Translate-URL: https://hosted.weblate.org/projects/invokeai/web-ui/it/
Translation: InvokeAI/Web UI
2024-04-15 09:12:38 +10:00
b3b5b7e261 Include hardcoded count of one to avoid translation issues on missing keys 2024-04-15 09:10:15 +10:00
f706a13230 Adjust gallery image length handling 2024-04-15 09:10:15 +10:00
22c6400bb8 Refactor i18n pluralization 2024-04-15 09:10:15 +10:00
1ca152f6c8 Apply eslint/prettier fixes 2024-04-15 09:10:15 +10:00
982e255878 Add dynamic label to delete button located at the top toolbar 2024-04-15 09:10:15 +10:00
7899149144 Remove unnecessary word 2024-04-15 09:10:15 +10:00
bef97b46bf Apply eslint/prettier fixes 2024-04-15 09:10:15 +10:00
cc256fee0e Modify the modal title to include selected image array length 2024-04-15 09:10:15 +10:00
ec69a58c8d Include plural variation for delete image modal title 2024-04-15 09:10:15 +10:00
ec67ba61db Pass an array of selected images instead of imageDTO 2024-04-15 09:10:15 +10:00
66126996e7 Import image selection 2024-04-15 09:10:15 +10:00
4eb66a9198 remove hires fix badge from settings when using sdxl 2024-04-15 07:57:58 +10:00
14e41a1fd9 Remove unnecessary whitespace 2024-04-15 07:54:36 +10:00
fc55522003 Import hook in the main App script 2024-04-15 07:54:36 +10:00
cd6d8ae9cc Add a hook as a singleton to update favicon and title upon queueSize change 2024-04-15 07:54:36 +10:00
2933eb594d Remove unnecessary code 2024-04-15 07:54:36 +10:00
4e08fab3f5 Apply brand red color and a black border 2024-04-15 07:54:36 +10:00
8bca7e2aa2 Apply eslint/prettier fixes 2024-04-15 07:54:36 +10:00
3706cf0ad4 Add JSDoc strings 2024-04-15 07:54:36 +10:00
a459361376 Modify the processing to consider the active queue length instead of in_progress only 2024-04-15 07:54:36 +10:00
bb330d50a6 Increase favicon alert detail size 2024-04-15 07:54:36 +10:00
102cb62960 Apply eslint/prettier fixes 2024-04-15 07:54:36 +10:00
8eeab22ecd Replace let with const 2024-04-15 07:54:36 +10:00
4343852b83 Update HTML page title and favicon upon queue item event 2024-04-15 07:54:36 +10:00
0a9bf25bff Implement updatePageTitle and updatePageFavicon methods 2024-04-15 07:54:36 +10:00
4cd09850b8 Add ID to the HTML link element 2024-04-15 07:54:36 +10:00
dbc586e0b2 Add alert variation for Invoke favicon 2024-04-15 07:54:36 +10:00
8426f1e7b2 fix(experimental): Possible fix for conflict with regional embed length mismatch
Pushing this so people can test it out and see if this needs to be handled in a different way.
2024-04-14 12:19:19 +05:30
c2e3c61f28 fix recall all when loras, controls, or hrf aren't present 2024-04-14 16:49:14 +10:00
fbfa29c2ef Update GALLERY.md 2024-04-14 16:46:31 +10:00
9ee7b951eb Update GALLERY.md 2024-04-14 16:46:31 +10:00
29dd1bb35b Update GALLERY.md 2024-04-14 16:46:31 +10:00
68d8a2497e Update GALLERY.md 2024-04-14 16:46:31 +10:00
4b171fa696 Creation of GALLERY.md and related images
First draft of the walkthrough of the Gallery right-hand panel
2024-04-14 16:46:31 +10:00
d0beb45431 Create GALLERY.md 2024-04-14 16:46:31 +10:00
e724781a80 Update WEB.md
Correct stated location of Gallery panel.
2024-04-14 16:46:31 +10:00
636ece323f Update INSTALL_DEVELOPMENT.md 2024-04-14 15:24:00 +10:00
77b3281f08 prettier 2024-04-14 15:22:33 +10:00
bd7c8cd517 added info popover back to model, updated description hover to combobox only 2024-04-14 15:22:33 +10:00
489d485907 added missing description to control adapters hover 2024-04-14 15:22:33 +10:00
6eed5ad531 added button for hiding bounding box 2024-04-14 15:22:33 +10:00
9cb0f63c44 refactor: fix a bunch of type issues in custom_attention 2024-04-13 14:17:25 +05:30
2d5786d3bb fix: Incorrect composition blocks for SD1.5 2024-04-13 13:52:10 +05:30
27466ffa1a chore: update the ip adapter node version 2024-04-13 13:39:08 +05:30
f50b156511 chore: do not include custom nodes in schema 2024-04-13 12:43:49 +05:30
9fc73743b2 feat: support SD1.5 2024-04-13 12:30:39 +05:30
d4393e4170 chore: linter fixes 2024-04-13 12:14:45 +05:30
145a0b029e Merge branch 'ip-adapter-style-comp' of https://github.com/blessedcoolant/InvokeAI into ip-adapter-style-comp 2024-04-13 12:13:06 +05:30
f2506cc769 chore: ruff fixes
Revert "chore: ruff fixes"

This reverts commit af36fe8c1e.

Revert "chore: ruff fixes"

This reverts commit af36fe8c1e.
2024-04-13 12:12:33 +05:30
7a67fd6a06 Revert "chore: ruff fixes"
This reverts commit af36fe8c1e.
2024-04-13 12:10:20 +05:30
af36fe8c1e chore: ruff fixes 2024-04-13 12:08:52 +05:30
e9f16ac8c7 feat: add UI for IP Adapter Method 2024-04-13 12:06:59 +05:30
6ea183f0d4 wip: Initial Implementation IP Adapter Style & Comp Modes 2024-04-13 11:09:45 +05:30
24f2cde862 Remove type="submit" from all tsx files.
Fixes a problem on Firefox, at least for me.
2024-04-12 09:09:32 +10:00
b18442ded4 fix(queue): poll queue on finished queue item
When a queue item is finished (completed, canceled, failed), immediately poll the queue for the next queue item.

Closes #6189
2024-04-12 07:31:47 +10:00
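A rough sketch of the intended behavior with a hypothetical processor class (not the actual session processor API):

```python
TERMINAL_STATUSES = {"completed", "canceled", "failed"}


class QueueProcessor:
    """Illustrative: dequeue the next item as soon as the current one finishes."""

    def __init__(self, queue):
        self._queue = queue

    def on_status_changed(self, item_id: str, status: str) -> None:
        if status in TERMINAL_STATUSES:
            # Previously the processor waited for the next polling interval;
            # polling immediately removes the dead time between queue items.
            self.poll()

    def poll(self) -> None:
        next_item = self._queue.dequeue()
        if next_item is not None:
            self.start(next_item)

    def start(self, item) -> None:
        ...
```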
651c0b39b1 clear cache on all exceptions 2024-04-12 07:19:16 +10:00
46d23cd868 catch RunTimeError during model to() call rather than OutOfMemoryError 2024-04-12 07:19:16 +10:00
dedf0c6ffa fix ruff issues 2024-04-12 07:19:16 +10:00
579082ac10 [mm] clear the cache entry for a model that got an OOM during loading 2024-04-12 07:19:16 +10:00
7bc77ddb40 fix(nodes): doubly-noised latents
When using refiner with a mask (i.e. inpainting), we don't have noise provided as an input to the node.

This situation uniquely hits a code path that wasn't reviewed when gradient denoising was implemented.

That code path does two things wrong:
- It lerp'd the input latents. This was fixed in 5a1f4cb1ce.
- It added noise to the latents an extra time. This is fixed in this change.

We don't need to add noise in `latents_from_embeddings` because we do it just a few lines later in `AddsMaskGuidance`.

- Remove the extraneous call to `add_noise`
- Make `seed` a required arg. We never call the function without seed anyways. If we refactor this in the future, it will be clearer that we need to look at how seed is handled.
- Move the call to create the noise to a deeper conditional, just before we call `AddsMaskGuidance`. The created noise tensor is now only used in that function, no need to create it every time.

Note: Whether or not having both noise and latents as inputs on the node is correct is a separate conversation. This change just fixes the issue with the current setup.
2024-04-11 07:21:50 -04:00
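Heavily simplified, the shape of the change is the sketch below; the function signatures are stand-ins for the real denoise code, and `apply_mask_guidance` stands in for `AddsMaskGuidance`:

```python
import torch


def latents_from_embeddings(latents: torch.Tensor, seed: int, is_inpainting: bool):
    """`seed` is now required; noise is created only where it is actually used."""
    # Before: noise was created unconditionally here and added to `latents`,
    # which double-noised them when mask guidance added noise again later.
    if is_inpainting:
        # After: create the noise only on the path that needs it, just before
        # it is handed to the mask-guidance step.
        generator = torch.Generator().manual_seed(seed)
        noise = torch.randn(latents.shape, generator=generator)
        latents = apply_mask_guidance(latents, noise)
    return latents


def apply_mask_guidance(latents: torch.Tensor, noise: torch.Tensor) -> torch.Tensor:
    # Stand-in for AddsMaskGuidance, which is responsible for noising the
    # masked region itself.
    return latents
```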
026d095afe fix(nodes): do not set seed on output latents from denoise latents
`LatentsField` objects have an optional `seed` field. This should only be populated when the latents are noise, generated from a seed.

`DenoiseLatentsInvocation` needs a seed value for scheduler initialization. It's used in a few places, and there is some logic for determining the seed to use with a series of fallbacks:
- Use the seed from the noise (a `LatentsField` object)
- Use the seed from the latents (a `LatentsField` object - normally it won't have a seed)
- Use `0` as a final fallback

In `DenoiseLatentsInvocation`, we set the seed in the `LatentsOutput`, even though the output latents are not noise.

This is normally fine, but when we use the refiner, we re-use those same latents for the refiner denoise. This causes that characteristic same-seed-fried look on the refiner pass.

Simple fix - do not set the field in the output latents.
2024-04-11 07:21:50 -04:00
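The fallback chain and the fix, sketched with a simplified `LatentsField` (the dataclass here is illustrative, mirroring only the fields described above):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class LatentsField:
    latents_name: str
    seed: Optional[int] = None  # only meaningful when the latents are noise


def resolve_seed(noise: Optional[LatentsField], latents: Optional[LatentsField]) -> int:
    """Seed used for scheduler init: noise seed, then latents seed, then 0."""
    if noise is not None and noise.seed is not None:
        return noise.seed
    if latents is not None and latents.seed is not None:
        return latents.seed
    return 0


def build_output(output_latents_name: str) -> LatentsField:
    # Fix: denoised latents are not noise, so do not carry a seed forward.
    # Re-using them for a refiner pass no longer inherits the original seed.
    return LatentsField(latents_name=output_latents_name, seed=None)
```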
7e2ade50e1 fix(ui): canvas staging area & batch handling fixes
Handful of intertwined fixes.

- Create and use helper function to reset staging area.
- Clear staging area when queue items are canceled, failed, cleared, etc. Fixes a bug where the bbox ends up offset and images are put into the wrong spot.
- Fix a number of similar bugs where canvas would "forget" it had pending generations, but they continued to generate. Canvas needs to track batches that should be displayed in it using `state.canvas.batchIds`, and this was getting cleared without actually canceling those batches.
- Disable the `discard current image` button on canvas if there is only one image. Prevents accidentally canceling all canvas batches if you spam the button.
2024-04-10 21:48:34 +10:00
c0d54d5414 Revert "always enqueue with fresh bounding box"
This reverts commit fae51da278b39c61cbbea5de88661b4bc546f1ce.
2024-04-10 21:48:34 +10:00
98bfbb73ac always enqueue with fresh bounding box 2024-04-10 21:48:34 +10:00
f9af32a6d1 Fix the padding behavior when max-pooling regional IP-Adapter masks to mirror the downscaling behavior of SD and SDXL. Prior to this change, denoising with input latent dimensions that were not evenly divisible by 8 would raise an exception. 2024-04-09 16:50:43 -04:00
fba40eb1bd Fix the padding behavior when max-pooling regional prompt masks to mirror the downscaling behavior of SD and SDXL. Prior to this change, denoising with input latent dimensions that were not evenly divisible by 8 would raise an exception. 2024-04-09 16:50:43 -04:00
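A sketch of padding a mask up to the next multiple of 8 before max-pooling, so odd input sizes line up with the model's latent grid (the padding mode and helper name are assumptions, not the exact implementation):

```python
import torch
import torch.nn.functional as F


def max_pool_mask(mask: torch.Tensor, downscale: int = 8) -> torch.Tensor:
    """Max-pool a (1, 1, H, W) mask so odd sizes match the model's latent grid."""
    _, _, h, w = mask.shape
    pad_h = (downscale - h % downscale) % downscale
    pad_w = (downscale - w % downscale) % downscale
    # Pad right/bottom so H and W become divisible by the downscale factor.
    mask = F.pad(mask, (0, pad_w, 0, pad_h), mode="constant", value=0.0)
    return F.max_pool2d(mask, kernel_size=downscale, stride=downscale)


mask = torch.ones(1, 1, 516, 772)       # not divisible by 8
pooled = max_pool_mask(mask)
assert pooled.shape == (1, 1, 65, 97)   # ceil(516/8) = 65, ceil(772/8) = 97
```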
69f6c24f52 Fix field ordering (#6186)
Changed fields to go in w/h x/y order.

## Summary

The prior ordering of height, then width, and y, then x, doesn't match
up with the expected UX. This has been changed.

## Checklist

- [X] _The PR has a short but descriptive title, suitable for a
changelog_
- [ ] _Tests added / updated (if applicable)_
- [ ] _Documentation added / updated (if applicable)_
2024-04-10 01:00:22 +05:30
80d631118d Fix field ordering
Changed fields to go in w/h x/y order.
2024-04-09 14:17:55 -05:00
0c6dd32ece (minor) Fix IP-Adapter conditional logic in CustomAttnProcessor2_0. 2024-04-09 15:06:51 -04:00
0bdbfd4d1d Add support for IP-Adapter masks. 2024-04-09 15:06:51 -04:00
2e27ed5f3d Pass IP-Adapter scales through the cross_attn_kwargs pathway, since they are the same for all attention layers. This change also helps to prepare for adding IP-Adapter region masks. 2024-04-09 15:06:51 -04:00
babdc64b17 (minor) Fix typo in IP-Adapter field description. 2024-04-09 15:06:51 -04:00
54327ec4a7 Remove documentation references to prompt-to-prompt cross-attention control. 2024-04-09 10:57:02 -04:00
4a828818da Remove support for Prompt-to-Prompt cross-attention control (aka .swap()). This feature is not widely used. It does not work with SDXL and is incompatible with IP-Adapter and regional prompting. The implementation is also intertwined with both text embedding and the UNet attention layers, resulting in a high maintenance burden. For all of these reasons, we have decided to drop support. 2024-04-09 10:57:02 -04:00
fe386252f3 Revert "feat(nodes): add prompt region from image nodes"
This reverts commit 3a531c5097.
2024-04-09 08:12:12 -04:00
182810337c Add utility to_standard_float_mask(...) to convert various mask formats to a standardized format. 2024-04-09 08:12:12 -04:00
338bf808d6 Rename MaskField to be a generic TensorField. 2024-04-09 08:12:12 -04:00
5b5a4204a1 Fix dimensions of mask produced by ExtractMasksAndPromptsInvocation. Also, added a clearer error message in case the same error is introduced in the future. 2024-04-09 08:12:12 -04:00
75ef473748 Pull the upstream changes from diffusers' AttnProcessor2_0 into CustomAttnProcessor2_0. This fixes a bug in CustomAttnProcessor2_0 that was being triggered when peft was not installed. The bug was present in a block of code that was previously copied from diffusers. The bug seems to have been introduced during diffusers' migration to PEFT for their LoRA handling. The upstream bug was fixed in 531e719163. 2024-04-09 08:12:12 -04:00
926b8d0efe feat(nodes): add prompt region from image nodes 2024-04-09 08:12:12 -04:00
9d9d1761f3 (minor) The latest ruff version has _slightly_ different formatting preferences. 2024-04-09 08:12:12 -04:00
a78df8123f Update the diffusion logic to use the new regional prompting feature. 2024-04-09 08:12:12 -04:00
7ca677578e Create a UNetAttentionPatcher for patching UNet models with CustomAttnProcessor2_0 modules. 2024-04-09 08:12:12 -04:00
31c456c1e6 Update CustomAttention to support both IP-Adapters and regional prompting. 2024-04-09 08:12:12 -04:00
2ce79b61f5 Initialize a RegionalPromptAttnProcessor2_0 class by copying AttnProcessor2_0 from diffusers. 2024-04-09 08:12:12 -04:00
109e3f0e7f Add RegionalPromptData class for managing prompt region masks. 2024-04-09 08:12:12 -04:00
dc64fec771 Add support for lists of prompt embeddings to be passed to the DenoiseLatents invocation, and add handling of the conditioning region masks in DenoiseLatents. 2024-04-09 08:12:12 -04:00
d1e45585d0 Add TextConditioningRegions to the TextConditioningData data structure. 2024-04-09 08:12:12 -04:00
aba023e0c5 Improve documentation of conditioning_data.py. 2024-04-09 08:12:12 -04:00
e354c29b52 Rename ConditioningData -> TextConditioningData. 2024-04-09 08:12:12 -04:00
a7f363e654 Split ip_adapter_conditioning out from ConditioningData. 2024-04-09 08:12:12 -04:00
9b2162e564 Remove scheduler_args from ConditioningData structure. 2024-04-09 08:12:12 -04:00
4e64b26702 Update compel nodes to accept an optional prompt mask. 2024-04-09 08:12:12 -04:00
c22d772062 Add RectangleMaskInvocation. 2024-04-09 08:12:12 -04:00
d6be7662c9 Add a MaskField primitive, and add a mask to the ConditioningField primitive type. 2024-04-09 08:12:12 -04:00
95050088d1 chore: lint fixes 2024-04-09 14:13:10 +10:00
94b5084cd5 fix: one man's max is another man's min 2024-04-09 14:13:10 +10:00
ca0d60bee6 fix: set coherence denoise to 0.2 min for refiner models 2024-04-09 14:13:10 +10:00
fd1f240853 fix: SDXL Refiner not working properly with Inpainting 2024-04-09 14:13:10 +10:00
381b41a56e fix: Update SDXL Refiner graphs to use Gradient Mask 2024-04-09 14:13:10 +10:00
b58494c420 feat(ui): add graph-to-workflow debug helper
This is intended for debug usage, so it's hidden away in the workflow library `...` menu. Hold shift to see the button for it.

- Paste a graph (from a network request, for example) and then click the convert button to convert it to a workflow.
- Disable auto layout to stack the nodes with an offset (try it out). If you change this, you must re-convert to get the changes.
- Edit the workflow JSON if you need to tweak something before loading it.
2024-04-08 20:38:04 -04:00
dca30d5462 (feat) add a method to get the path of an image from the invocation context
Fixes #6175
2024-04-08 18:42:55 +10:00
9ab6655491 feat(backend): clean up choose_precision
- Allow user-defined precision on MPS.
- Use more explicit logic to handle all possible cases.
- Add comments.
- Remove the app_config args (they were effectively unused, just get the config using the singleton getter util)
2024-04-07 09:41:05 -04:00
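A sketch of the more explicit device-to-dtype logic; the defaults shown are assumptions rather than the exact InvokeAI policy:

```python
import torch


def choose_torch_dtype(device: torch.device, configured: str | None = None) -> torch.dtype:
    """Pick an inference dtype per device; a user-configured value wins, including on MPS."""
    if configured == "float32":
        return torch.float32
    if configured == "float16":
        return torch.float16
    if configured == "bfloat16":
        return torch.bfloat16
    # Explicit per-device defaults instead of special-casing.
    if device.type == "cuda":
        return torch.float16
    if device.type == "mps":
        return torch.float16
    return torch.float32  # CPU and anything else
```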
29cfe5a274 fix(ui): handle multipleOf on number fields
This data is already in the template but it wasn't ever used.

One big place where this improves UX is the noise node. Previously, the UI let you change width and height in increments of 1, despite the template requiring a multiple of 8. It now works in multiples of 8.
2024-04-06 13:15:20 -04:00
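Applying the constraint amounts to snapping the entered value to the nearest valid multiple; a small sketch (`multipleOf` is the standard JSON Schema keyword, the helper itself is illustrative):

```python
def snap_to_multiple_of(value: float, multiple_of: float, minimum: float | None = None) -> float:
    """Round a numeric field value to the nearest allowed multiple, e.g. 8 for noise width/height."""
    snapped = round(value / multiple_of) * multiple_of
    if minimum is not None and snapped < minimum:
        snapped = minimum
    return snapped


assert snap_to_multiple_of(515, 8) == 512
assert snap_to_multiple_of(517, 8) == 520
```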
07cb6c944e chore(ui): typegen 2024-04-03 17:18:12 +11:00
1d45ef529b fix(ui): move tcd scheduler to current zod schemas
It was in the v2 schemas which should be immutable and only used for migrations
2024-04-03 17:08:02 +11:00
0259114d9c Merge branch 'main' into main 2024-04-03 17:03:19 +11:00
51e515b925 tidy: use lowercase for tcd scheduler identifier 2024-04-03 17:03:02 +11:00
8c509295f9 chore: ruff 2024-04-03 17:02:45 +11:00
23da3de915 Update constants.ts 2024-03-29 12:39:08 +01:00
97579770e1 Update common.ts 2024-03-29 12:35:42 +01:00
1a83936cdd Merge branch 'invoke-ai:main' into main 2024-03-29 11:14:28 +01:00
80e311a069 Update schedulers.py 2024-03-28 22:52:15 +01:00
b6e6bdc195 Update schedulers.py 2024-03-28 22:51:59 +01:00
462 changed files with 22426 additions and 10358 deletions

495
README.md

@ -2,21 +2,102 @@
![project hero](https://github.com/invoke-ai/InvokeAI/assets/31807370/6e3728c7-e90e-4711-905c-3b55844ff5be)
# Invoke - Professional Creative AI Tools for Visual Media
## To learn more about Invoke, or implement our Business solutions, visit [invoke.com](https://www.invoke.com/about)
# Invoke - Professional Creative AI Tools for Visual Media
#### To learn more about Invoke, or implement our Business solutions, visit [invoke.com]
[![discord badge]][discord link]
[![discord badge]][discord link] [![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link] [![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link] [![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link]
[![latest release badge]][latest release link] [![github stars badge]][github stars link] [![github forks badge]][github forks link]
</div>
[![CI checks on main badge]][CI checks on main link] [![latest commit to main badge]][latest commit to main link]
Invoke is a leading creative engine built to empower professionals and enthusiasts alike. Generate and create stunning visual media using the latest AI-driven technologies. Invoke offers an industry leading web-based UI, and serves as the foundation for multiple commercial products.
[![github open issues badge]][github open issues link] [![github open prs badge]][github open prs link] [![translation status badge]][translation status link]
[Installation and Updates][installation docs] - [Documentation and Tutorials][docs home] - [Bug Reports][github issues] - [Contributing][contributing docs]
<div align="center">
![Highlighted Features - Canvas and Workflows](https://github.com/invoke-ai/InvokeAI/assets/31807370/708f7a82-084f-4860-bfbe-e2588c53548d)
</div>
## Quick Start
1. Download and unzip the installer from the bottom of the [latest release][latest release link].
2. Run the installer script.
- **Windows**: Double-click on the `install.bat` script.
- **macOS**: Open a Terminal window, drag the file `install.sh` from Finder into the Terminal, and press enter.
- **Linux**: Run `install.sh`.
3. When prompted, enter a location for the install and select your GPU type.
4. Once the install finishes, find the directory you selected during install. The default location is `C:\Users\Username\invokeai` for Windows or `~/invokeai` for Linux/macOS.
5. Run the launcher script (`invoke.bat` for Windows, `invoke.sh` for macOS and Linux) the same way you ran the installer script in step 2.
6. Select option 1 to start the application. Once it starts up, open your browser and go to <http://localhost:9090>.
7. Open the model manager tab to install a starter model and then you'll be ready to generate.
More details, including hardware requirements and manual install instructions, are available in the [installation documentation][installation docs].
## Troubleshooting, FAQ and Support
Please review our [FAQ][faq] for solutions to common installation problems and other issues.
For more help, please join our [Discord][discord link].
## Features
Full details on features can be found in [our documentation][features docs].
### Web Server & UI
Invoke runs a locally hosted web server & React UI with an industry-leading user experience.
### Unified Canvas
The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/out-painting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more.
### Workflows & Nodes
Invoke offers a fully featured workflow management solution, enabling users to combine the power of node-based workflows with the ease of a UI. This allows for customizable generation pipelines to be developed and shared by users looking to create specific workflows to support their production use-cases.
### Board & Gallery Management
Invoke features an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. Images can be dragged/dropped onto any Image-based UI element in the application, and rich metadata within the Image allows for easy recall of key prompts or settings used in your workflow.
### Other features
- Support for both ckpt and diffusers models
- SD1.5, SD2.0, and SDXL support
- Upscaling Tools
- Embedding Manager & Support
- Model Manager & Support
- Workflow creation & management
- Node-Based Architecture
## Contributing
Anyone who wishes to contribute to this project - whether documentation, features, bug fixes, code cleanup, testing, or code reviews - is very much encouraged to do so.
Get started with contributing by reading our [contribution documentation][contributing docs], joining the [#dev-chat] or the GitHub discussion board.
We hope you enjoy using Invoke as much as we enjoy creating it, and we hope you will elect to become part of our community.
## Thanks
Invoke is a combined effort of [passionate and talented people from across the world][contributors]. We thank them for their time, hard work and effort.
Original portions of the software are Copyright © 2024 by respective contributors.
[features docs]: https://invoke-ai.github.io/InvokeAI/features/
[faq]: https://invoke-ai.github.io/InvokeAI/help/FAQ/
[contributors]: https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/
[invoke.com]: https://www.invoke.com/about
[github issues]: https://github.com/invoke-ai/InvokeAI/issues
[docs home]: https://invoke-ai.github.io/InvokeAI
[installation docs]: https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/
[#dev-chat]: https://discord.com/channels/1020123559063990373/1049495067846524939
[contributing docs]: https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/
[CI checks on main badge]: https://flat.badgen.net/github/checks/invoke-ai/InvokeAI/main?label=CI%20status%20on%20main&cache=900&icon=github
[CI checks on main link]:https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain
[CI checks on main link]: https://github.com/invoke-ai/InvokeAI/actions?query=branch%3Amain
[discord badge]: https://flat.badgen.net/discord/members/ZmtBAhwWhy?icon=discord
[discord link]: https://discord.gg/ZmtBAhwWhy
[github forks badge]: https://flat.badgen.net/github/forks/invoke-ai/InvokeAI?icon=github
@ -30,402 +111,6 @@
[latest commit to main badge]: https://flat.badgen.net/github/last-commit/invoke-ai/InvokeAI/main?icon=github&color=yellow&label=last%20dev%20commit&cache=900
[latest commit to main link]: https://github.com/invoke-ai/InvokeAI/commits/main
[latest release badge]: https://flat.badgen.net/github/release/invoke-ai/InvokeAI/development?icon=github
[latest release link]: https://github.com/invoke-ai/InvokeAI/releases
[latest release link]: https://github.com/invoke-ai/InvokeAI/releases/latest
[translation status badge]: https://hosted.weblate.org/widgets/invokeai/-/svg-badge.svg
[translation status link]: https://hosted.weblate.org/engage/invokeai/
</div>
InvokeAI is a leading creative engine built to empower professionals
and enthusiasts alike. Generate and create stunning visual media using
the latest AI-driven technologies. InvokeAI offers an industry leading
Web Interface, interactive Command Line Interface, and also serves as
the foundation for multiple commercial products.
**Quick links**: [[How to
Install](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/)] [<a
href="https://discord.gg/ZmtBAhwWhy">Discord Server</a>] [<a
href="https://invoke-ai.github.io/InvokeAI/">Documentation and
Tutorials</a>]
[<a href="https://github.com/invoke-ai/InvokeAI/issues">Bug Reports</a>]
[<a
href="https://github.com/invoke-ai/InvokeAI/discussions">Discussion,
Ideas & Q&A</a>]
[<a
href="https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/">Contributing</a>]
<div align="center">
![Highlighted Features - Canvas and Workflows](https://github.com/invoke-ai/InvokeAI/assets/31807370/708f7a82-084f-4860-bfbe-e2588c53548d)
</div>
## Table of Contents
Table of Contents 📝
**Getting Started**
1. 🏁 [Quick Start](#quick-start)
2. 🖥️ [Hardware Requirements](#hardware-requirements)
**More About Invoke**
1. 🌟 [Features](#features)
2. 📣 [Latest Changes](#latest-changes)
3. 🛠️ [Troubleshooting](#troubleshooting)
**Supporting the Project**
1. 🤝 [Contributing](#contributing)
2. 👥 [Contributors](#contributors)
3. 💕 [Support](#support)
## Quick Start
For full installation and upgrade instructions, please see:
[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALLATION/)
If upgrading from version 2.3, please read [Migrating a 2.3 root
directory to 3.0](#migrating-to-3) first.
### Automatic Installer (suggested for 1st time users)
1. Go to the bottom of the [Latest Release Page](https://github.com/invoke-ai/InvokeAI/releases/latest)
2. Download the .zip file for your OS (Windows/macOS/Linux).
3. Unzip the file.
4. **Windows:** double-click on the `install.bat` script. **macOS:** Open a Terminal window, drag the file `install.sh` from Finder
into the Terminal, and press return. **Linux:** run `install.sh`.
5. You'll be asked to confirm the location of the folder in which
to install InvokeAI and its image generation model files. Pick a
location with at least 15 GB of free disk space. More if you plan on
installing lots of models.
6. Wait while the installer does its thing. After installing the software,
the installer will launch a script that lets you configure InvokeAI and
select a set of starting image generation models.
7. Find the folder that InvokeAI was installed into (it is not the
same as the unpacked zip file directory!) The default location of this
folder (if you didn't change it in step 5) is `~/invokeai` on
Linux/Mac systems, and `C:\Users\YourName\invokeai` on Windows. This directory will contain launcher scripts named `invoke.sh` and `invoke.bat`.
8. On Windows systems, double-click on the `invoke.bat` file. On
macOS, open a Terminal window, drag `invoke.sh` from the folder into
the Terminal, and press return. On Linux, run `invoke.sh`
9. Press 2 to open the "browser-based UI", press enter/return, wait a
minute or two for Stable Diffusion to start up, then open your browser
and go to http://localhost:9090.
10. Type `banana sushi` in the box on the top left and click `Invoke`
### Command-Line Installation (for developers and users familiar with Terminals)
You must have Python 3.10 through 3.11 installed on your machine. Earlier or
later versions are not supported.
Node.js also needs to be installed along with `pnpm` (can be installed with
the command `npm install -g pnpm` if needed)
1. Open a command-line window on your machine. PowerShell is recommended for Windows.
2. Create a directory to install InvokeAI into. You'll need at least 15 GB of free space:
```terminal
mkdir invokeai
```
3. Create a virtual environment named `.venv` inside this directory and activate it:
```terminal
cd invokeai
python -m venv .venv --prompt InvokeAI
```
4. Activate the virtual environment (do it every time you run InvokeAI)
_For Linux/Mac users:_
```sh
source .venv/bin/activate
```
_For Windows users:_
```ps
.venv\Scripts\activate
```
5. Install the InvokeAI module and its dependencies. Choose the command suited for your platform & GPU.
_For Windows/Linux with an NVIDIA GPU:_
```terminal
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu121
```
_For Linux with an AMD GPU:_
```sh
pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/rocm5.6
```
_For non-GPU systems:_
```terminal
pip install InvokeAI --use-pep517 --extra-index-url https://download.pytorch.org/whl/cpu
```
_For Macintoshes, either Intel or M1/M2/M3:_
```sh
pip install InvokeAI --use-pep517
```
6. Configure InvokeAI and install a starting set of image generation models (you only need to do this once):
```terminal
invokeai-configure --root .
```
Don't miss the dot at the end!
7. Launch the web server (do it every time you run InvokeAI):
```terminal
invokeai-web
```
8. Point your browser to http://localhost:9090 to bring up the web interface.
9. Type `banana sushi` in the box on the top left and click `Invoke`.
Be sure to activate the virtual environment each time before re-launching InvokeAI,
using `source .venv/bin/activate` or `.venv\Scripts\activate`.
## Detailed Installation Instructions
This fork is supported across Linux, Windows and Macintosh. Linux
users can use either an Nvidia-based card (with CUDA support) or an
AMD card (using the ROCm driver). For full installation and upgrade
instructions, please see:
[InvokeAI Installation Overview](https://invoke-ai.github.io/InvokeAI/installation/INSTALL_SOURCE/)
<a name="migrating-to-3"></a>
### Migrating a v2.3 InvokeAI root directory
The InvokeAI root directory is where the InvokeAI startup file,
installed models, and generated images are stored. It is ordinarily
named `invokeai` and located in your home directory. The contents and
layout of this directory has changed between versions 2.3 and 3.0 and
cannot be used directly.
We currently recommend that you use the installer to create a new root
directory named differently from the 2.3 one, e.g. `invokeai-3` and
then use a migration script to copy your 2.3 models into the new
location. However, if you choose, you can upgrade this directory in
place. This section gives both recipes.
#### Creating a new root directory and migrating old models
This is the safer recipe because it leaves your old root directory in
place to fall back on.
1. Follow the instructions above to create and install InvokeAI in a
directory that has a different name from the 2.3 invokeai directory.
In this example, we will use "invokeai-3"
2. When you are prompted to select models to install, select a minimal
set of models, such as stable-diffusion-v1.5 only.
3. After installation is complete launch `invokeai.sh` (Linux/Mac) or
`invokeai.bat` and select option 8 "Open the developers console". This
will take you to the command line.
4. Issue the command `invokeai-migrate3 --from /path/to/v2.3-root --to
/path/to/invokeai-3-root`. Provide the correct `--from` and `--to`
paths for your v2.3 and v3.0 root directories respectively.
This will copy and convert your old models from 2.3 format to 3.0
format and create a new `models` directory in the 3.0 directory. The
old models directory (which contains the models selected at install
time) will be renamed `models.orig` and can be deleted once you have
confirmed that the migration was successful.
If you wish, you can pass the 2.3 root directory to both `--from` and
`--to` in order to update in place. Warning: this directory will no
longer be usable with InvokeAI 2.3.
#### Migrating in place
For the adventurous, you may do an in-place upgrade from 2.3 to 3.0
without touching the command line. **This recipe does not work on
Windows platforms due to a bug in the Windows version of the 2.3
upgrade script.** See the next section for a Windows recipe.
##### For Mac and Linux Users:
1. Launch the InvokeAI launcher script in your current v2.3 root directory.
2. Select option [9] "Update InvokeAI" to bring up the updater dialog.
3. Select option [1] to upgrade to the latest release.
4. Once the upgrade is finished you will be returned to the launcher
menu. Select option [6] "Re-run the configure script to fix a broken
install or to complete a major upgrade".
This will run the configure script against the v2.3 directory and
update it to the 3.0 format. The following files will be replaced:
- The invokeai.init file, replaced by invokeai.yaml
- The models directory
- The configs/models.yaml model index
The original versions of these files will be saved with the suffix
".orig" appended to the end. Once you have confirmed that the upgrade
worked, you can safely remove these files. Alternatively you can
restore a working v2.3 directory by removing the new files and
restoring the ".orig" files' original names.
##### For Windows Users:
Windows users can upgrade with the following steps:
1. Enter the 2.3 root directory you wish to upgrade
2. Launch `invoke.sh` or `invoke.bat`
3. Select the "Developer's console" option [8]
4. Type the following commands
```
pip install "invokeai @ https://github.com/invoke-ai/InvokeAI/archive/refs/tags/v3.0.0" --use-pep517 --upgrade
invokeai-configure --root .
```
(Replace `v3.0.0` with the current release number if this document is out of date).
The first command will install and upgrade new software to run
InvokeAI. The second will prepare the 2.3 directory for use with 3.0.
You may now launch the WebUI in the usual way, by selecting option [1]
from the launcher script
#### Migrating Images
The migration script will migrate your invokeai settings and models,
including textual inversion models, LoRAs and merges that you may have
installed previously. However it does **not** migrate the generated
images stored in your 2.3-format outputs directory. To do this, you
need to run an additional step:
1. From a working InvokeAI 3.0 root directory, start the launcher and
enter menu option [8] to open the "developer's console".
2. At the developer's console command line, type the command:
```bash
invokeai-import-images
```
3. This will lead you through the process of confirming the desired
source and destination for the imported images. The images will
appear in the gallery board of your choice, and contain the
original prompt, model name, and other parameters used to generate
the image.
(Many kudos to **techjedi** for contributing this script.)
## Hardware Requirements
InvokeAI is supported across Linux, Windows and macOS. Linux
users can use either an Nvidia-based card (with CUDA support) or an
AMD card (using the ROCm driver).
### System
You will need one of the following:
- An NVIDIA-based graphics card with 4 GB or more of VRAM. 6-8 GB of VRAM is highly recommended for rendering using the Stable Diffusion XL models.
- An Apple computer with an M1 chip.
- An AMD-based graphics card with 4 GB or more of VRAM (Linux only), 6-8 GB for XL rendering.
We do not recommend the GTX 1650 or 1660 series video cards. They are
unable to run in half-precision mode and do not have sufficient VRAM
to render 512x512 images.
**Memory** - At least 12 GB Main Memory RAM.
**Disk** - At least 12 GB of free disk space for the machine learning model, Python, and all its dependencies.
## Features
Feature documentation can be reviewed by navigating to [the InvokeAI Documentation page](https://invoke-ai.github.io/InvokeAI/features/)
### *Web Server & UI*
InvokeAI offers a locally hosted Web Server & React Frontend, with an industry leading user experience. The Web-based UI allows for simple and intuitive workflows, and is responsive for use on mobile devices and tablets accessing the web server.
### *Unified Canvas*
The Unified Canvas is a fully integrated canvas implementation with support for all core generation capabilities, in/outpainting, brush tools, and more. This creative tool unlocks the capability for artists to create with AI as a creative collaborator, and can be used to augment AI-generated imagery, sketches, photography, renders, and more.
### *Workflows & Nodes*
InvokeAI offers a fully featured workflow management solution, enabling users to combine the power of node-based workflows with the ease of a UI. This allows for customizable generation pipelines to be developed and shared by users looking to create specific workflows to support their production use-cases.
### *Board & Gallery Management*
Invoke AI provides an organized gallery system for easily storing, accessing, and remixing your content in the Invoke workspace. Images can be dragged/dropped onto any Image-based UI element in the application, and rich metadata within the Image allows for easy recall of key prompts or settings used in your workflow.
### Other features
- *Support for both ckpt and diffusers models*
- *SD 2.0, 2.1, XL support*
- *Upscaling Tools*
- *Embedding Manager & Support*
- *Model Manager & Support*
- *Workflow creation & management*
- *Node-Based Architecture*
### Latest Changes
For our latest changes, view our [Release
Notes](https://github.com/invoke-ai/InvokeAI/releases) and the
[CHANGELOG](docs/CHANGELOG.md).
### Troubleshooting / FAQ
Please check out our **[FAQ](https://invoke-ai.github.io/InvokeAI/help/FAQ/)** to get solutions for common installation
problems and other issues. For more help, please join our [Discord][discord link]
## Contributing
Anyone who wishes to contribute to this project, whether documentation, features, bug fixes, code
cleanup, testing, or code reviews, is very much encouraged to do so.
Get started with contributing by reading our [Contribution documentation](https://invoke-ai.github.io/InvokeAI/contributing/CONTRIBUTING/), joining the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) or the GitHub discussion board.
If you are unfamiliar with how
to contribute to GitHub projects, we have a new contributor checklist you can follow to get started contributing:
[New Contributor Checklist](https://invoke-ai.github.io/InvokeAI/contributing/contribution_guides/newContributorChecklist/).
We hope you enjoy using our software as much as we enjoy creating it,
and we hope that some of those of you who are reading this will elect
to become part of our community.
Welcome to InvokeAI!
### Contributors
This fork is a combined effort of various people from across the world.
[Check out the list of all these amazing people](https://invoke-ai.github.io/InvokeAI/other/CONTRIBUTORS/). We thank them for
their time, hard work and effort.
### Support
For support, please use this repository's GitHub Issues tracking service, or join the [Discord][discord link].
Original portions of the software are Copyright (c) 2023 by respective contributors.

8 binary image files added (not shown): 23 KiB, 2.7 KiB, 30 KiB, 221 KiB, 53 KiB, 786 B, 27 KiB, 3.3 KiB


@ -51,13 +51,11 @@ The settings in this file will override the defaults. You only need
to change this file if the default for a particular setting doesn't
work for you.
You'll find an example file next to `invokeai.yaml` that shows the default values.
Some settings, like [Model Marketplace API Keys], require the YAML
to be formatted correctly. Here is a [basic guide to YAML files].
You can fix a broken `invokeai.yaml` by deleting it and running the
configuration script again -- option [6] in the launcher, "Re-run the
configure script".
#### Custom Config File Location
You can use any config file with the `--config` CLI arg. Pass in the path to the `invokeai.yaml` file you want to use.

92
docs/features/GALLERY.md Normal file

@ -0,0 +1,92 @@
---
title: InvokeAI Gallery Panel
---
# :material-web: InvokeAI Gallery Panel
## Quick guided walkthrough of the Gallery Panel's features
The Gallery Panel is a fast way to review, find, and make use of images you've
generated and loaded. The Gallery is divided into Boards. The Uncategorized board is always
present but you can create your own for better organization.
![image](../assets/gallery/gallery.png)
### Board Display and Settings
At the very top of the Gallery Panel are the boards disclosure and settings buttons.
![image](../assets/gallery/top_controls.png)
The disclosure button shows the name of the currently selected board and allows you to show and hide the board thumbnails (shown in the image below).
![image](../assets/gallery/board_thumbnails.png)
The settings button opens a list of options.
![image](../assets/gallery/board_settings.png)
- ***Image Size*** this slider lets you control the size of the image previews (images of three different sizes).
- ***Auto-Switch to New Images*** if you turn this on, whenever a new image is generated, it will automatically be loaded into the current image panel on the Text to Image tab and into the result panel on the [Image to Image](IMG2IMG.md) tab. This will happen invisibly if you are on any other tab when the image is generated.
- ***Auto-Assign Board on Click*** whenever an image is generated or saved, it always gets put in a board. The board it gets put into is marked with AUTO (image of board marked). Turning on Auto-Assign Board on Click will make whichever board you last selected be the destination when you click Invoke. That means you can click Invoke, select a different board, and then click Invoke again, and the two images will be put in two different boards. **It's the board selected when Invoke is clicked that's used, not the board that's selected when the image is finished generating.** Turning this off enables the Auto-Add Board drop-down, which lets you set one specific board to always put generated images into. This also enables and disables the Auto-add to this Board menu item described below.
- ***Always Show Image Size Badge*** this toggles whether to show image sizes for each image preview (show two images, one with sizes shown, one without)
Below these two buttons, you'll see the Search Boards text entry area. You use this to search for specific boards by name.
Next to it is the Add Board (+) button which lets you add new boards. Boards can be renamed by clicking on the name of the board under its thumbnail and typing in the new name.
### Board Thumbnail Menu
Each board has a context menu (ctrl+click / right-click).
![image](../assets/gallery/thumbnail_menu.png)
- ***Auto-add to this Board*** if you've disabled Auto-Assign Board on Click in the board settings, you can use this option to set this board to be where new images are put.
- ***Download Board*** this will add all the images in the board into a zip file and provide a link to it in a notification (image of notification)
- ***Delete Board*** this will delete the board
> [!CAUTION]
> This will delete all the images in the board and the board itself.
### Board Contents
Every board is organized into two tabs, Images and Assets.
![image](../assets/gallery/board_tabs.png)
Images are the Invoke-generated images that are placed into the board. Assets are images that you upload into Invoke to be used as an [Image Prompt](https://support.invoke.ai/support/solutions/articles/151000159340-using-the-image-prompt-adapter-ip-adapter-) or in the [Image to Image](IMG2IMG.md) tab.
### Image Thumbnail Menu
Every image generated by Invoke has its generation information stored as text inside the image file itself. This can be read directly by selecting the image and clicking on the Info button ![image](../assets/gallery/info_button.png) in any of the image result panels.
Each image also has a context menu (ctrl+click / right-click).
![image](../assets/gallery/image_menu.png)
The options are (items marked with an * will not work with images that lack generation information):
- ***Open in New Tab*** this will open the image alone in a new browser tab, separate from the Invoke interface.
- ***Download Image*** this will trigger your browser to download the image.
- ***Load Workflow **** this will load any workflow settings into the Workflow tab and automatically open it.
- ***Remix Image **** this will load all of the image's generation information, **excluding its Seed**, into the left-hand control panel
- ***Use Prompt **** this will load only the image's text prompts into the left-hand control panel
- ***Use Seed **** this will load only the image's Seed into the left-hand control panel
- ***Use All **** this will load all of the image's generation information into the left-hand control panel
- ***Send to Image to Image*** this will put the image into the left-hand panel in the Image to Image tab and automatically open it
- ***Send to Unified Canvas*** this will **replace whatever is already present** in the Unified Canvas tab with the image and automatically open the tab
- ***Change Board*** this will open a small window that will let you move the image to a different board. This is the same as dragging the image to that board's thumbnail.
- ***Star Image*** this will add the image to the board's list of starred images that are always kept at the top of the gallery. This is the same as clicking on the star on the top right-hand side of the image that appears when you hover over the image with the mouse
- ***Delete Image*** this will delete the image from the board
> [!CAUTION]
> This will delete the image entirely from Invoke.
## Summary
This walkthrough only covers the Gallery interface and Boards. Actually generating images is handled by [Prompts](PROMPTS.md), the [Image to Image](IMG2IMG.md) tab, and the [Unified Canvas](UNIFIED_CANVAS.md).
## Acknowledgements
A huge shout-out to the core team working to make the Web GUI a reality,
including [psychedelicious](https://github.com/psychedelicious),
[Kyle0654](https://github.com/Kyle0654) and
[blessedcoolant](https://github.com/blessedcoolant).
[hipsterusername](https://github.com/hipsterusername) was the team's unofficial
cheerleader and added tooltips/docs.

View File

@ -108,40 +108,6 @@ Can be used with .and():
Each will give you different results - try them out and see what you prefer!
### Cross-Attention Control ('prompt2prompt')
Sometimes an image you generate is almost right, and you just want to change one
detail without affecting the rest. You could use a photo editor and inpainting
to overpaint the area, but that's a pain. Here's where `prompt2prompt` comes in
handy.
Generate an image with a given prompt, record the seed of the image, and then
use the `prompt2prompt` syntax to substitute words in the original prompt for
words in a new prompt. This works for `img2img` as well.
For example, consider the prompt `a cat.swap(dog) playing with a ball in the forest`. Normally, because the words interact with each other during Stable Diffusion image generation, these two prompts would generate different compositions:
- `a cat playing with a ball in the forest`
- `a dog playing with a ball in the forest`
| `a cat playing with a ball in the forest` | `a dog playing with a ball in the forest` |
| --- | --- |
| img | img |
- For multiple word swaps, use parentheses: `a (fluffy cat).swap(barking dog) playing with a ball in the forest`.
- To swap a comma, use quotes: `a ("fluffy, grey cat").swap("big, barking dog") playing with a ball in the forest`.
- Supports options `t_start` and `t_end` (each 0-1), loosely corresponding to [bloc97's](https://github.com/bloc97/CrossAttentionControl) `prompt_edit_tokens_start/_end` but with the math swapped to make it more intuitive. `t_start` and `t_end` control on which steps cross-attention control runs. With the default values `t_start=0` and `t_end=1`, cross-attention control is active on every step of image generation. Other values can be used to turn cross-attention control off for part of the image generation process.
- For example, with 10 diffusion steps and the prompt `a cat.swap(dog, t_start=0.3, t_end=1.0) playing with a ball in the forest`, the first 3 steps will run as `a cat playing with a ball in the forest`, while the last 7 steps will run as `a dog playing with a ball in the forest`, but the pixels that represent `dog` will be locked to the pixels that would have represented `cat` if the `cat` prompt had been used instead.
- Conversely, for `a cat.swap(dog, t_start=0, t_end=0.7) playing with a ball in the forest`, the first 7 steps will run as `a dog playing with a ball in the forest` with the pixels that represent `dog` locked to the same pixels that would have represented `cat` if the `cat` prompt was being used instead. The final 3 steps will just run `a cat playing with a ball in the forest`.
> For img2img, the step sequence does not start at 0 but instead at `(1.0-strength)` - so if the img2img `strength` is `0.7`, `t_start` and `t_end` must both be greater than `0.3` (`1.0-0.7`) to have any effect.
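As a concrete sketch of that arithmetic (the strength value and prompts here are hypothetical):

```sh
# img2img with strength=0.6 -> the step sequence runs from 1.0 - 0.6 = 0.4 up to 1.0,
# so the swap window must overlap (0.4, 1.0] to do anything:
#   a cat.swap(dog, t_start=0.5, t_end=1.0) ...  # swap is active for the later img2img steps
#   a cat.swap(dog, t_start=0.2, t_end=0.3) ...  # window ends before 0.4, so the swap never takes effect
```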
Prompt2prompt `.swap()` is not compatible with xformers, which will be temporarily disabled when doing a `.swap()` - so you should expect to use more VRAM and run more slowly than with xformers enabled.
The `prompt2prompt` code is based off
[bloc97's colab](https://github.com/bloc97/CrossAttentionControl).
### Escaping parentheses and speech marks
If the model you are using has parentheses () or speech marks "" as part of its

View File

@ -4,278 +4,6 @@ title: Training
# :material-file-document: Training
# Textual Inversion Training
## **Personalizing Text-to-Image Generation**
Invoke Training has moved to its own repository, with a dedicated UI for accessing common scripts like Textual Inversion and LoRA training.
You may personalize the generated images to provide your own styles or objects
by training a new LDM checkpoint and introducing a new vocabulary to the fixed
model as a (.pt) embeddings file. Alternatively, you may use or train
HuggingFace Concepts embeddings files (.bin) from
<https://huggingface.co/sd-concepts-library> and its associated
notebooks.
## **Hardware and Software Requirements**
You will need a GPU to perform training in a reasonable length of
time, and at least 12 GB of VRAM. We recommend using the [`xformers`
library](../installation/070_INSTALL_XFORMERS.md) to accelerate the
training process further. During training, about 8 GB is temporarily
needed in order to store intermediate models, checkpoints and logs.
## **Preparing for Training**
To train, prepare a folder that contains 3-5 images that illustrate
the object or concept. It is good to provide a variety of examples or
poses to avoid overtraining the system. Format these images as PNG
(preferred) or JPG. You do not need to resize or crop the images in
advance, but for more control you may wish to do so.
Place the training images in a directory on the machine InvokeAI runs
on. We recommend placing them in a subdirectory of the
`text-inversion-training-data` folder located in the InvokeAI root
directory, ordinarily `~/invokeai` (Linux/Mac), or
`C:\Users\your_name\invokeai` (Windows). For example, to create an
embedding for the "psychedelic" style, you'd place the training images
into the directory
`~/invokeai/text-inversion-training-data/psychedelic`.
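A minimal sketch of that layout (the source folder of images is hypothetical):

```sh
# create the training-data subdirectory for a style named "psychedelic"
mkdir -p ~/invokeai/text-inversion-training-data/psychedelic
# copy 3-5 example images into it
cp ~/Pictures/psychedelic-style/*.png ~/invokeai/text-inversion-training-data/psychedelic/
```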
## **Launching Training Using the Console Front End**
InvokeAI 2.3 and higher comes with a text console-based training front
end. From within the `invoke.sh`/`invoke.bat` Invoke launcher script,
start the training tool by selecting choice (3):
```sh
1 "Generate images with a browser-based interface"
2 "Explore InvokeAI nodes using a command-line interface"
3 "Textual inversion training"
4 "Merge models (diffusers type only)"
5 "Download and install models"
6 "Change InvokeAI startup options"
7 "Re-run the configure script to fix a broken install or to complete a major upgrade"
8 "Open the developer console"
9 "Update InvokeAI"
```
Alternatively, you can select option (8) to open the developer console, or, from the command line with the InvokeAI virtual environment active, launch the front end with the command `invokeai-ti --gui`.
This will launch a text-based front end that will look like this:
<figure markdown>
![ti-frontend](../assets/textual-inversion/ti-frontend.png)
</figure>
The interface is keyboard-based. Move from field to field using
control-N (^N) to move to the next field and control-P (^P) to the
previous one. <Tab> and <shift-TAB> work as well. Once a field is
active, use the cursor keys. In a checkbox group, use the up and down
cursor keys to move from choice to choice, and <space> to select a
choice. In a scrollbar, use the left and right cursor keys to increase
and decrease the value of the scroll. In textfields, type the desired
values.
The number of parameters may look intimidating, but in most cases the
predefined defaults work fine. The red circled fields in the above
illustration are the ones you will adjust most frequently.
### Model Name
This will list all the diffusers models that are currently
installed. Select the one you wish to use as the basis for your
embedding. Be aware that if you use an SD-1.X-based model for your
training, you will only be able to use this embedding with other
SD-1.X-based models. Similarly, if you train on SD-2.X, you will only
be able to use the embeddings with models based on SD-2.X.
### Trigger Term
This is the prompt term you will use to trigger the embedding. Type a
single word or phrase you wish to use as the trigger, for example
"psychedelic" (without angle brackets). Within InvokeAI, you will then
be able to activate the trigger using the syntax `<psychedelic>`.
### Initializer
This is a single character that is used internally during the training
process as a placeholder for the trigger term. It defaults to "*" and
can usually be left alone.
### Resume from last saved checkpoint
As training proceeds, textual inversion will write a series of
intermediate files that can be used to resume training from where it
was left off in the case of an interruption. This checkbox will be
automatically selected if you provide a previously used trigger term
and at least one checkpoint file is found on disk.
Note that as of 20 January 2023, resume does not seem to be working
properly due to an issue with the upstream code.
### Data Training Directory
This is the location of the images to be used for training. When you
select a trigger term like "my-trigger", the frontend will prepopulate
this field with `~/invokeai/text-inversion-training-data/my-trigger`,
but you can change the path to wherever you want.
### Output Destination Directory
This is the location of the logs, checkpoint files, and embedding
files created during training. When you select a trigger term like
"my-trigger", the frontend will prepopulate this field with
`~/invokeai/text-inversion-output/my-trigger`, but you can change the
path to wherever you want.
### Image resolution
The images in the training directory will be automatically scaled to
the value you use here. For best results, you will want to use the
same default resolution of the underlying model (512 pixels for
SD-1.5, 768 for the larger version of SD-2.1).
### Center crop images
If this is selected, your images will be center cropped to make them
square before resizing them to the desired resolution. Center cropping
can indiscriminately cut off the top of subjects' heads for portrait
aspect images, so if you have images like this, you may wish to use a
photoeditor to manually crop them to a square aspect ratio.
### Mixed precision
Select the floating point precision for the embedding. "no" will
result in a full 32-bit precision, "fp16" will provide 16-bit
precision, and "bf16" will provide mixed precision (only available
when XFormers is used).
### Max training steps
How many steps the training will take before the model converges. Most
training sets will converge with 2000-3000 steps.
### Batch size
This adjusts how many training images are processed simultaneously in
each step. Higher values will cause the training process to run more
quickly, but use more memory. The default size will run with GPUs with
as little as 12 GB.
### Learning rate
The rate at which the system adjusts its internal weights during
training. Higher values risk overtraining (getting the same image each
time), and lower values will take more steps to train a good
model. The default of 0.0005 is conservative; you may wish to increase
it to 0.005 to speed up training.
### Scale learning rate by number of GPUs, steps and batch size
If this is selected (the default) the system will adjust the provided
learning rate to improve performance.
### Use xformers acceleration
This will activate XFormers memory-efficient attention. You need to
have XFormers installed for this to have an effect.
### Learning rate scheduler
This adjusts how the learning rate changes over the course of
training. The default "constant" means to use a constant learning rate
for the entire training session. The other values scale the learning
rate according to various formulas.
Only "constant" is supported by the XFormers library.
### Gradient accumulation steps
This is a parameter that allows you to use bigger batch sizes than
your GPU's VRAM would ordinarily accommodate, at the cost of some
performance.
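To sketch the usual arithmetic (this is the standard meaning of gradient accumulation, not something specific to this front end):

```sh
# effective batch size = train_batch_size x gradient_accumulation_steps
# e.g. --train_batch_size=8 --gradient_accumulation_steps=4 -> 32 images per weight update,
# while only 8 images need to fit in VRAM at any one time
```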
### Warmup steps
If "constant_with_warmup" is selected in the learning rate scheduler,
then this provides the number of warmup steps. Warmup steps have a
very low learning rate, and are one way of preventing early
overtraining.
## The training run
Start the training run by advancing to the OK button (bottom right)
and pressing <enter>. A series of progress messages will be displayed
as the training process proceeds. This may take an hour or two,
depending on settings and the speed of your system. Various log and
checkpoint files will be written into the output directory (ordinarily
`~/invokeai/text-inversion-output/my-model/`).
At the end of successful training, the system will copy the file
`learned_embeds.bin` into the InvokeAI root directory's `embeddings`
directory, using a subdirectory named after the trigger token. For
example, if the trigger token was `psychedelic`, then look for the
embeddings file in
`~/invokeai/embeddings/psychedelic/learned_embeds.bin`
You may now launch InvokeAI and try out a prompt that uses the trigger
term. For example `a plate of banana sushi in <psychedelic> style`.
## **Training with the Command-Line Script**
Training can also be done using a traditional command-line script. It
can be launched from within the "developer's console", or from the
command line after activating InvokeAI's virtual environment.
It accepts a large number of arguments, which can be summarized by
passing the `--help` argument:
```sh
invokeai-ti --help
```
Typical usage is shown here:
```sh
invokeai-ti \
--model=stable-diffusion-1.5 \
--resolution=512 \
--learnable_property=style \
--initializer_token='*' \
--placeholder_token='<psychedelic>' \
--train_data_dir=/home/lstein/invokeai/training-data/psychedelic \
--output_dir=/home/lstein/invokeai/text-inversion-training/psychedelic \
--scale_lr \
--train_batch_size=8 \
--gradient_accumulation_steps=4 \
--max_train_steps=3000 \
--learning_rate=0.0005 \
--resume_from_checkpoint=latest \
--lr_scheduler=constant \
--mixed_precision=fp16 \
--only_save_embeds
```
## Troubleshooting
### `Cannot load embedding for <trigger>. It was trained on a model with token dimension 1024, but the current model has token dimension 768`
Messages like this indicate you trained the embedding on a different base model than the currently selected one.
For example, in the error above, the training was done on SD2.1 (768x768) but it was used on SD1.5 (512x512).
## Reading
For more information on textual inversion, please see the following
resources:
* The [textual inversion repository](https://github.com/rinongal/textual_inversion) and
associated paper for details and limitations.
* [HuggingFace's textual inversion training
page](https://huggingface.co/docs/diffusers/training/text_inversion)
* [HuggingFace example script
documentation](https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion)
(Note that this script is similar to, but not identical to, `textual_inversion`, and produces embedding files that are completely compatible.)
---
Copyright (c) 2023, Lincoln Stein and the InvokeAI Development Team
You can find more by visiting the repo at https://github.com/invoke-ai/invoke-training

View File

@ -54,7 +54,7 @@ main sections:
of buttons at the top lets you modify and manipulate the image in
various ways.
3. A **gallery** section on the left that contains a history of the images you
3. A **gallery** section on the right that contains a history of the images you
have generated. These images are read and written to the directory specified
in the `INVOKEAIROOT/invokeai.yaml` initialization file, usually a directory
named `outputs` in `INVOKEAIROOT`.

View File

@ -1,8 +1,10 @@
# Automatic Install
# Automatic Install & Updates
The installer is used for both new installs and updates.
**The same packaged installer file can be used for both new installs and updates.**
Using the installer for updates will leave everything you've added since installation in place, and just update the core libraries used to run Invoke.
Simply use the same path you installed to originally.
Both release and pre-release versions can be installed using it. It also supports install a wheel if needed.
Both release and pre-release versions can be installed using the installer. It also supports installing from a wheel file if needed.
Be sure to review the [installation requirements] and ensure your system has everything it needs to install Invoke.
@ -96,7 +98,7 @@ Updating is exactly the same as installing - download the latest installer, choo
If you have installation issues, please review the [FAQ]. You can also [create an issue] or ask for help on [discord].
[installation requirements]: INSTALLATION.md#installation-requirements
[installation requirements]: INSTALL_REQUIREMENTS.md
[FAQ]: ../help/FAQ.md
[install some models]: 050_INSTALLING_MODELS.md
[configuration docs]: ../features/CONFIGURATION.md

View File

@ -1,4 +1,4 @@
# Installation Overview
# Installation and Updating Overview
Before installing, review the [installation requirements] to ensure your system is set up properly.
@ -6,14 +6,21 @@ See the [FAQ] for frequently-encountered installation issues.
If you need more help, join our [discord] or [create an issue].
<h2>Automatic Install</h2>
<h2>Automatic Install & Updates </h2>
✅ The automatic install is the best way to run InvokeAI. Check out the [installation guide] to get started.
⬆️ The same installer is also the best way to update InvokeAI - simply rerun it, choosing the same folder you installed to.
The installation process simply manages installation for the core libraries & application dependencies that run Invoke.
Any models, images, or other assets in the Invoke root folder won't be affected by the installation process.
<h2>Manual Install</h2>
If you are familiar with python and want more control over the packages that are installed, you can [install InvokeAI manually via PyPI].
Updates are managed by reinstalling the latest version through PyPI.
<h2>Developer Install</h2>
If you want to contribute to InvokeAI, consult the [developer install guide].

View File

@ -23,6 +23,7 @@ If you have an interest in how InvokeAI works, or you would like to add features
1. [Fork and clone] the [InvokeAI repo].
1. Follow the [manual installation] docs to create a new virtual environment for the development install.
- Create a new folder outside the repo root for the installation and create the venv inside that folder.
- When installing the InvokeAI package, add `-e` to the command so you get an [editable install] (see the sketch after this list).
1. Install the [frontend dev toolchain] and do a production build of the UI as described.
1. You can now run the app as described in the [manual installation] docs.
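A minimal sketch of that editable install, assuming you run it from the repo root with the new virtual environment activated (extras, if any, are omitted here):

```sh
# from the InvokeAI repo root, with the development virtual environment active
pip install -e .
```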

View File

@ -37,13 +37,13 @@ Invoke runs best with a dedicated GPU, but will fall back to running on CPU, alb
=== "Nvidia"
```
Any GPU with at least 8GB VRAM. Linux only.
Any GPU with at least 8GB VRAM.
```
=== "AMD"
```
Any GPU with at least 16GB VRAM.
Any GPU with at least 16GB VRAM. Linux only.
```
=== "Mac"

View File

@ -13,7 +13,6 @@ from pydantic import BaseModel, Field
from invokeai.app.invocations.upscale import ESRGAN_MODELS
from invokeai.app.services.invocation_cache.invocation_cache_common import InvocationCacheStatus
from invokeai.backend.image_util.infill_methods.patchmatch import PatchMatch
from invokeai.backend.image_util.safety_checker import SafetyChecker
from invokeai.backend.util.logging import logging
from invokeai.version import __version__
@ -109,9 +108,7 @@ async def get_config() -> AppConfig:
upscaling_models.append(str(Path(model).stem))
upscaler = Upscaler(upscaling_method="esrgan", upscaling_models=upscaling_models)
nsfw_methods = []
if SafetyChecker.safety_checker_available():
nsfw_methods.append("nsfw_checker")
nsfw_methods = ["nsfw_checker"]
watermarking_methods = ["invisible_watermark"]

View File

@ -6,7 +6,7 @@ import pathlib
import shutil
import traceback
from copy import deepcopy
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Type
from fastapi import Body, Path, Query, Response, UploadFile
from fastapi.responses import FileResponse
@ -16,6 +16,7 @@ from pydantic import AnyHttpUrl, BaseModel, ConfigDict, Field
from starlette.exceptions import HTTPException
from typing_extensions import Annotated
from invokeai.app.services.model_images.model_images_common import ModelImageFileNotFoundException
from invokeai.app.services.model_install import ModelInstallJob
from invokeai.app.services.model_records import (
DuplicateModelException,
@ -52,6 +53,13 @@ class ModelsList(BaseModel):
model_config = ConfigDict(use_enum_values=True)
def add_cover_image_to_model_config(config: AnyModelConfig, dependencies: Type[ApiDependencies]) -> AnyModelConfig:
"""Add a cover image URL to a model configuration."""
cover_image = dependencies.invoker.services.model_images.get_url(config.key)
config.cover_image = cover_image
return config
##############################################################################
# These are example inputs and outputs that are used in places where Swagger
# is unable to generate a correct example.
@ -118,8 +126,7 @@ async def list_model_records(
record_store.search_by_attr(model_type=model_type, model_name=model_name, model_format=model_format)
)
for model in found_models:
cover_image = ApiDependencies.invoker.services.model_images.get_url(model.key)
model.cover_image = cover_image
model = add_cover_image_to_model_config(model, ApiDependencies)
return ModelsList(models=found_models)
@ -160,12 +167,9 @@ async def get_model_record(
key: str = Path(description="Key of the model record to fetch."),
) -> AnyModelConfig:
"""Get a model record"""
record_store = ApiDependencies.invoker.services.model_manager.store
try:
config: AnyModelConfig = record_store.get_model(key)
cover_image = ApiDependencies.invoker.services.model_images.get_url(key)
config.cover_image = cover_image
return config
config = ApiDependencies.invoker.services.model_manager.store.get_model(key)
return add_cover_image_to_model_config(config, ApiDependencies)
except UnknownModelException as e:
raise HTTPException(status_code=404, detail=str(e))
@ -294,14 +298,15 @@ async def update_model_record(
installer = ApiDependencies.invoker.services.model_manager.install
try:
record_store.update_model(key, changes=changes)
model_response: AnyModelConfig = installer.sync_model_path(key)
config = installer.sync_model_path(key)
config = add_cover_image_to_model_config(config, ApiDependencies)
logger.info(f"Updated model: {key}")
except UnknownModelException as e:
raise HTTPException(status_code=404, detail=str(e))
except ValueError as e:
logger.error(str(e))
raise HTTPException(status_code=409, detail=str(e))
return model_response
return config
@model_manager_router.get(
@ -648,6 +653,14 @@ async def convert_model(
logger.error(str(e))
raise HTTPException(status_code=409, detail=str(e))
# Update the model image if the model had one
try:
model_image = ApiDependencies.invoker.services.model_images.get(key)
ApiDependencies.invoker.services.model_images.save(model_image, new_key)
ApiDependencies.invoker.services.model_images.delete(key)
except ModelImageFileNotFoundException:
pass
# delete the original safetensors file
installer.delete(key)
@ -655,7 +668,8 @@ async def convert_model(
shutil.rmtree(cache_path)
# return the config record for the new diffusers directory
new_config: AnyModelConfig = store.get_model(new_key)
new_config = store.get_model(new_key)
new_config = add_cover_image_to_model_config(new_config, ApiDependencies)
return new_config

View File

@ -28,7 +28,7 @@ from invokeai.app.api.no_cache_staticfiles import NoCacheStaticFiles
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.config.config_default import get_config
from invokeai.app.services.session_processor.session_processor_common import ProgressImage
from invokeai.backend.util.devices import get_torch_device_name
from invokeai.backend.util.devices import TorchDevice
from ..backend.util.logging import InvokeAILogger
from .api.dependencies import ApiDependencies
@ -63,7 +63,7 @@ logger = InvokeAILogger.get_logger(config=app_config)
mimetypes.add_type("application/javascript", ".js")
mimetypes.add_type("text/css", ".css")
torch_device_name = get_torch_device_name()
torch_device_name = TorchDevice.get_torch_device_name()
logger.info(f"Using torch device: {torch_device_name}")
@ -164,6 +164,12 @@ def custom_openapi() -> dict[str, Any]:
for schema_key, schema_json in additional_schemas[1]["$defs"].items():
openapi_schema["components"]["schemas"][schema_key] = schema_json
openapi_schema["components"]["schemas"]["InvocationOutputMap"] = {
"type": "object",
"properties": {},
"required": [],
}
# Add a reference to the output type to additionalProperties of the invoker schema
for invoker in all_invocations:
invoker_name = invoker.__name__ # type: ignore [attr-defined] # this is a valid attribute
@ -172,6 +178,8 @@ def custom_openapi() -> dict[str, Any]:
invoker_schema = openapi_schema["components"]["schemas"][f"{invoker_name}"]
outputs_ref = {"$ref": f"#/components/schemas/{output_type_title}"}
invoker_schema["output"] = outputs_ref
openapi_schema["components"]["schemas"]["InvocationOutputMap"]["properties"][invoker.get_type()] = outputs_ref
openapi_schema["components"]["schemas"]["InvocationOutputMap"]["required"].append(invoker.get_type())
invoker_schema["class"] = "invocation"
# This code no longer seems to be necessary?

View File

@ -5,7 +5,15 @@ from compel import Compel, ReturnedEmbeddingsType
from compel.prompt_parser import Blend, Conjunction, CrossAttentionControlSubstitute, FlattenedPrompt, Fragment
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIComponent
from invokeai.app.invocations.fields import (
ConditioningField,
FieldDescriptions,
Input,
InputField,
OutputField,
TensorField,
UIComponent,
)
from invokeai.app.invocations.primitives import ConditioningOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.ti_utils import generate_ti_list
@ -14,10 +22,9 @@ from invokeai.backend.model_patcher import ModelPatcher
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
BasicConditioningInfo,
ConditioningFieldData,
ExtraConditioningInfo,
SDXLConditioningInfo,
)
from invokeai.backend.util.devices import torch_dtype
from invokeai.backend.util.devices import TorchDevice
from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from .model import CLIPField
@ -36,7 +43,7 @@ from .model import CLIPField
title="Prompt",
tags=["prompt", "compel"],
category="conditioning",
version="1.1.1",
version="1.2.0",
)
class CompelInvocation(BaseInvocation):
"""Parse prompt using compel package to conditioning."""
@ -51,6 +58,9 @@ class CompelInvocation(BaseInvocation):
description=FieldDescriptions.clip,
input=Input.Connection,
)
mask: Optional[TensorField] = InputField(
default=None, description="A mask defining the region that this conditioning prompt applies to."
)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> ConditioningOutput:
@ -89,7 +99,7 @@ class CompelInvocation(BaseInvocation):
tokenizer=tokenizer,
text_encoder=text_encoder,
textual_inversion_manager=ti_manager,
dtype_for_device_getter=torch_dtype,
dtype_for_device_getter=TorchDevice.choose_torch_dtype,
truncate_long_prompts=False,
)
@ -98,27 +108,19 @@ class CompelInvocation(BaseInvocation):
if context.config.get().log_tokenization:
log_tokenization_for_conjunction(conjunction, tokenizer)
c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
ec = ExtraConditioningInfo(
tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
cross_attention_control_args=options.get("cross_attention_control", None),
)
c, _options = compel.build_conditioning_tensor_for_conjunction(conjunction)
c = c.detach().to("cpu")
conditioning_data = ConditioningFieldData(
conditionings=[
BasicConditioningInfo(
embeds=c,
extra_conditioning=ec,
)
]
)
conditioning_data = ConditioningFieldData(conditionings=[BasicConditioningInfo(embeds=c)])
conditioning_name = context.conditioning.save(conditioning_data)
return ConditioningOutput.build(conditioning_name)
return ConditioningOutput(
conditioning=ConditioningField(
conditioning_name=conditioning_name,
mask=self.mask,
)
)
class SDXLPromptInvocationBase:
@ -132,7 +134,7 @@ class SDXLPromptInvocationBase:
get_pooled: bool,
lora_prefix: str,
zero_on_empty: bool,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[ExtraConditioningInfo]]:
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
tokenizer_info = context.models.load(clip_field.tokenizer)
tokenizer_model = tokenizer_info.model
assert isinstance(tokenizer_model, CLIPTokenizer)
@ -159,7 +161,7 @@ class SDXLPromptInvocationBase:
)
else:
c_pooled = None
return c, c_pooled, None
return c, c_pooled
def _lora_loader() -> Iterator[Tuple[LoRAModelRaw, float]]:
for lora in clip_field.loras:
@ -191,7 +193,7 @@ class SDXLPromptInvocationBase:
tokenizer=tokenizer,
text_encoder=text_encoder,
textual_inversion_manager=ti_manager,
dtype_for_device_getter=torch_dtype,
dtype_for_device_getter=TorchDevice.choose_torch_dtype,
truncate_long_prompts=False, # TODO:
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, # TODO: clip skip
requires_pooled=get_pooled,
@ -204,17 +206,12 @@ class SDXLPromptInvocationBase:
log_tokenization_for_conjunction(conjunction, tokenizer)
# TODO: ask for optimizations? to not run text_encoder twice
c, options = compel.build_conditioning_tensor_for_conjunction(conjunction)
c, _options = compel.build_conditioning_tensor_for_conjunction(conjunction)
if get_pooled:
c_pooled = compel.conditioning_provider.get_pooled_embeddings([prompt])
else:
c_pooled = None
ec = ExtraConditioningInfo(
tokens_count_including_eos_bos=get_max_token_count(tokenizer, conjunction),
cross_attention_control_args=options.get("cross_attention_control", None),
)
del tokenizer
del text_encoder
del tokenizer_info
@ -224,7 +221,7 @@ class SDXLPromptInvocationBase:
if c_pooled is not None:
c_pooled = c_pooled.detach().to("cpu")
return c, c_pooled, ec
return c, c_pooled
@invocation(
@ -232,7 +229,7 @@ class SDXLPromptInvocationBase:
title="SDXL Prompt",
tags=["sdxl", "compel", "prompt"],
category="conditioning",
version="1.1.1",
version="1.2.0",
)
class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
"""Parse prompt using compel package to conditioning."""
@ -255,20 +252,19 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
target_height: int = InputField(default=1024, description="")
clip: CLIPField = InputField(description=FieldDescriptions.clip, input=Input.Connection, title="CLIP 1")
clip2: CLIPField = InputField(description=FieldDescriptions.clip, input=Input.Connection, title="CLIP 2")
mask: Optional[TensorField] = InputField(
default=None, description="A mask defining the region that this conditioning prompt applies to."
)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> ConditioningOutput:
c1, c1_pooled, ec1 = self.run_clip_compel(
context, self.clip, self.prompt, False, "lora_te1_", zero_on_empty=True
)
c1, c1_pooled = self.run_clip_compel(context, self.clip, self.prompt, False, "lora_te1_", zero_on_empty=True)
if self.style.strip() == "":
c2, c2_pooled, ec2 = self.run_clip_compel(
c2, c2_pooled = self.run_clip_compel(
context, self.clip2, self.prompt, True, "lora_te2_", zero_on_empty=True
)
else:
c2, c2_pooled, ec2 = self.run_clip_compel(
context, self.clip2, self.style, True, "lora_te2_", zero_on_empty=True
)
c2, c2_pooled = self.run_clip_compel(context, self.clip2, self.style, True, "lora_te2_", zero_on_empty=True)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)
@ -307,17 +303,19 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
conditioning_data = ConditioningFieldData(
conditionings=[
SDXLConditioningInfo(
embeds=torch.cat([c1, c2], dim=-1),
pooled_embeds=c2_pooled,
add_time_ids=add_time_ids,
extra_conditioning=ec1,
embeds=torch.cat([c1, c2], dim=-1), pooled_embeds=c2_pooled, add_time_ids=add_time_ids
)
]
)
conditioning_name = context.conditioning.save(conditioning_data)
return ConditioningOutput.build(conditioning_name)
return ConditioningOutput(
conditioning=ConditioningField(
conditioning_name=conditioning_name,
mask=self.mask,
)
)
@invocation(
@ -345,7 +343,7 @@ class SDXLRefinerCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase
@torch.no_grad()
def invoke(self, context: InvocationContext) -> ConditioningOutput:
# TODO: if there will appear lora for refiner - write proper prefix
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True, "<NONE>", zero_on_empty=False)
c2, c2_pooled = self.run_clip_compel(context, self.clip2, self.style, True, "<NONE>", zero_on_empty=False)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)
@ -354,14 +352,7 @@ class SDXLRefinerCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase
assert c2_pooled is not None
conditioning_data = ConditioningFieldData(
conditionings=[
SDXLConditioningInfo(
embeds=c2,
pooled_embeds=c2_pooled,
add_time_ids=add_time_ids,
extra_conditioning=ec2, # or None
)
]
conditionings=[SDXLConditioningInfo(embeds=c2, pooled_embeds=c2_pooled, add_time_ids=add_time_ids)]
)
conditioning_name = context.conditioning.save(conditioning_data)

View File

@ -35,22 +35,16 @@ from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES, heuristic_resize
from invokeai.backend.image_util.canny import get_canny_edges
from invokeai.backend.image_util.depth_anything import DepthAnythingDetector
from invokeai.backend.image_util.dw_openpose import DWOpenposeDetector
from invokeai.backend.image_util.hed import HEDProcessor
from invokeai.backend.image_util.lineart import LineartProcessor
from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
from invokeai.backend.image_util.util import np_to_pil, pil_to_np
from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
CONTROLNET_MODE_VALUES = Literal["balanced", "more_prompt", "more_control", "unbalanced"]
CONTROLNET_RESIZE_VALUES = Literal[
"just_resize",
"crop_resize",
"fill_resize",
"just_resize_simple",
]
from .baseinvocation import BaseInvocation, BaseInvocationOutput, Classification, invocation, invocation_output
class ControlField(BaseModel):
@ -171,13 +165,13 @@ class ImageProcessorInvocation(BaseInvocation, WithMetadata, WithBoard):
title="Canny Processor",
tags=["controlnet", "canny"],
category="controlnet",
version="1.3.2",
version="1.3.3",
)
class CannyImageProcessorInvocation(ImageProcessorInvocation):
"""Canny edge detection for ControlNet"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
low_threshold: int = InputField(
default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)"
)
@ -205,13 +199,13 @@ class CannyImageProcessorInvocation(ImageProcessorInvocation):
title="HED (softedge) Processor",
tags=["controlnet", "hed", "softedge"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class HedImageProcessorInvocation(ImageProcessorInvocation):
"""Applies HED edge detection to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
# safe not supported in controlnet_aux v0.0.3
# safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
@ -234,13 +228,13 @@ class HedImageProcessorInvocation(ImageProcessorInvocation):
title="Lineart Processor",
tags=["controlnet", "lineart"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class LineartImageProcessorInvocation(ImageProcessorInvocation):
"""Applies line art processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
coarse: bool = InputField(default=False, description="Whether to use coarse mode")
def run_processor(self, image: Image.Image) -> Image.Image:
@ -256,13 +250,13 @@ class LineartImageProcessorInvocation(ImageProcessorInvocation):
title="Lineart Anime Processor",
tags=["controlnet", "lineart", "anime"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
"""Applies line art anime processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image) -> Image.Image:
processor = LineartAnimeProcessor()
@ -279,15 +273,15 @@ class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
title="Midas Depth Processor",
tags=["controlnet", "midas"],
category="controlnet",
version="1.2.3",
version="1.2.4",
)
class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
"""Applies Midas depth processing to image"""
a_mult: float = InputField(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)")
bg_th: float = InputField(default=0.1, ge=0, description="Midas parameter `bg_th`")
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
# depth_and_normal not supported in controlnet_aux v0.0.3
# depth_and_normal: bool = InputField(default=False, description="whether to use depth and normal mode")
@ -310,13 +304,13 @@ class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
title="Normal BAE Processor",
tags=["controlnet"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):
"""Applies NormalBae processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image):
normalbae_processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
@ -327,13 +321,13 @@ class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):
@invocation(
"mlsd_image_processor", title="MLSD Processor", tags=["controlnet", "mlsd"], category="controlnet", version="1.2.2"
"mlsd_image_processor", title="MLSD Processor", tags=["controlnet", "mlsd"], category="controlnet", version="1.2.3"
)
class MlsdImageProcessorInvocation(ImageProcessorInvocation):
"""Applies MLSD processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
thr_v: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_v`")
thr_d: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_d`")
@ -350,13 +344,13 @@ class MlsdImageProcessorInvocation(ImageProcessorInvocation):
@invocation(
"pidi_image_processor", title="PIDI Processor", tags=["controlnet", "pidi"], category="controlnet", version="1.2.2"
"pidi_image_processor", title="PIDI Processor", tags=["controlnet", "pidi"], category="controlnet", version="1.2.3"
)
class PidiImageProcessorInvocation(ImageProcessorInvocation):
"""Applies PIDI processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
@ -377,13 +371,13 @@ class PidiImageProcessorInvocation(ImageProcessorInvocation):
title="Content Shuffle Processor",
tags=["controlnet", "contentshuffle"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
"""Applies content shuffle processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
h: int = InputField(default=512, ge=0, description="Content shuffle `h` parameter")
w: int = InputField(default=512, ge=0, description="Content shuffle `w` parameter")
f: int = InputField(default=256, ge=0, description="Content shuffle `f` parameter")
@ -407,7 +401,7 @@ class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
title="Zoe (Depth) Processor",
tags=["controlnet", "zoe", "depth"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
"""Applies Zoe depth processing to image"""
@ -423,15 +417,15 @@ class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
title="Mediapipe Face Processor",
tags=["controlnet", "mediapipe", "face"],
category="controlnet",
version="1.2.3",
version="1.2.4",
)
class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
"""Applies mediapipe face processing to image"""
max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect")
min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image):
mediapipe_face_processor = MediapipeFaceDetector()
@ -450,7 +444,7 @@ class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
title="Leres (Depth) Processor",
tags=["controlnet", "leres", "depth"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class LeresImageProcessorInvocation(ImageProcessorInvocation):
"""Applies leres processing to image"""
@ -458,8 +452,8 @@ class LeresImageProcessorInvocation(ImageProcessorInvocation):
thr_a: float = InputField(default=0, description="Leres parameter `thr_a`")
thr_b: float = InputField(default=0, description="Leres parameter `thr_b`")
boost: bool = InputField(default=False, description="Whether to use boost mode")
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image):
leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators")
@ -479,7 +473,7 @@ class LeresImageProcessorInvocation(ImageProcessorInvocation):
title="Tile Resample Processor",
tags=["controlnet", "tile"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class TileResamplerProcessorInvocation(ImageProcessorInvocation):
"""Tile resampler processor"""
@ -519,13 +513,13 @@ class TileResamplerProcessorInvocation(ImageProcessorInvocation):
title="Segment Anything Processor",
tags=["controlnet", "segmentanything"],
category="controlnet",
version="1.2.3",
version="1.2.4",
)
class SegmentAnythingProcessorInvocation(ImageProcessorInvocation):
"""Applies segment anything processing to image"""
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image):
# segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
@ -566,12 +560,12 @@ class SamDetectorReproducibleColors(SamDetector):
title="Color Map Processor",
tags=["controlnet"],
category="controlnet",
version="1.2.2",
version="1.2.3",
)
class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
"""Generates a color map from the provided image"""
color_map_tile_size: int = InputField(default=64, ge=0, description=FieldDescriptions.tile_size)
color_map_tile_size: int = InputField(default=64, ge=1, description=FieldDescriptions.tile_size)
def run_processor(self, image: Image.Image):
np_image = np.array(image, dtype=np.uint8)
@ -598,7 +592,7 @@ DEPTH_ANYTHING_MODEL_SIZES = Literal["large", "base", "small"]
title="Depth Anything Processor",
tags=["controlnet", "depth", "depth anything"],
category="controlnet",
version="1.1.1",
version="1.1.2",
)
class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
"""Generates a depth map based on the Depth Anything algorithm"""
@ -606,7 +600,7 @@ class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
default="small", description="The size of the depth model to use"
)
resolution: int = InputField(default=512, ge=64, multiple_of=64, description=FieldDescriptions.image_res)
resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image):
depth_anything_detector = DepthAnythingDetector()
@ -621,7 +615,7 @@ class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
title="DW Openpose Image Processor",
tags=["controlnet", "dwpose", "openpose"],
category="controlnet",
version="1.1.0",
version="1.1.1",
)
class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
"""Generates an openpose pose from an image using DWPose"""
@ -629,7 +623,7 @@ class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
draw_body: bool = InputField(default=True)
draw_face: bool = InputField(default=False)
draw_hands: bool = InputField(default=False)
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
def run_processor(self, image: Image.Image):
dw_openpose = DWOpenposeDetector()
@ -641,3 +635,27 @@ class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
resolution=self.image_resolution,
)
return processed_image
@invocation(
"heuristic_resize",
title="Heuristic Resize",
tags=["image, controlnet"],
category="image",
version="1.0.1",
classification=Classification.Prototype,
)
class HeuristicResizeInvocation(BaseInvocation):
"""Resize an image using a heuristic method. Preserves edge maps."""
image: ImageField = InputField(description="The image to resize")
width: int = InputField(default=512, ge=1, description="The width to resize to (px)")
height: int = InputField(default=512, ge=1, description="The height to resize to (px)")
def invoke(self, context: InvocationContext) -> ImageOutput:
image = context.images.get_pil(self.image.image_name, "RGB")
np_img = pil_to_np(image)
np_resized = heuristic_resize(np_img, (self.width, self.height))
resized = np_to_pil(np_resized)
image_dto = context.images.save(image=resized)
return ImageOutput.build(image_dto)

View File

@ -203,6 +203,12 @@ class DenoiseMaskField(BaseModel):
gradient: bool = Field(default=False, description="Used for gradient inpainting")
class TensorField(BaseModel):
"""A tensor primitive field."""
tensor_name: str = Field(description="The name of a tensor.")
class LatentsField(BaseModel):
"""A latents tensor primitive field"""
@ -226,7 +232,11 @@ class ConditioningField(BaseModel):
"""A conditioning tensor primitive value"""
conditioning_name: str = Field(description="The name of conditioning tensor")
# endregion
mask: Optional[TensorField] = Field(
default=None,
description="The mask associated with this conditioning tensor. Excluded regions should be set to False, "
"included regions should be set to True.",
)
class MetadataField(RootModel[dict[str, Any]]):

View File

@ -1,11 +1,11 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)
from pathlib import Path
from typing import Literal, Optional
from typing import Literal, Optional, List, Union
import cv2
import numpy
from PIL import Image, ImageChops, ImageFilter, ImageOps
from transformers import AutoModelForCausalLM, AutoTokenizer
from invokeai.app.invocations.constants import IMAGE_MODES
from invokeai.app.invocations.fields import (
@ -16,7 +16,7 @@ from invokeai.app.invocations.fields import (
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.primitives import ImageOutput, CaptionImageOutputs, CaptionImageOutput
from invokeai.app.services.image_records.image_records_common import ImageCategory
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark
@ -67,6 +67,56 @@ class BlankImageInvocation(BaseInvocation, WithMetadata, WithBoard):
return ImageOutput.build(image_dto)
@invocation(
"auto_caption_image",
title="Automatically Caption Image",
tags=["image", "caption"],
category="image",
version="1.2.2",
)
class CaptionImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Adds a caption to an image"""
images: Union[ImageField,List[ImageField]] = InputField(description="The image to caption")
prompt: str = InputField(default="Describe this list of images in 20 words or less", description="Describe how you would like the image to be captioned.")
def invoke(self, context: InvocationContext) -> CaptionImageOutputs:
model_id = "vikhyatk/moondream2"
model_revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=model_revision)
moondream_model = AutoModelForCausalLM.from_pretrained(
model_id, trust_remote_code=True, revision=model_revision
)
output: CaptionImageOutputs = CaptionImageOutputs()
try:
from PIL.Image import Image
images: List[Image] = []
image_fields = self.images if isinstance(self.images, list) else [self.images]
for image in image_fields:
images.append(context.images.get_pil(image.image_name))
answers: List[str] = moondream_model.batch_answer(
images=images,
prompts=[self.prompt] * len(images),
tokenizer=tokenizer,
)
assert isinstance(answers, list)
for i, answer in enumerate(answers):
output.images.append(CaptionImageOutput(
image=image_fields[i],
width=images[i].width,
height=images[i].height,
caption=answer
))
except:
raise
finally:
del moondream_model
del tokenizer
return output
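
For context, a hedged sketch of the same moondream2 batch-captioning call outside the node; the batch_answer signature is taken from the invocation above and is specific to the pinned model revision.

# Sketch only: batch-captions two local images with moondream2, mirroring the node above.
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id, revision = "vikhyatk/moondream2", "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, revision=revision)

paths = ["a.png", "b.png"]  # placeholder inputs
images = [Image.open(p) for p in paths]
captions = model.batch_answer(
    images=images,
    prompts=["Describe this image in 20 words or less"] * len(images),
    tokenizer=tokenizer,
)
for path, caption in zip(paths, captions):
    print(path, "->", caption)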
@invocation(
"img_crop",
title="Crop Image",
@ -195,7 +245,7 @@ class ImagePasteInvocation(BaseInvocation, WithMetadata, WithBoard):
class MaskFromAlphaInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Extracts the alpha channel of an image as a mask."""
image: ImageField = InputField(description="The image to create the mask from")
image: List[ImageField] = InputField(description="The image to create the mask from")
invert: bool = InputField(default=False, description="Whether or not to invert the mask")
def invoke(self, context: InvocationContext) -> ImageOutput:
@ -504,7 +554,7 @@ class ImageInverseLerpInvocation(BaseInvocation, WithMetadata, WithBoard):
title="Blur NSFW Image",
tags=["image", "nsfw"],
category="image",
version="1.2.2",
version="1.2.3",
)
class ImageNSFWBlurInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Add blur to NSFW-flagged images"""
@ -516,23 +566,12 @@ class ImageNSFWBlurInvocation(BaseInvocation, WithMetadata, WithBoard):
logger = context.logger
logger.debug("Running NSFW checker")
if SafetyChecker.has_nsfw_concept(image):
logger.info("A potentially NSFW image has been detected. Image will be blurred.")
blurry_image = image.filter(filter=ImageFilter.GaussianBlur(radius=32))
caution = self._get_caution_img()
blurry_image.paste(caution, (0, 0), caution)
image = blurry_image
image = SafetyChecker.blur_if_nsfw(image)
image_dto = context.images.save(image=image)
return ImageOutput.build(image_dto)
def _get_caution_img(self) -> Image.Image:
import invokeai.app.assets.images as image_assets
caution = Image.open(Path(image_assets.__path__[0]) / "caution.png")
return caution.resize((caution.width // 2, caution.height // 2))
@invocation(
"img_watermark",

View File

@ -1,11 +1,11 @@
from builtins import float
from typing import List, Literal, Union
from typing import List, Literal, Optional, Union
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, UIType
from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField, TensorField, UIType
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import ImageField
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
@ -23,13 +23,19 @@ class IPAdapterField(BaseModel):
image: Union[ImageField, List[ImageField]] = Field(description="The IP-Adapter image prompt(s).")
ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model to use.")
image_encoder_model: ModelIdentifierField = Field(description="The name of the CLIP image encoder model.")
weight: Union[float, List[float]] = Field(default=1, description="The weight given to the ControlNet")
weight: Union[float, List[float]] = Field(default=1, description="The weight given to the IP-Adapter.")
target_blocks: List[str] = Field(default=[], description="The IP Adapter blocks to apply")
begin_step_percent: float = Field(
default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)"
)
end_step_percent: float = Field(
default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
)
mask: Optional[TensorField] = Field(
default=None,
description="The bool mask associated with this IP-Adapter. Excluded regions should be set to False, included "
"regions should be set to True.",
)
@field_validator("weight")
@classmethod
@ -52,7 +58,7 @@ class IPAdapterOutput(BaseInvocationOutput):
CLIP_VISION_MODEL_MAP = {"ViT-H": "ip_adapter_sd_image_encoder", "ViT-G": "ip_adapter_sdxl_image_encoder"}
@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.2.2")
@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.4.0")
class IPAdapterInvocation(BaseInvocation):
"""Collects IP-Adapter info to pass to other nodes."""
@ -73,12 +79,18 @@ class IPAdapterInvocation(BaseInvocation):
weight: Union[float, List[float]] = InputField(
default=1, description="The weight given to the IP-Adapter", title="Weight"
)
method: Literal["full", "style", "composition"] = InputField(
default="full", description="The method to apply the IP-Adapter"
)
begin_step_percent: float = InputField(
default=0, ge=0, le=1, description="When the IP-Adapter is first applied (% of total steps)"
)
end_step_percent: float = InputField(
default=1, ge=0, le=1, description="When the IP-Adapter is last applied (% of total steps)"
)
mask: Optional[TensorField] = InputField(
default=None, description="A mask defining the region that this IP-Adapter applies to."
)
@field_validator("weight")
@classmethod
@ -104,14 +116,35 @@ class IPAdapterInvocation(BaseInvocation):
image_encoder_model = self._get_image_encoder(context, image_encoder_model_name)
if self.method == "style":
if ip_adapter_info.base == "sd-1":
target_blocks = ["up_blocks.1"]
elif ip_adapter_info.base == "sdxl":
target_blocks = ["up_blocks.0.attentions.1"]
else:
raise ValueError(f"Unsupported IP-Adapter base type: '{ip_adapter_info.base}'.")
elif self.method == "composition":
if ip_adapter_info.base == "sd-1":
target_blocks = ["down_blocks.2", "mid_block"]
elif ip_adapter_info.base == "sdxl":
target_blocks = ["down_blocks.2.attentions.1"]
else:
raise ValueError(f"Unsupported IP-Adapter base type: '{ip_adapter_info.base}'.")
elif self.method == "full":
target_blocks = ["block"]
else:
raise ValueError(f"Unexpected IP-Adapter method: '{self.method}'.")
return IPAdapterOutput(
ip_adapter=IPAdapterField(
image=self.image,
ip_adapter_model=self.ip_adapter_model,
image_encoder_model=ModelIdentifierField.from_config(image_encoder_model),
weight=self.weight,
target_blocks=target_blocks,
begin_step_percent=self.begin_step_percent,
end_step_percent=self.end_step_percent,
mask=self.mask,
),
)
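
The method-to-blocks branching above can be read as a lookup table; the restatement below is illustrative only and is not part of the diff.

# Equivalent lookup table for the branching above (illustration only).
# "block" means the IP-Adapter is applied to every attention block.
IP_ADAPTER_TARGET_BLOCKS = {
    ("style", "sd-1"): ["up_blocks.1"],
    ("style", "sdxl"): ["up_blocks.0.attentions.1"],
    ("composition", "sd-1"): ["down_blocks.2", "mid_block"],
    ("composition", "sdxl"): ["down_blocks.2.attentions.1"],
    ("full", "sd-1"): ["block"],
    ("full", "sdxl"): ["block"],
}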

View File

@ -1,16 +1,16 @@
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
import inspect
import math
from contextlib import ExitStack
from functools import singledispatchmethod
from typing import Any, Iterator, List, Literal, Optional, Tuple, Union
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
import einops
import numpy as np
import numpy.typing as npt
import torch
import torchvision
import torchvision.transforms as T
from diffusers import AutoencoderKL, AutoencoderTiny
from diffusers.configuration_utils import ConfigMixin
from diffusers.image_processor import VaeImageProcessor
from diffusers.models.adapter import T2IAdapter
@ -20,9 +20,12 @@ from diffusers.models.attention_processor import (
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from diffusers.schedulers import DPMSolverSDEScheduler
from diffusers.schedulers import SchedulerMixin as Scheduler
from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
from diffusers.schedulers.scheduling_tcd import TCDScheduler
from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler
from PIL import Image, ImageFilter
from pydantic import field_validator
from torchvision.transforms.functional import resize as tv_resize
@ -50,28 +53,34 @@ from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter, IPAdapterPlus
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_manager import BaseModelType, LoadedModel
from invokeai.backend.model_manager.config import MainConfigBase, ModelVariantType
from invokeai.backend.model_patcher import ModelPatcher
from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData, IPAdapterConditioningInfo
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
BasicConditioningInfo,
IPAdapterConditioningInfo,
IPAdapterData,
Range,
SDXLConditioningInfo,
TextConditioningData,
TextConditioningRegions,
)
from invokeai.backend.util.mask import to_standard_float_mask
from invokeai.backend.util.silence_warnings import SilenceWarnings
from ...backend.stable_diffusion.diffusers_pipeline import (
ControlNetData,
IPAdapterData,
StableDiffusionGeneratorPipeline,
T2IAdapterData,
image_resized_to_grid_as_tensor,
)
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_precision, choose_torch_device
from ...backend.util.devices import TorchDevice
from .baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from .controlnet_image_processors import ControlField
from .model import ModelIdentifierField, UNetField, VAEField
if choose_torch_device() == torch.device("mps"):
from torch import mps
DEFAULT_PRECISION = choose_precision(choose_torch_device())
DEFAULT_PRECISION = TorchDevice.choose_torch_dtype()
@invocation_output("scheduler_output")
@ -179,7 +188,7 @@ class GradientMaskOutput(BaseInvocationOutput):
title="Create Gradient Mask",
tags=["mask", "denoise"],
category="latents",
version="1.0.0",
version="1.1.0",
)
class CreateGradientMaskInvocation(BaseInvocation):
"""Creates mask for denoising model run."""
@ -192,6 +201,32 @@ class CreateGradientMaskInvocation(BaseInvocation):
minimum_denoise: float = InputField(
default=0.0, ge=0, le=1, description="Minimum denoise level for the coherence region", ui_order=4
)
image: Optional[ImageField] = InputField(
default=None,
description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
title="[OPTIONAL] Image",
ui_order=6,
)
unet: Optional[UNetField] = InputField(
description="OPTIONAL: If the Unet is a specialized Inpainting model, masked_latents will be generated from the image with the VAE",
default=None,
input=Input.Connection,
title="[OPTIONAL] UNet",
ui_order=5,
)
vae: Optional[VAEField] = InputField(
default=None,
description="OPTIONAL: Only connect for specialized Inpainting models, masked_latents will be generated from the image with the VAE",
title="[OPTIONAL] VAE",
input=Input.Connection,
ui_order=7,
)
tiled: bool = InputField(default=False, description=FieldDescriptions.tiled, ui_order=8)
fp32: bool = InputField(
default=DEFAULT_PRECISION == "float32",
description=FieldDescriptions.fp32,
ui_order=9,
)
@torch.no_grad()
def invoke(self, context: InvocationContext) -> GradientMaskOutput:
@ -227,8 +262,27 @@ class CreateGradientMaskInvocation(BaseInvocation):
expanded_mask_image = Image.fromarray((expanded_mask.squeeze(0).numpy() * 255).astype(np.uint8), mode="L")
expanded_image_dto = context.images.save(expanded_mask_image)
masked_latents_name = None
if self.unet is not None and self.vae is not None and self.image is not None:
# all three fields must be present at the same time
main_model_config = context.models.get_config(self.unet.unet.key)
assert isinstance(main_model_config, MainConfigBase)
if main_model_config.variant is ModelVariantType.Inpaint:
mask = blur_tensor
vae_info: LoadedModel = context.models.load(self.vae.vae)
image = context.images.get_pil(self.image.image_name)
image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
if image_tensor.dim() == 3:
image_tensor = image_tensor.unsqueeze(0)
img_mask = tv_resize(mask, image_tensor.shape[-2:], T.InterpolationMode.BILINEAR, antialias=False)
masked_image = image_tensor * torch.where(img_mask < 0.5, 0.0, 1.0)
masked_latents = ImageToLatentsInvocation.vae_encode(
vae_info, self.fp32, self.tiled, masked_image.clone()
)
masked_latents_name = context.tensors.save(tensor=masked_latents)
return GradientMaskOutput(
denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=None, gradient=True),
denoise_mask=DenoiseMaskField(mask_name=mask_name, masked_latents_name=masked_latents_name, gradient=True),
expanded_mask_area=ImageField(image_name=expanded_image_dto.image_name),
)
@ -275,10 +329,10 @@ def get_scheduler(
class DenoiseLatentsInvocation(BaseInvocation):
"""Denoises noisy latents to decodable images"""
positive_conditioning: ConditioningField = InputField(
positive_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
description=FieldDescriptions.positive_cond, input=Input.Connection, ui_order=0
)
negative_conditioning: ConditioningField = InputField(
negative_conditioning: Union[ConditioningField, list[ConditioningField]] = InputField(
description=FieldDescriptions.negative_cond, input=Input.Connection, ui_order=1
)
noise: Optional[LatentsField] = InputField(
@ -289,7 +343,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
)
steps: int = InputField(default=10, gt=0, description=FieldDescriptions.steps)
cfg_scale: Union[float, List[float]] = InputField(
default=7.5, ge=1, description=FieldDescriptions.cfg_scale, title="CFG Scale"
default=7.5, description=FieldDescriptions.cfg_scale, title="CFG Scale"
)
denoising_start: float = InputField(
default=0.0,
@ -356,33 +410,174 @@ class DenoiseLatentsInvocation(BaseInvocation):
raise ValueError("cfg_scale must be greater than 1")
return v
def _get_text_embeddings_and_masks(
self,
cond_list: list[ConditioningField],
context: InvocationContext,
device: torch.device,
dtype: torch.dtype,
) -> tuple[Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]], list[Optional[torch.Tensor]]]:
"""Get the text embeddings and masks from the input conditioning fields."""
text_embeddings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]] = []
text_embeddings_masks: list[Optional[torch.Tensor]] = []
for cond in cond_list:
cond_data = context.conditioning.load(cond.conditioning_name)
text_embeddings.append(cond_data.conditionings[0].to(device=device, dtype=dtype))
mask = cond.mask
if mask is not None:
mask = context.tensors.load(mask.tensor_name)
text_embeddings_masks.append(mask)
return text_embeddings, text_embeddings_masks
def _preprocess_regional_prompt_mask(
self, mask: Optional[torch.Tensor], target_height: int, target_width: int, dtype: torch.dtype
) -> torch.Tensor:
"""Preprocess a regional prompt mask to match the target height and width.
If mask is None, returns a mask of all ones with the target height and width.
If mask is not None, resizes the mask to the target height and width using 'nearest' interpolation.
Returns:
torch.Tensor: The processed mask. shape: (1, 1, target_height, target_width).
"""
if mask is None:
return torch.ones((1, 1, target_height, target_width), dtype=dtype)
mask = to_standard_float_mask(mask, out_dtype=dtype)
tf = torchvision.transforms.Resize(
(target_height, target_width), interpolation=torchvision.transforms.InterpolationMode.NEAREST
)
# Add a batch dimension to the mask, because torchvision expects shape (batch, channels, h, w).
mask = mask.unsqueeze(0) # Shape: (1, h, w) -> (1, 1, h, w)
resized_mask = tf(mask)
return resized_mask
def _concat_regional_text_embeddings(
self,
text_conditionings: Union[list[BasicConditioningInfo], list[SDXLConditioningInfo]],
masks: Optional[list[Optional[torch.Tensor]]],
latent_height: int,
latent_width: int,
dtype: torch.dtype,
) -> tuple[Union[BasicConditioningInfo, SDXLConditioningInfo], Optional[TextConditioningRegions]]:
"""Concatenate regional text embeddings into a single embedding and track the region masks accordingly."""
if masks is None:
masks = [None] * len(text_conditionings)
assert len(text_conditionings) == len(masks)
is_sdxl = type(text_conditionings[0]) is SDXLConditioningInfo
all_masks_are_none = all(mask is None for mask in masks)
text_embedding = []
pooled_embedding = None
add_time_ids = None
cur_text_embedding_len = 0
processed_masks = []
embedding_ranges = []
for prompt_idx, text_embedding_info in enumerate(text_conditionings):
mask = masks[prompt_idx]
if is_sdxl:
# We choose a random SDXLConditioningInfo's pooled_embeds and add_time_ids here, with a preference for
# prompts without a mask. We prefer prompts without a mask, because they are more likely to contain
# global prompt information. In an ideal case, there should be exactly one global prompt without a
# mask, but we don't enforce this.
# HACK(ryand): The fact that we have to choose a single pooled_embedding and add_time_ids here is a
# fundamental interface issue. The SDXL Compel nodes are not designed to be used in the way that we use
# them for regional prompting. Ideally, the DenoiseLatents invocation should accept a single
# pooled_embeds tensor and a list of standard text embeds with region masks. This change would be a
# pretty major breaking change to a popular node, so for now we use this hack.
if pooled_embedding is None or mask is None:
pooled_embedding = text_embedding_info.pooled_embeds
if add_time_ids is None or mask is None:
add_time_ids = text_embedding_info.add_time_ids
text_embedding.append(text_embedding_info.embeds)
if not all_masks_are_none:
embedding_ranges.append(
Range(
start=cur_text_embedding_len, end=cur_text_embedding_len + text_embedding_info.embeds.shape[1]
)
)
processed_masks.append(
self._preprocess_regional_prompt_mask(mask, latent_height, latent_width, dtype=dtype)
)
cur_text_embedding_len += text_embedding_info.embeds.shape[1]
text_embedding = torch.cat(text_embedding, dim=1)
assert len(text_embedding.shape) == 3 # batch_size, seq_len, token_len
regions = None
if not all_masks_are_none:
regions = TextConditioningRegions(
masks=torch.cat(processed_masks, dim=1),
ranges=embedding_ranges,
)
if is_sdxl:
return (
SDXLConditioningInfo(embeds=text_embedding, pooled_embeds=pooled_embedding, add_time_ids=add_time_ids),
regions,
)
return BasicConditioningInfo(embeds=text_embedding), regions
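
A toy illustration of the bookkeeping above, with made-up tensor sizes: each prompt's embeddings are concatenated along the sequence dimension and a Range records where each prompt's tokens land.

# Toy example only; real BasicConditioningInfo/SDXLConditioningInfo tensors are used in practice.
import torch

embeds_global = torch.zeros(1, 77, 768)    # e.g. an unmasked global prompt
embeds_region = torch.zeros(1, 154, 768)   # e.g. a masked regional prompt
combined = torch.cat([embeds_global, embeds_region], dim=1)  # shape (1, 231, 768)
ranges = [(0, 77), (77, 231)]              # mirrors Range(start=..., end=...) per prompt
assert combined.shape[1] == ranges[-1][1]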
def get_conditioning_data(
self,
context: InvocationContext,
scheduler: Scheduler,
unet: UNet2DConditionModel,
seed: int,
) -> ConditioningData:
positive_cond_data = context.conditioning.load(self.positive_conditioning.conditioning_name)
c = positive_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype)
latent_height: int,
latent_width: int,
) -> TextConditioningData:
# Normalize self.positive_conditioning and self.negative_conditioning to lists.
cond_list = self.positive_conditioning
if not isinstance(cond_list, list):
cond_list = [cond_list]
uncond_list = self.negative_conditioning
if not isinstance(uncond_list, list):
uncond_list = [uncond_list]
negative_cond_data = context.conditioning.load(self.negative_conditioning.conditioning_name)
uc = negative_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype)
conditioning_data = ConditioningData(
unconditioned_embeddings=uc,
text_embeddings=c,
guidance_scale=self.cfg_scale,
guidance_rescale_multiplier=self.cfg_rescale_multiplier,
cond_text_embeddings, cond_text_embedding_masks = self._get_text_embeddings_and_masks(
cond_list, context, unet.device, unet.dtype
)
uncond_text_embeddings, uncond_text_embedding_masks = self._get_text_embeddings_and_masks(
uncond_list, context, unet.device, unet.dtype
)
conditioning_data = conditioning_data.add_scheduler_args_if_applicable( # FIXME
scheduler,
# for ddim scheduler
eta=0.0, # ddim_eta
# for ancestral and sde schedulers
# flip all bits to have noise different from initial
generator=torch.Generator(device=unet.device).manual_seed(seed ^ 0xFFFFFFFF),
cond_text_embedding, cond_regions = self._concat_regional_text_embeddings(
text_conditionings=cond_text_embeddings,
masks=cond_text_embedding_masks,
latent_height=latent_height,
latent_width=latent_width,
dtype=unet.dtype,
)
uncond_text_embedding, uncond_regions = self._concat_regional_text_embeddings(
text_conditionings=uncond_text_embeddings,
masks=uncond_text_embedding_masks,
latent_height=latent_height,
latent_width=latent_width,
dtype=unet.dtype,
)
if isinstance(self.cfg_scale, list):
assert (
len(self.cfg_scale) == self.steps
), "cfg_scale (list) must have the same length as the number of steps"
conditioning_data = TextConditioningData(
uncond_text=uncond_text_embedding,
cond_text=cond_text_embedding,
uncond_regions=uncond_regions,
cond_regions=cond_regions,
guidance_scale=self.cfg_scale,
guidance_rescale_multiplier=self.cfg_rescale_multiplier,
)
return conditioning_data
@ -391,13 +586,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
unet: UNet2DConditionModel,
scheduler: Scheduler,
) -> StableDiffusionGeneratorPipeline:
# TODO:
# configure_model_padding(
# unet,
# self.seamless,
# self.seamless_axes,
# )
class FakeVae:
class FakeVaeConfig:
def __init__(self) -> None:
@ -488,8 +676,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
self,
context: InvocationContext,
ip_adapter: Optional[Union[IPAdapterField, list[IPAdapterField]]],
conditioning_data: ConditioningData,
exit_stack: ExitStack,
latent_height: int,
latent_width: int,
dtype: torch.dtype,
) -> Optional[list[IPAdapterData]]:
"""If IP-Adapter is enabled, then this function loads the requisite models, and adds the image prompt embeddings
to the `conditioning_data` (in-place).
@ -505,7 +695,6 @@ class DenoiseLatentsInvocation(BaseInvocation):
return None
ip_adapter_data_list = []
conditioning_data.ip_adapter_conditioning = []
for single_ip_adapter in ip_adapter:
ip_adapter_model: Union[IPAdapter, IPAdapterPlus] = exit_stack.enter_context(
context.models.load(single_ip_adapter.ip_adapter_model)
@ -528,16 +717,20 @@ class DenoiseLatentsInvocation(BaseInvocation):
single_ipa_images, image_encoder_model
)
conditioning_data.ip_adapter_conditioning.append(
IPAdapterConditioningInfo(image_prompt_embeds, uncond_image_prompt_embeds)
)
mask = single_ip_adapter.mask
if mask is not None:
mask = context.tensors.load(mask.tensor_name)
mask = self._preprocess_regional_prompt_mask(mask, latent_height, latent_width, dtype=dtype)
ip_adapter_data_list.append(
IPAdapterData(
ip_adapter_model=ip_adapter_model,
weight=single_ip_adapter.weight,
target_blocks=single_ip_adapter.target_blocks,
begin_step_percent=single_ip_adapter.begin_step_percent,
end_step_percent=single_ip_adapter.end_step_percent,
ip_adapter_conditioning=IPAdapterConditioningInfo(image_prompt_embeds, uncond_image_prompt_embeds),
mask=mask,
)
)
@ -627,7 +820,8 @@ class DenoiseLatentsInvocation(BaseInvocation):
steps: int,
denoising_start: float,
denoising_end: float,
) -> Tuple[int, List[int], int]:
seed: int,
) -> Tuple[int, List[int], int, Dict[str, Any]]:
assert isinstance(scheduler, ConfigMixin)
if scheduler.config.get("cpu_only", False):
scheduler.set_timesteps(steps, device="cpu")
@ -655,7 +849,17 @@ class DenoiseLatentsInvocation(BaseInvocation):
timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
num_inference_steps = len(timesteps) // scheduler.order
return num_inference_steps, timesteps, init_timestep
scheduler_step_kwargs: Dict[str, Any] = {}
scheduler_step_signature = inspect.signature(scheduler.step)
if "generator" in scheduler_step_signature.parameters:
# At some point, someone decided that schedulers that accept a generator should use the original seed with
# all bits flipped. I don't know the original rationale for this, but now we must keep it like this for
# reproducibility.
scheduler_step_kwargs.update({"generator": torch.Generator(device=device).manual_seed(seed ^ 0xFFFFFFFF)})
if isinstance(scheduler, TCDScheduler):
scheduler_step_kwargs.update({"eta": 1.0})
return num_inference_steps, timesteps, init_timestep, scheduler_step_kwargs
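
The extra kwargs are meant to be forwarded to each scheduler.step call by the pipeline (that side is not shown in this diff); a hedged sketch with a diffusers scheduler and dummy tensors:

# Sketch only: shows how scheduler_step_kwargs would be consumed downstream.
import torch
from diffusers import EulerAncestralDiscreteScheduler

scheduler = EulerAncestralDiscreteScheduler()
scheduler.set_timesteps(10)
scheduler_step_kwargs = {"generator": torch.Generator().manual_seed(1234 ^ 0xFFFFFFFF)}

latents = torch.randn(1, 4, 64, 64)
noise_pred = torch.randn_like(latents)   # stand-in for the UNet output
t = scheduler.timesteps[0]
latents = scheduler.step(noise_pred, t, latents, **scheduler_step_kwargs).prev_sample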
def prep_inpaint_mask(
self, context: InvocationContext, latents: torch.Tensor
@ -749,7 +953,11 @@ class DenoiseLatentsInvocation(BaseInvocation):
)
pipeline = self.create_pipeline(unet, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler, unet, seed)
_, _, latent_height, latent_width = latents.shape
conditioning_data = self.get_conditioning_data(
context=context, unet=unet, latent_height=latent_height, latent_width=latent_width
)
controlnet_data = self.prep_control_data(
context=context,
@ -763,16 +971,19 @@ class DenoiseLatentsInvocation(BaseInvocation):
ip_adapter_data = self.prep_ip_adapter_data(
context=context,
ip_adapter=self.ip_adapter,
conditioning_data=conditioning_data,
exit_stack=exit_stack,
latent_height=latent_height,
latent_width=latent_width,
dtype=unet.dtype,
)
num_inference_steps, timesteps, init_timestep = self.init_scheduler(
num_inference_steps, timesteps, init_timestep, scheduler_step_kwargs = self.init_scheduler(
scheduler,
device=unet.device,
steps=self.steps,
denoising_start=self.denoising_start,
denoising_end=self.denoising_end,
seed=seed,
)
result_latents = pipeline.latents_from_embeddings(
@ -785,6 +996,7 @@ class DenoiseLatentsInvocation(BaseInvocation):
masked_latents=masked_latents,
gradient_mask=gradient_mask,
num_inference_steps=num_inference_steps,
scheduler_step_kwargs=scheduler_step_kwargs,
conditioning_data=conditioning_data,
control_data=controlnet_data,
ip_adapter_data=ip_adapter_data,
@ -794,12 +1006,10 @@ class DenoiseLatentsInvocation(BaseInvocation):
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
result_latents = result_latents.to("cpu")
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
name = context.tensors.save(tensor=result_latents)
return LatentsOutput.build(latents_name=name, latents=result_latents, seed=seed)
return LatentsOutput.build(latents_name=name, latents=result_latents, seed=None)
@invocation(
@ -863,9 +1073,7 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
vae.disable_tiling()
# clear memory as vae decode can request a lot
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
with torch.inference_mode():
# copied from diffusers pipeline
@ -877,9 +1085,7 @@ class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
image = VaeImageProcessor.numpy_to_pil(np_image)[0]
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
image_dto = context.images.save(image=image)
@ -918,9 +1124,7 @@ class ResizeLatentsInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.tensors.load(self.latents.latents_name)
# TODO:
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
resized_latents = torch.nn.functional.interpolate(
latents.to(device),
@ -931,9 +1135,8 @@ class ResizeLatentsInvocation(BaseInvocation):
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
resized_latents = resized_latents.to("cpu")
torch.cuda.empty_cache()
if device == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
name = context.tensors.save(tensor=resized_latents)
return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
@ -960,8 +1163,7 @@ class ScaleLatentsInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> LatentsOutput:
latents = context.tensors.load(self.latents.latents_name)
# TODO:
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
# resizing
resized_latents = torch.nn.functional.interpolate(
@ -973,9 +1175,7 @@ class ScaleLatentsInvocation(BaseInvocation):
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
resized_latents = resized_latents.to("cpu")
torch.cuda.empty_cache()
if device == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
name = context.tensors.save(tensor=resized_latents)
return LatentsOutput.build(latents_name=name, latents=resized_latents, seed=self.latents.seed)
@ -1107,8 +1307,7 @@ class BlendLatentsInvocation(BaseInvocation):
if latents_a.shape != latents_b.shape:
raise Exception("Latents to blend must be the same size.")
# TODO:
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
def slerp(
t: Union[float, npt.NDArray[Any]], # FIXME: maybe use np.float32 here?
@ -1161,9 +1360,8 @@ class BlendLatentsInvocation(BaseInvocation):
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
blended_latents = blended_latents.to("cpu")
torch.cuda.empty_cache()
if device == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
name = context.tensors.save(tensor=blended_latents)
return LatentsOutput.build(latents_name=name, latents=blended_latents)

View File

@ -0,0 +1,120 @@
import numpy as np
import torch
from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, InvocationContext, invocation
from invokeai.app.invocations.fields import ImageField, InputField, TensorField, WithMetadata
from invokeai.app.invocations.primitives import MaskOutput
@invocation(
"rectangle_mask",
title="Create Rectangle Mask",
tags=["conditioning"],
category="conditioning",
version="1.0.1",
)
class RectangleMaskInvocation(BaseInvocation, WithMetadata):
"""Create a rectangular mask."""
width: int = InputField(description="The width of the entire mask.")
height: int = InputField(description="The height of the entire mask.")
x_left: int = InputField(description="The left x-coordinate of the rectangular masked region (inclusive).")
y_top: int = InputField(description="The top y-coordinate of the rectangular masked region (inclusive).")
rectangle_width: int = InputField(description="The width of the rectangular masked region.")
rectangle_height: int = InputField(description="The height of the rectangular masked region.")
def invoke(self, context: InvocationContext) -> MaskOutput:
mask = torch.zeros((1, self.height, self.width), dtype=torch.bool)
mask[:, self.y_top : self.y_top + self.rectangle_height, self.x_left : self.x_left + self.rectangle_width] = (
True
)
mask_tensor_name = context.tensors.save(mask)
return MaskOutput(
mask=TensorField(tensor_name=mask_tensor_name),
width=self.width,
height=self.height,
)
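
A small worked example of the mask this node produces (illustration only):

# A 4x6 mask with a 2-row by 3-column rectangle at x_left=1, y_top=1.
import torch

mask = torch.zeros((1, 4, 6), dtype=torch.bool)
mask[:, 1:1 + 2, 1:1 + 3] = True
# mask[0] rows: FFFFFF / FTTTFF / FTTTFF / FFFFFF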
@invocation(
"alpha_mask_to_tensor",
title="Alpha Mask to Tensor",
tags=["conditioning"],
category="conditioning",
version="1.0.0",
classification=Classification.Beta,
)
class AlphaMaskToTensorInvocation(BaseInvocation):
"""Convert a mask image to a tensor. Opaque regions are 1 and transparent regions are 0."""
image: ImageField = InputField(description="The mask image to convert.")
invert: bool = InputField(default=False, description="Whether to invert the mask.")
def invoke(self, context: InvocationContext) -> MaskOutput:
image = context.images.get_pil(self.image.image_name)
mask = torch.zeros((1, image.height, image.width), dtype=torch.bool)
if self.invert:
mask[0] = torch.tensor(np.array(image)[:, :, 3] == 0, dtype=torch.bool)
else:
mask[0] = torch.tensor(np.array(image)[:, :, 3] > 0, dtype=torch.bool)
return MaskOutput(
mask=TensorField(tensor_name=context.tensors.save(mask)),
height=mask.shape[1],
width=mask.shape[2],
)
@invocation(
"invert_tensor_mask",
title="Invert Tensor Mask",
tags=["conditioning"],
category="conditioning",
version="1.0.0",
classification=Classification.Beta,
)
class InvertTensorMaskInvocation(BaseInvocation):
"""Inverts a tensor mask."""
mask: TensorField = InputField(description="The tensor mask to convert.")
def invoke(self, context: InvocationContext) -> MaskOutput:
mask = context.tensors.load(self.mask.tensor_name)
inverted = ~mask
return MaskOutput(
mask=TensorField(tensor_name=context.tensors.save(inverted)),
height=inverted.shape[1],
width=inverted.shape[2],
)
@invocation(
"image_mask_to_tensor",
title="Image Mask to Tensor",
tags=["conditioning"],
category="conditioning",
version="1.0.0",
)
class ImageMaskToTensorInvocation(BaseInvocation, WithMetadata):
"""Convert a mask image to a tensor. Converts the image to grayscale and uses thresholding at the specified value."""
image: ImageField = InputField(description="The mask image to convert.")
cutoff: int = InputField(ge=0, le=255, description="Cutoff (<)", default=128)
invert: bool = InputField(default=False, description="Whether to invert the mask.")
def invoke(self, context: InvocationContext) -> MaskOutput:
image = context.images.get_pil(self.image.image_name, mode="L")
mask = torch.zeros((1, image.height, image.width), dtype=torch.bool)
if self.invert:
mask[0] = torch.tensor(np.array(image)[:, :] >= self.cutoff, dtype=torch.bool)
else:
mask[0] = torch.tensor(np.array(image)[:, :] < self.cutoff, dtype=torch.bool)
return MaskOutput(
mask=TensorField(tensor_name=context.tensors.save(mask)),
height=mask.shape[1],
width=mask.shape[2],
)

View File

@ -3,7 +3,6 @@ from typing import Any, Literal, Optional, Union
from pydantic import BaseModel, ConfigDict, Field
from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES
from invokeai.app.invocations.fields import (
FieldDescriptions,
ImageField,
@ -14,6 +13,7 @@ from invokeai.app.invocations.fields import (
)
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_MODE_VALUES, CONTROLNET_RESIZE_VALUES
from ...version import __version__
@ -36,6 +36,7 @@ class IPAdapterMetadataField(BaseModel):
image: ImageField = Field(description="The IP-Adapter image prompt.")
ip_adapter_model: ModelIdentifierField = Field(description="The IP-Adapter model.")
clip_vision_model: Literal["ViT-H", "ViT-G"] = Field(description="The CLIP Vision model")
method: Literal["full", "style", "composition"] = Field(description="Method to apply IP Weights with")
weight: Union[float, list[float]] = Field(description="The weight given to the IP-Adapter")
begin_step_percent: float = Field(description="When the IP-Adapter is first applied (% of total steps)")
end_step_percent: float = Field(description="When the IP-Adapter is last applied (% of total steps)")

View File

@ -190,6 +190,75 @@ class LoRALoaderInvocation(BaseInvocation):
return output
@invocation_output("lora_selector_output")
class LoRASelectorOutput(BaseInvocationOutput):
"""Model loader output"""
lora: LoRAField = OutputField(description="LoRA model and weight", title="LoRA")
@invocation("lora_selector", title="LoRA Selector", tags=["model"], category="model", version="1.0.0")
class LoRASelectorInvocation(BaseInvocation):
"""Selects a LoRA model and weight."""
lora: ModelIdentifierField = InputField(
description=FieldDescriptions.lora_model, input=Input.Direct, title="LoRA", ui_type=UIType.LoRAModel
)
weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight)
def invoke(self, context: InvocationContext) -> LoRASelectorOutput:
return LoRASelectorOutput(lora=LoRAField(lora=self.lora, weight=self.weight))
@invocation("lora_collection_loader", title="LoRA Collection Loader", tags=["model"], category="model", version="1.0.0")
class LoRACollectionLoader(BaseInvocation):
"""Applies a collection of LoRAs to the provided UNet and CLIP models."""
loras: LoRAField | list[LoRAField] = InputField(
description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
)
unet: Optional[UNetField] = InputField(
default=None,
description=FieldDescriptions.unet,
input=Input.Connection,
title="UNet",
)
clip: Optional[CLIPField] = InputField(
default=None,
description=FieldDescriptions.clip,
input=Input.Connection,
title="CLIP",
)
def invoke(self, context: InvocationContext) -> LoRALoaderOutput:
output = LoRALoaderOutput()
loras = self.loras if isinstance(self.loras, list) else [self.loras]
added_loras: list[str] = []
for lora in loras:
if lora.lora.key in added_loras:
continue
if not context.models.exists(lora.lora.key):
raise Exception(f"Unknown lora: {lora.lora.key}!")
assert lora.lora.base in (BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2)
added_loras.append(lora.lora.key)
if self.unet is not None:
if output.unet is None:
output.unet = self.unet.model_copy(deep=True)
output.unet.loras.append(lora)
if self.clip is not None:
if output.clip is None:
output.clip = self.clip.model_copy(deep=True)
output.clip.loras.append(lora)
return output
@invocation_output("sdxl_lora_loader_output")
class SDXLLoRALoaderOutput(BaseInvocationOutput):
"""SDXL LoRA Loader Output"""
@ -279,6 +348,72 @@ class SDXLLoRALoaderInvocation(BaseInvocation):
return output
@invocation(
"sdxl_lora_collection_loader",
title="SDXL LoRA Collection Loader",
tags=["model"],
category="model",
version="1.0.0",
)
class SDXLLoRACollectionLoader(BaseInvocation):
"""Applies a collection of SDXL LoRAs to the provided UNet and CLIP models."""
loras: LoRAField | list[LoRAField] = InputField(
description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
)
unet: Optional[UNetField] = InputField(
default=None,
description=FieldDescriptions.unet,
input=Input.Connection,
title="UNet",
)
clip: Optional[CLIPField] = InputField(
default=None,
description=FieldDescriptions.clip,
input=Input.Connection,
title="CLIP",
)
clip2: Optional[CLIPField] = InputField(
default=None,
description=FieldDescriptions.clip,
input=Input.Connection,
title="CLIP 2",
)
def invoke(self, context: InvocationContext) -> SDXLLoRALoaderOutput:
output = SDXLLoRALoaderOutput()
loras = self.loras if isinstance(self.loras, list) else [self.loras]
added_loras: list[str] = []
for lora in loras:
if lora.lora.key in added_loras:
continue
if not context.models.exists(lora.lora.key):
raise Exception(f"Unknown lora: {lora.lora.key}!")
assert lora.lora.base is BaseModelType.StableDiffusionXL
added_loras.append(lora.lora.key)
if self.unet is not None:
if output.unet is None:
output.unet = self.unet.model_copy(deep=True)
output.unet.loras.append(lora)
if self.clip is not None:
if output.clip is None:
output.clip = self.clip.model_copy(deep=True)
output.clip.loras.append(lora)
if self.clip2 is not None:
if output.clip2 is None:
output.clip2 = self.clip2.model_copy(deep=True)
output.clip2.loras.append(lora)
return output
@invocation("vae_loader", title="VAE", tags=["vae", "model"], category="model", version="1.0.2")
class VAELoaderInvocation(BaseInvocation):
"""Loads a VAE model, outputting a VaeLoaderOutput"""

View File

@ -9,7 +9,7 @@ from invokeai.app.invocations.fields import FieldDescriptions, InputField, Laten
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.misc import SEED_MAX
from ...backend.util.devices import choose_torch_device, torch_dtype
from ...backend.util.devices import TorchDevice
from .baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
@ -46,7 +46,7 @@ def get_noise(
height // downsampling_factor,
width // downsampling_factor,
],
dtype=torch_dtype(device),
dtype=TorchDevice.choose_torch_dtype(device=device),
device=noise_device_type,
generator=generator,
).to("cpu")
@ -111,14 +111,14 @@ class NoiseInvocation(BaseInvocation):
@field_validator("seed", mode="before")
def modulo_seed(cls, v):
"""Returns the seed modulo (SEED_MAX + 1) to ensure it is within the valid range."""
"""Return the seed modulo (SEED_MAX + 1) to ensure it is within the valid range."""
return v % (SEED_MAX + 1)
def invoke(self, context: InvocationContext) -> NoiseOutput:
noise = get_noise(
width=self.width,
height=self.height,
device=choose_torch_device(),
device=TorchDevice.choose_torch_device(),
seed=self.seed,
use_cpu=self.use_cpu,
)

View File

@ -1,6 +1,6 @@
# Copyright (c) 2023 Kyle Schouviller (https://github.com/kyle0654)
from typing import Optional
from typing import Optional, List
import torch
@ -15,6 +15,7 @@ from invokeai.app.invocations.fields import (
InputField,
LatentsField,
OutputField,
TensorField,
UIComponent,
)
from invokeai.app.services.images.images_common import ImageDTO
@ -246,6 +247,17 @@ class ImageOutput(BaseInvocationOutput):
)
@invocation_output("captioned_image_output")
class CaptionImageOutput(ImageOutput):
caption: str = OutputField(description="Caption for given image")
@invocation_output("captioned_image_outputs")
class CaptionImageOutputs(BaseInvocationOutput):
images: List[CaptionImageOutput] = OutputField(description="List of captioned images", default=[])
@invocation_output("image_collection_output")
class ImageCollectionOutput(BaseInvocationOutput):
"""Base class for nodes that output a collection of images"""
@ -405,9 +417,19 @@ class ColorInvocation(BaseInvocation):
# endregion
# region Conditioning
@invocation_output("mask_output")
class MaskOutput(BaseInvocationOutput):
"""A torch mask tensor."""
mask: TensorField = OutputField(description="The mask.")
width: int = OutputField(description="The width of the mask in pixels.")
height: int = OutputField(description="The height of the mask in pixels.")
@invocation_output("conditioning_output")
class ConditioningOutput(BaseInvocationOutput):
"""Base class for nodes that output a single conditioning tensor"""

View File

@ -8,11 +8,11 @@ from invokeai.app.invocations.baseinvocation import (
invocation,
invocation_output,
)
from invokeai.app.invocations.controlnet_image_processors import CONTROLNET_RESIZE_VALUES
from invokeai.app.invocations.fields import FieldDescriptions, ImageField, Input, InputField, OutputField, UIType
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.util import validate_begin_end_step, validate_weights
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.controlnet_utils import CONTROLNET_RESIZE_VALUES
class T2IAdapterField(BaseModel):

View File

@ -4,7 +4,6 @@ from typing import Literal
import cv2
import numpy as np
import torch
from PIL import Image
from pydantic import ConfigDict
@ -14,7 +13,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.download_with_progress import download_with_progress_bar
from invokeai.backend.image_util.basicsr.rrdbnet_arch import RRDBNet
from invokeai.backend.image_util.realesrgan.realesrgan import RealESRGAN
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from .baseinvocation import BaseInvocation, invocation
from .fields import InputField, WithBoard, WithMetadata
@ -35,9 +34,6 @@ ESRGAN_MODEL_URLS: dict[str, str] = {
"RealESRGAN_x2plus.pth": "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth",
}
if choose_torch_device() == torch.device("mps"):
from torch import mps
@invocation("esrgan", title="Upscale (RealESRGAN)", tags=["esrgan", "upscale"], category="esrgan", version="1.3.2")
class ESRGANInvocation(BaseInvocation, WithMetadata, WithBoard):
@ -120,9 +116,7 @@ class ESRGANInvocation(BaseInvocation, WithMetadata, WithBoard):
upscaled_image = upscaler.upscale(cv2_image)
pil_image = Image.fromarray(cv2.cvtColor(upscaled_image, cv2.COLOR_BGR2RGB)).convert("RGBA")
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
image_dto = context.images.save(image=pil_image)

View File

@ -27,12 +27,12 @@ DEFAULT_RAM_CACHE = 10.0
DEFAULT_VRAM_CACHE = 0.25
DEFAULT_CONVERT_CACHE = 20.0
DEVICE = Literal["auto", "cpu", "cuda", "cuda:1", "mps"]
PRECISION = Literal["auto", "float16", "bfloat16", "float32", "autocast"]
PRECISION = Literal["auto", "float16", "bfloat16", "float32"]
ATTENTION_TYPE = Literal["auto", "normal", "xformers", "sliced", "torch-sdp"]
ATTENTION_SLICE_SIZE = Literal["auto", "balanced", "max", 1, 2, 3, 4, 5, 6, 7, 8]
LOG_FORMAT = Literal["plain", "color", "syslog", "legacy"]
LOG_LEVEL = Literal["debug", "info", "warning", "error", "critical"]
CONFIG_SCHEMA_VERSION = "4.0.0"
CONFIG_SCHEMA_VERSION = "4.0.1"
def get_default_ram_cache_size() -> float:
@ -105,7 +105,7 @@ class InvokeAIAppConfig(BaseSettings):
lazy_offload: Keep models in VRAM until their space is needed.
log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
device: Preferred execution device. `auto` will choose the device depending on the hardware platform and the installed torch capabilities.<br>Valid values: `auto`, `cpu`, `cuda`, `cuda:1`, `mps`
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`, `autocast`
precision: Floating point precision. `float16` will consume half the memory of `float32` but produce slightly lower-quality images. The `auto` setting will guess the proper precision based on your video card and operating system.<br>Valid values: `auto`, `float16`, `bfloat16`, `float32`
sequential_guidance: Whether to calculate guidance in serial instead of in parallel, lowering memory requirements.
attention_type: Attention type.<br>Valid values: `auto`, `normal`, `xformers`, `sliced`, `torch-sdp`
attention_slice_size: Slice size, valid when attention_type=="sliced".<br>Valid values: `auto`, `balanced`, `max`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`
@ -370,6 +370,9 @@ def migrate_v3_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
# `max_vram_cache_size` was renamed to `vram` some time in v3, but both names were used
if k == "max_vram_cache_size" and "vram" not in category_dict:
parsed_config_dict["vram"] = v
# autocast was removed in v4.0.1
if k == "precision" and v == "autocast":
parsed_config_dict["precision"] = "auto"
if k == "conf_path":
parsed_config_dict["legacy_models_yaml_path"] = v
if k == "legacy_conf_dir":
@ -392,6 +395,28 @@ def migrate_v3_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
return config
def migrate_v4_0_0_config_dict(config_dict: dict[str, Any]) -> InvokeAIAppConfig:
"""Migrate v4.0.0 config dictionary to a current config object.
Args:
config_dict: A dictionary of settings from a v4.0.0 config file.
Returns:
An instance of `InvokeAIAppConfig` with the migrated settings.
"""
parsed_config_dict: dict[str, Any] = {}
for k, v in config_dict.items():
# autocast was removed from precision in v4.0.1
if k == "precision" and v == "autocast":
parsed_config_dict["precision"] = "auto"
else:
parsed_config_dict[k] = v
if k == "schema_version":
parsed_config_dict[k] = CONFIG_SCHEMA_VERSION
config = DefaultInvokeAIAppConfig.model_validate(parsed_config_dict)
return config
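
A hedged example of this migration path; the module import path is an assumption.

# Illustration only: a v4.0.0 settings dict that still uses the removed "autocast" value.
from invokeai.app.services.config.config_default import migrate_v4_0_0_config_dict

old = {"schema_version": "4.0.0", "precision": "autocast", "host": "127.0.0.1"}
migrated = migrate_v4_0_0_config_dict(old)
assert migrated.precision == "auto"
assert migrated.schema_version == "4.0.1"  # CONFIG_SCHEMA_VERSION after this change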
def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
"""Load and migrate a config file to the latest version.
@ -418,17 +443,21 @@ def load_and_migrate_config(config_path: Path) -> InvokeAIAppConfig:
raise RuntimeError(f"Failed to load and migrate v3 config file {config_path}: {e}") from e
migrated_config.write_file(config_path)
return migrated_config
else:
# Attempt to load as a v4 config file
try:
# Meta is not included in the model fields, so we need to validate it separately
config = InvokeAIAppConfig.model_validate(loaded_config_dict)
assert (
config.schema_version == CONFIG_SCHEMA_VERSION
), f"Invalid schema version, expected {CONFIG_SCHEMA_VERSION}: {config.schema_version}"
return config
except Exception as e:
raise RuntimeError(f"Failed to load config file {config_path}: {e}") from e
if loaded_config_dict["schema_version"] == "4.0.0":
loaded_config_dict = migrate_v4_0_0_config_dict(loaded_config_dict)
loaded_config_dict.write_file(config_path)
# Attempt to load as a v4 config file
try:
# Meta is not included in the model fields, so we need to validate it separately
config = InvokeAIAppConfig.model_validate(loaded_config_dict)
assert (
config.schema_version == CONFIG_SCHEMA_VERSION
), f"Invalid schema version, expected {CONFIG_SCHEMA_VERSION}: {config.schema_version}"
return config
except Exception as e:
raise RuntimeError(f"Failed to load config file {config_path}: {e}") from e
@lru_cache(maxsize=1)

View File

@ -318,10 +318,8 @@ class DownloadQueueService(DownloadQueueServiceBase):
in_progress_path.rename(job.download_path)
def _validate_filename(self, directory: str, filename: str) -> bool:
pc_name_max = os.pathconf(directory, "PC_NAME_MAX") if hasattr(os, "pathconf") else 260 # hardcoded for windows
pc_path_max = (
os.pathconf(directory, "PC_PATH_MAX") if hasattr(os, "pathconf") else 32767
) # hardcoded for windows with long names enabled
pc_name_max = get_pc_name_max(directory)
pc_path_max = get_pc_path_max(directory)
if "/" in filename:
return False
if filename.startswith(".."):
@ -419,6 +417,26 @@ class DownloadQueueService(DownloadQueueServiceBase):
self._logger.warning(excp)
def get_pc_name_max(directory: str) -> int:
if hasattr(os, "pathconf"):
try:
return os.pathconf(directory, "PC_NAME_MAX")
except OSError:
# macOS w/ external drives raise OSError
pass
return 260 # hardcoded for windows
def get_pc_path_max(directory: str) -> int:
if hasattr(os, "pathconf"):
try:
return os.pathconf(directory, "PC_PATH_MAX")
except OSError:
# some platforms may not have this value
pass
return 32767 # hardcoded for windows with long names enabled
# Example on_progress event handler to display a TQDM status bar
# Activate with:
# download_service.download(DownloadJob('http://foo.bar/baz', '/tmp', on_progress=TqdmProgress().update))

View File

@ -3,7 +3,6 @@
import locale
import os
import re
import signal
import threading
import time
from hashlib import sha256
@ -13,6 +12,7 @@ from shutil import copyfile, copytree, move, rmtree
from tempfile import mkdtemp
from typing import Any, Dict, List, Optional, Union
import torch
import yaml
from huggingface_hub import HfFolder
from pydantic.networks import AnyHttpUrl
@ -42,7 +42,8 @@ from invokeai.backend.model_manager.metadata.metadata_base import HuggingFaceMet
from invokeai.backend.model_manager.probe import ModelProbe
from invokeai.backend.model_manager.search import ModelSearch
from invokeai.backend.util import InvokeAILogger
from invokeai.backend.util.devices import choose_precision, choose_torch_device
from invokeai.backend.util.catch_sigint import catch_sigint
from invokeai.backend.util.devices import TorchDevice
from .model_install_base import (
MODEL_SOURCE_TO_TYPE_MAP,
@ -111,17 +112,6 @@ class ModelInstallService(ModelInstallServiceBase):
def start(self, invoker: Optional[Invoker] = None) -> None:
"""Start the installer thread."""
# Yes, this is weird. When the installer thread is running, the
# thread masks the ^C signal. When we receive a
# sigINT, we stop the thread, reset sigINT, and send a new
# sigINT to the parent process.
def sigint_handler(signum, frame):
self.stop()
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.raise_signal(signal.SIGINT)
signal.signal(signal.SIGINT, sigint_handler)
with self._lock:
if self._running:
raise Exception("Attempt to start the installer service twice")
@ -131,7 +121,8 @@ class ModelInstallService(ModelInstallServiceBase):
# In normal use, we do not want to scan the models directory - it should never have orphaned models.
# We should only do the scan when the flag is set (which should only be set when testing).
if self.app_config.scan_models_on_startup:
self._register_orphaned_models()
with catch_sigint():
self._register_orphaned_models()
# Check all models' paths and confirm they exist. A model could be missing if it was installed on a volume
# that isn't currently mounted. In this case, we don't want to delete the model from the database, but we do
@ -634,11 +625,10 @@ class ModelInstallService(ModelInstallServiceBase):
self._next_job_id += 1
return id
@staticmethod
def _guess_variant() -> Optional[ModelRepoVariant]:
def _guess_variant(self) -> Optional[ModelRepoVariant]:
"""Guess the best HuggingFace variant type to download."""
precision = choose_precision(choose_torch_device())
return ModelRepoVariant.FP16 if precision == "float16" else None
precision = TorchDevice.choose_torch_dtype()
return ModelRepoVariant.FP16 if precision == torch.float16 else None
def _import_local_model(self, source: LocalModelSource, config: Optional[Dict[str, Any]]) -> ModelInstallJob:
return ModelInstallJob(
@ -754,6 +744,8 @@ class ModelInstallService(ModelInstallServiceBase):
self._download_cache[download_job.source] = install_job # matches a download job to an install job
install_job.download_parts.add(download_job)
# only start the jobs once install_job.download_parts is fully populated
for download_job in install_job.download_parts:
self._download_queue.submit_download_job(
download_job,
on_start=self._download_started_callback,
@ -762,6 +754,7 @@ class ModelInstallService(ModelInstallServiceBase):
on_error=self._download_error_callback,
on_cancelled=self._download_cancelled_callback,
)
return install_job
def _stat_size(self, path: Path) -> int:

View File

@ -1,12 +1,14 @@
# Copyright (c) 2023 Lincoln D. Stein and the InvokeAI Team
"""Implementation of ModelManagerServiceBase."""
from typing import Optional
import torch
from typing_extensions import Self
from invokeai.app.services.invoker import Invoker
from invokeai.backend.model_manager.load import ModelCache, ModelConvertCache, ModelLoaderRegistry
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.logging import InvokeAILogger
from ..config import InvokeAIAppConfig
@ -67,7 +69,7 @@ class ModelManagerService(ModelManagerServiceBase):
model_record_service: ModelRecordServiceBase,
download_queue: DownloadQueueServiceBase,
events: EventServiceBase,
execution_device: torch.device = choose_torch_device(),
execution_device: Optional[torch.device] = None,
) -> Self:
"""
Construct the model manager service instance.
@ -82,7 +84,7 @@ class ModelManagerService(ModelManagerServiceBase):
max_vram_cache_size=app_config.vram,
lazy_offloading=app_config.lazy_offload,
logger=logger,
execution_device=execution_device,
execution_device=execution_device or TorchDevice.choose_torch_device(),
)
convert_cache = ModelConvertCache(cache_path=app_config.convert_cache_path, max_size=app_config.convert_cache)
loader = ModelLoadService(

View File

@ -1,6 +1,6 @@
import shutil
import tempfile
import typing
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Optional, TypeVar
@ -17,12 +17,6 @@ if TYPE_CHECKING:
T = TypeVar("T")
@dataclass
class DeleteAllResult:
deleted_count: int
freed_space_bytes: float
class ObjectSerializerDisk(ObjectSerializerBase[T]):
"""Disk-backed storage for arbitrary python objects. Serialization is handled by `torch.save` and `torch.load`.
@ -35,6 +29,12 @@ class ObjectSerializerDisk(ObjectSerializerBase[T]):
self._ephemeral = ephemeral
self._base_output_dir = output_dir
self._base_output_dir.mkdir(parents=True, exist_ok=True)
if self._ephemeral:
# Remove dangling tempdirs that might have been left over from an earlier unplanned shutdown.
for temp_dir in filter(Path.is_dir, self._base_output_dir.glob("tmp*")):
shutil.rmtree(temp_dir)
# Must specify `ignore_cleanup_errors` to avoid fatal errors during cleanup on Windows
self._tempdir = (
tempfile.TemporaryDirectory(dir=self._base_output_dir, ignore_cleanup_errors=True) if ephemeral else None

View File

@ -86,6 +86,12 @@ class DefaultSessionProcessor(SessionProcessorBase):
self._poll_now()
elif event_name == "batch_enqueued":
self._poll_now()
elif event_name == "queue_item_status_changed" and event[1]["data"]["queue_item"]["status"] in [
"completed",
"failed",
"canceled",
]:
self._poll_now()
def resume(self) -> SessionProcessorStatus:
if not self._resume_event.is_set():

View File

@ -245,6 +245,18 @@ class ImagesInterface(InvocationContextInterface):
"""
return self._services.images.get_dto(image_name)
def get_path(self, image_name: str, thumbnail: bool = False) -> Path:
"""Gets the internal path to an image or thumbnail.
Args:
image_name: The name of the image to get the path of.
thumbnail: Get the path of the thumbnail instead of the full image
Returns:
The local path of the image or thumbnail.
"""
return self._services.images.get_path(image_name, thumbnail)
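For node authors, the new accessor exposes the on-disk location of an image (or its thumbnail) directly from the invocation context. A hypothetical caller, assuming context is the InvocationContext passed into a node's invoke() method:

from pathlib import Path

def export_paths(context, image_name: str) -> tuple[Path, Path]:
    # Full-size image and its thumbnail, as local filesystem paths.
    image_path = context.images.get_path(image_name)
    thumb_path = context.images.get_path(image_name, thumbnail=True)
    return image_path, thumb_path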
class TensorsInterface(InvocationContextInterface):
def save(self, tensor: Tensor) -> str:

View File

@ -1,13 +1,21 @@
from typing import Union
from typing import Any, Literal, Union
import cv2
import numpy as np
import torch
from controlnet_aux.util import HWC3
from diffusers.utils import PIL_INTERPOLATION
from einops import rearrange
from PIL import Image
from invokeai.backend.image_util.util import nms, normalize_image_channel_count
CONTROLNET_RESIZE_VALUES = Literal[
"just_resize",
"crop_resize",
"fill_resize",
"just_resize_simple",
]
CONTROLNET_MODE_VALUES = Literal["balanced", "more_prompt", "more_control", "unbalanced"]
###################################################################
# Copy of scripts/lvminthin.py from Mikubill/sd-webui-controlnet
###################################################################
@ -68,17 +76,6 @@ def lvmin_thin(x, prunings=True):
return y
def nake_nms(x):
f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
y = np.zeros_like(x)
for f in [f1, f2, f3, f4]:
np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
return y
################################################################################
# copied from Mikubill/sd-webui-controlnet external_code.py and modified for InvokeAI
################################################################################
@ -134,98 +131,122 @@ def pixel_perfect_resolution(
return int(np.round(estimation))
def clone_contiguous(x: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
"""Get a memory-contiguous clone of the given numpy array, as a safety measure and to improve computation efficiency."""
return np.ascontiguousarray(x).copy()
def np_img_to_torch(np_img: np.ndarray[Any, Any], device: torch.device) -> torch.Tensor:
"""Convert a numpy image to a PyTorch tensor. The image is normalized to 0-1, rearranged to BCHW format and sent to
the specified device."""
torch_img = torch.from_numpy(np_img)
normalized = torch_img.float() / 255.0
bchw = rearrange(normalized, "h w c -> 1 c h w")
on_device = bchw.to(device)
return on_device.clone()
def heuristic_resize(np_img: np.ndarray[Any, Any], size: tuple[int, int]) -> np.ndarray[Any, Any]:
"""Resizes an image using a heuristic to choose the best resizing strategy.
- If the image appears to be an edge map, special handling will be applied to ensure the edges are not distorted.
- Single-pixel edge maps use NMS and thinning to keep the edges as single-pixel lines.
- Low-color-count images are resized with nearest-neighbor to preserve color information (e.g. segmentation maps).
- The alpha channel is handled separately to ensure it is resized correctly.
Args:
np_img (np.ndarray): The input image.
size (tuple[int, int]): The target size for the image.
Returns:
np.ndarray: The resized image.
Adapted from https://github.com/Mikubill/sd-webui-controlnet.
"""
# Return early if the image is already at the requested size
if np_img.shape[0] == size[1] and np_img.shape[1] == size[0]:
return np_img
# If the image has an alpha channel, separate it for special handling later.
inpaint_mask = None
if np_img.ndim == 3 and np_img.shape[2] == 4:
inpaint_mask = np_img[:, :, 3]
np_img = np_img[:, :, 0:3]
new_size_is_smaller = (size[0] * size[1]) < (np_img.shape[0] * np_img.shape[1])
new_size_is_bigger = (size[0] * size[1]) > (np_img.shape[0] * np_img.shape[1])
unique_color_count = np.unique(np_img.reshape(-1, np_img.shape[2]), axis=0).shape[0]
is_one_pixel_edge = False
is_binary = False
if unique_color_count == 2:
# If the image has only two colors, it is likely binary. Check if the image has one-pixel edges.
is_binary = np.min(np_img) < 16 and np.max(np_img) > 240
if is_binary:
eroded = cv2.erode(np_img, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
dilated = cv2.dilate(eroded, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
one_pixel_edge_count = np.where(dilated < np_img)[0].shape[0]
all_edge_count = np.where(np_img > 127)[0].shape[0]
is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count
if 2 < unique_color_count < 200:
# With a low color count, we assume this is a map where exact colors are important. Nearest-neighbor preserves
# the colors as needed.
interpolation = cv2.INTER_NEAREST
elif new_size_is_smaller:
# This works best for downscaling
interpolation = cv2.INTER_AREA
else:
# Fall back for other cases
interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. NEVER CHANGE THIS
# This may be further transformed depending on the binary nature of the image.
resized = cv2.resize(np_img, size, interpolation=interpolation)
if inpaint_mask is not None:
# Resize the inpaint mask to match the resized image using the same interpolation method.
inpaint_mask = cv2.resize(inpaint_mask, size, interpolation=interpolation)
# If the image is binary, we will perform some additional processing to ensure the edges are preserved.
if is_binary:
resized = np.mean(resized.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8)
if is_one_pixel_edge:
# Use NMS and thinning to keep the edges as single-pixel lines.
resized = nms(resized)
_, resized = cv2.threshold(resized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
resized = lvmin_thin(resized, prunings=new_size_is_bigger)
else:
_, resized = cv2.threshold(resized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
resized = np.stack([resized] * 3, axis=2)
# Restore the alpha channel if it was present.
if inpaint_mask is not None:
inpaint_mask = (inpaint_mask > 127).astype(np.float32) * 255.0
inpaint_mask = inpaint_mask[:, :, None].clip(0, 255).astype(np.uint8)
resized = np.concatenate([resized, inpaint_mask], axis=2)
return resized
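A short, hypothetical driver for the two helpers above (assumes heuristic_resize and np_img_to_torch are imported from this module; the array sizes are illustrative):

import numpy as np
import torch

rgb = np.zeros((480, 640, 3), dtype=np.uint8)            # H x W x C, values 0-255
resized = heuristic_resize(rgb, (512, 512))               # target size is (W, H)
tensor = np_img_to_torch(resized, torch.device("cpu"))    # 1 x C x H x W, floats in 0-1
assert resized.shape == (512, 512, 3)
assert tensor.shape == (1, 3, 512, 512)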
###########################################################################
# Copied from detectmap_proc method in scripts/detectmap_proc.py in Mikubill/sd-webui-controlnet
# modified for InvokeAI
###########################################################################
# def detectmap_proc(detected_map, module, resize_mode, h, w):
def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device: torch.device = torch.device("cpu")):
# if 'inpaint' in module:
# np_img = np_img.astype(np.float32)
# else:
# np_img = HWC3(np_img)
np_img = HWC3(np_img)
def np_img_resize(
np_img: np.ndarray,
resize_mode: CONTROLNET_RESIZE_VALUES,
h: int,
w: int,
device: torch.device = torch.device("cpu"),
) -> tuple[torch.Tensor, np.ndarray[Any, Any]]:
np_img = normalize_image_channel_count(np_img)
def safe_numpy(x):
# A very safe method to make sure that Apple/Mac works
y = x
# below is very boring but do not change these. If you change these Apple or Mac may fail.
y = y.copy()
y = np.ascontiguousarray(y)
y = y.copy()
return y
def get_pytorch_control(x):
# A very safe method to make sure that Apple/Mac works
y = x
# below is very boring but do not change these. If you change these Apple or Mac may fail.
y = torch.from_numpy(y)
y = y.float() / 255.0
y = rearrange(y, "h w c -> 1 c h w")
y = y.clone()
# y = y.to(devices.get_device_for("controlnet"))
y = y.to(device)
y = y.clone()
return y
def high_quality_resize(x: np.ndarray, size):
# Written by lvmin
# Super high-quality control map up-scaling, considering binary, seg, and one-pixel edges
inpaint_mask = None
if x.ndim == 3 and x.shape[2] == 4:
inpaint_mask = x[:, :, 3]
x = x[:, :, 0:3]
new_size_is_smaller = (size[0] * size[1]) < (x.shape[0] * x.shape[1])
new_size_is_bigger = (size[0] * size[1]) > (x.shape[0] * x.shape[1])
unique_color_count = np.unique(x.reshape(-1, x.shape[2]), axis=0).shape[0]
is_one_pixel_edge = False
is_binary = False
if unique_color_count == 2:
is_binary = np.min(x) < 16 and np.max(x) > 240
if is_binary:
xc = x
xc = cv2.erode(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
xc = cv2.dilate(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
one_pixel_edge_count = np.where(xc < x)[0].shape[0]
all_edge_count = np.where(x > 127)[0].shape[0]
is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count
if 2 < unique_color_count < 200:
interpolation = cv2.INTER_NEAREST
elif new_size_is_smaller:
interpolation = cv2.INTER_AREA
else:
interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. NEVER CHANGE THIS
y = cv2.resize(x, size, interpolation=interpolation)
if inpaint_mask is not None:
inpaint_mask = cv2.resize(inpaint_mask, size, interpolation=interpolation)
if is_binary:
y = np.mean(y.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8)
if is_one_pixel_edge:
y = nake_nms(y)
_, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
y = lvmin_thin(y, prunings=new_size_is_bigger)
else:
_, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
y = np.stack([y] * 3, axis=2)
if inpaint_mask is not None:
inpaint_mask = (inpaint_mask > 127).astype(np.float32) * 255.0
inpaint_mask = inpaint_mask[:, :, None].clip(0, 255).astype(np.uint8)
y = np.concatenate([y, inpaint_mask], axis=2)
return y
# if resize_mode == external_code.ResizeMode.RESIZE:
if resize_mode == "just_resize": # RESIZE
np_img = high_quality_resize(np_img, (w, h))
np_img = safe_numpy(np_img)
return get_pytorch_control(np_img), np_img
np_img = heuristic_resize(np_img, (w, h))
np_img = clone_contiguous(np_img)
return np_img_to_torch(np_img, device), np_img
old_h, old_w, _ = np_img.shape
old_w = float(old_w)
@ -236,7 +257,6 @@ def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device:
def safeint(x: Union[int, float]) -> int:
return int(np.round(x))
# if resize_mode == external_code.ResizeMode.OUTER_FIT:
if resize_mode == "fill_resize": # OUTER_FIT
k = min(k0, k1)
borders = np.concatenate([np_img[0, :, :], np_img[-1, :, :], np_img[:, 0, :], np_img[:, -1, :]], axis=0)
@ -245,23 +265,23 @@ def np_img_resize(np_img: np.ndarray, resize_mode: str, h: int, w: int, device:
# Inpaint hijack
high_quality_border_color[3] = 255
high_quality_background = np.tile(high_quality_border_color[None, None], [h, w, 1])
np_img = high_quality_resize(np_img, (safeint(old_w * k), safeint(old_h * k)))
np_img = heuristic_resize(np_img, (safeint(old_w * k), safeint(old_h * k)))
new_h, new_w, _ = np_img.shape
pad_h = max(0, (h - new_h) // 2)
pad_w = max(0, (w - new_w) // 2)
high_quality_background[pad_h : pad_h + new_h, pad_w : pad_w + new_w] = np_img
np_img = high_quality_background
np_img = safe_numpy(np_img)
return get_pytorch_control(np_img), np_img
np_img = clone_contiguous(np_img)
return np_img_to_torch(np_img, device), np_img
else: # resize_mode == "crop_resize" (INNER_FIT)
k = max(k0, k1)
np_img = high_quality_resize(np_img, (safeint(old_w * k), safeint(old_h * k)))
np_img = heuristic_resize(np_img, (safeint(old_w * k), safeint(old_h * k)))
new_h, new_w, _ = np_img.shape
pad_h = max(0, (new_h - h) // 2)
pad_w = max(0, (new_w - w) // 2)
np_img = np_img[pad_h : pad_h + h, pad_w : pad_w + w]
np_img = safe_numpy(np_img)
return get_pytorch_control(np_img), np_img
np_img = clone_contiguous(np_img)
return np_img_to_torch(np_img, device), np_img
def prepare_control_image(
@ -269,12 +289,12 @@ def prepare_control_image(
width: int,
height: int,
num_channels: int = 3,
device="cuda",
dtype=torch.float16,
do_classifier_free_guidance=True,
control_mode="balanced",
resize_mode="just_resize_simple",
):
device: str = "cuda",
dtype: torch.dtype = torch.float16,
control_mode: CONTROLNET_MODE_VALUES = "balanced",
resize_mode: CONTROLNET_RESIZE_VALUES = "just_resize_simple",
do_classifier_free_guidance: bool = True,
) -> torch.Tensor:
"""Pre-process images for ControlNets or T2I-Adapters.
Args:
@ -292,26 +312,15 @@ def prepare_control_image(
resize_mode (str, optional): Defaults to "just_resize_simple".
Raises:
NotImplementedError: If resize_mode == "crop_resize_simple".
NotImplementedError: If resize_mode == "fill_resize_simple".
ValueError: If `resize_mode` is not recognized.
ValueError: If `num_channels` is out of range.
Returns:
torch.Tensor: The pre-processed input tensor.
"""
if (
resize_mode == "just_resize_simple"
or resize_mode == "crop_resize_simple"
or resize_mode == "fill_resize_simple"
):
if resize_mode == "just_resize_simple":
image = image.convert("RGB")
if resize_mode == "just_resize_simple":
image = image.resize((width, height), resample=PIL_INTERPOLATION["lanczos"])
elif resize_mode == "crop_resize_simple":
raise NotImplementedError(f"prepare_control_image is not implemented for resize_mode='{resize_mode}'.")
elif resize_mode == "fill_resize_simple":
raise NotImplementedError(f"prepare_control_image is not implemented for resize_mode='{resize_mode}'.")
image = image.resize((width, height), resample=Image.LANCZOS)
nimage = np.array(image)
nimage = nimage[None, :]
nimage = np.concatenate([nimage], axis=0)
@ -328,8 +337,7 @@ def prepare_control_image(
resize_mode=resize_mode,
h=height,
w=width,
# device=torch.device('cpu')
device=device,
device=torch.device(device),
)
else:
raise ValueError(f"Unsupported resize_mode: '{resize_mode}'.")

View File

@ -4,5 +4,4 @@ Initialization file for invokeai.backend.image_util methods.
from .infill_methods.patchmatch import PatchMatch # noqa: F401
from .pngwriter import PngWriter, PromptFormatter, retrieve_metadata, write_metadata # noqa: F401
from .seamless import configure_model_padding # noqa: F401
from .util import InitImageResizer, make_grid # noqa: F401

View File

@ -13,7 +13,7 @@ from invokeai.app.services.config.config_default import get_config
from invokeai.app.util.download_with_progress import download_with_progress_bar
from invokeai.backend.image_util.depth_anything.model.dpt import DPT_DINOv2
from invokeai.backend.image_util.depth_anything.utilities.util import NormalizeImage, PrepareForNet, Resize
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.logging import InvokeAILogger
config = get_config()
@ -56,7 +56,7 @@ class DepthAnythingDetector:
def __init__(self) -> None:
self.model = None
self.model_size: Union[Literal["large", "base", "small"], None] = None
self.device = choose_torch_device()
self.device = TorchDevice.choose_torch_device()
def load_model(self, model_size: Literal["large", "base", "small"] = "small"):
DEPTH_ANYTHING_MODEL_PATH = config.models_path / DEPTH_ANYTHING_MODELS[model_size]["local"]
@ -81,7 +81,7 @@ class DepthAnythingDetector:
self.model.load_state_dict(torch.load(DEPTH_ANYTHING_MODEL_PATH.as_posix(), map_location="cpu"))
self.model.eval()
self.model.to(choose_torch_device())
self.model.to(self.device)
return self.model
def __call__(self, image: Image.Image, resolution: int = 512) -> Image.Image:
@ -94,7 +94,7 @@ class DepthAnythingDetector:
image_height, image_width = np_image.shape[:2]
np_image = transform({"image": np_image})["image"]
tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(choose_torch_device())
tensor_image = torch.from_numpy(np_image).unsqueeze(0).to(self.device)
with torch.no_grad():
depth = self.model(tensor_image)

View File

@ -7,7 +7,7 @@ import onnxruntime as ort
from invokeai.app.services.config.config_default import get_config
from invokeai.app.util.download_with_progress import download_with_progress_bar
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from .onnxdet import inference_detector
from .onnxpose import inference_pose
@ -28,9 +28,9 @@ config = get_config()
class Wholebody:
def __init__(self):
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
providers = ["CUDAExecutionProvider"] if device == "cuda" else ["CPUExecutionProvider"]
providers = ["CUDAExecutionProvider"] if device.type == "cuda" else ["CPUExecutionProvider"]
DET_MODEL_PATH = config.models_path / DWPOSE_MODELS["yolox_l.onnx"]["local"]
download_with_progress_bar("yolox_l.onnx", DWPOSE_MODELS["yolox_l.onnx"]["url"], DET_MODEL_PATH)

View File

@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download
from PIL import Image
from invokeai.backend.image_util.util import (
non_maximum_suppression,
nms,
normalize_image_channel_count,
np_to_pil,
pil_to_np,
@ -134,7 +134,7 @@ class HEDProcessor:
detected_map = cv2.resize(detected_map, (width, height), interpolation=cv2.INTER_LINEAR)
if scribble:
detected_map = non_maximum_suppression(detected_map, 127, 3.0)
detected_map = nms(detected_map, 127, 3.0)
detected_map = cv2.GaussianBlur(detected_map, (0, 0), 3.0)
detected_map[detected_map > 4] = 255
detected_map[detected_map < 255] = 0

View File

@ -8,7 +8,7 @@ from PIL import Image
import invokeai.backend.util.logging as logger
from invokeai.app.services.config.config_default import get_config
from invokeai.app.util.download_with_progress import download_with_progress_bar
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
def norm_img(np_img):
@ -29,7 +29,7 @@ def load_jit_model(url_or_path, device):
class LaMA:
def __call__(self, input_image: Image.Image, *args: Any, **kwds: Any) -> Any:
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
model_location = get_config().models_path / "core/misc/lama/lama.pt"
if not model_location.exists():

View File

@ -11,7 +11,7 @@ from cv2.typing import MatLike
from tqdm import tqdm
from invokeai.backend.image_util.basicsr.rrdbnet_arch import RRDBNet
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
"""
Adapted from https://github.com/xinntao/Real-ESRGAN/blob/master/realesrgan/utils.py
@ -65,7 +65,7 @@ class RealESRGAN:
self.pre_pad = pre_pad
self.mod_scale: Optional[int] = None
self.half = half
self.device = choose_torch_device()
self.device = TorchDevice.choose_torch_device()
loadnet = torch.load(model_path, map_location=torch.device("cpu"))

View File

@ -8,14 +8,15 @@ from pathlib import Path
import numpy as np
from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
from PIL import Image
from PIL import Image, ImageFilter
from transformers import AutoFeatureExtractor
import invokeai.backend.util.logging as logger
from invokeai.app.services.config.config_default import get_config
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.silence_warnings import SilenceWarnings
repo_id = "CompVis/stable-diffusion-safety-checker"
CHECKER_PATH = "core/convert/stable-diffusion-safety-checker"
@ -24,34 +25,34 @@ class SafetyChecker:
Wrapper around SafetyChecker model.
"""
safety_checker = None
feature_extractor = None
tried_load: bool = False
safety_checker = None
@classmethod
def _load_safety_checker(cls):
if cls.tried_load:
if cls.safety_checker is not None and cls.feature_extractor is not None:
return
try:
cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(get_config().models_path / CHECKER_PATH)
cls.feature_extractor = AutoFeatureExtractor.from_pretrained(get_config().models_path / CHECKER_PATH)
model_path = get_config().models_path / CHECKER_PATH
if model_path.exists():
cls.feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(model_path)
else:
model_path.mkdir(parents=True, exist_ok=True)
cls.feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
cls.feature_extractor.save_pretrained(model_path, safe_serialization=True)
cls.safety_checker = StableDiffusionSafetyChecker.from_pretrained(repo_id)
cls.safety_checker.save_pretrained(model_path, safe_serialization=True)
except Exception as e:
logger.warning(f"Could not load NSFW checker: {str(e)}")
cls.tried_load = True
@classmethod
def safety_checker_available(cls) -> bool:
return Path(get_config().models_path, CHECKER_PATH).exists()
@classmethod
def has_nsfw_concept(cls, image: Image.Image) -> bool:
if not cls.safety_checker_available() and cls.tried_load:
return False
cls._load_safety_checker()
if cls.safety_checker is None or cls.feature_extractor is None:
return False
device = choose_torch_device()
device = TorchDevice.choose_torch_device()
features = cls.feature_extractor([image], return_tensors="pt")
features.to(device)
cls.safety_checker.to(device)
@ -60,3 +61,24 @@ class SafetyChecker:
with SilenceWarnings():
checked_image, has_nsfw_concept = cls.safety_checker(images=x_image, clip_input=features.pixel_values)
return has_nsfw_concept[0]
@classmethod
def blur_if_nsfw(cls, image: Image.Image) -> Image.Image:
if cls.has_nsfw_concept(image):
logger.warning("A potentially NSFW image has been detected. Image will be blurred.")
blurry_image = image.filter(filter=ImageFilter.GaussianBlur(radius=32))
caution = cls._get_caution_img()
# Center the caution image on the blurred image
x = (blurry_image.width - caution.width) // 2
y = (blurry_image.height - caution.height) // 2
blurry_image.paste(caution, (x, y), caution)
image = blurry_image
return image
@classmethod
def _get_caution_img(cls) -> Image.Image:
import invokeai.app.assets.images as image_assets
caution = Image.open(Path(image_assets.__path__[0]) / "caution.png")
return caution.resize((caution.width // 2, caution.height // 2))
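A hypothetical caller of the new helper: if the checker flags the image, a Gaussian-blurred copy with the caution overlay pasted in the centre comes back; otherwise the original image is returned unchanged.

from PIL import Image

# SafetyChecker is the class shown above; the file paths are illustrative.
image = Image.open("output.png")
safe_image = SafetyChecker.blur_if_nsfw(image)
safe_image.save("output_checked.png")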

View File

@ -1,52 +0,0 @@
import torch.nn as nn
def _conv_forward_asymmetric(self, input, weight, bias):
"""
Patch for Conv2d._conv_forward that supports asymmetric padding
"""
working = nn.functional.pad(input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"])
working = nn.functional.pad(working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"])
return nn.functional.conv2d(
working,
weight,
bias,
self.stride,
nn.modules.utils._pair(0),
self.dilation,
self.groups,
)
def configure_model_padding(model, seamless, seamless_axes):
"""
Modifies the 2D convolution layers to use a circular padding mode based on
the `seamless` and `seamless_axes` options.
"""
# TODO: get an explicit interface for this in diffusers: https://github.com/huggingface/diffusers/issues/556
for m in model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
if seamless:
m.asymmetric_padding_mode = {}
m.asymmetric_padding = {}
m.asymmetric_padding_mode["x"] = "circular" if ("x" in seamless_axes) else "constant"
m.asymmetric_padding["x"] = (
m._reversed_padding_repeated_twice[0],
m._reversed_padding_repeated_twice[1],
0,
0,
)
m.asymmetric_padding_mode["y"] = "circular" if ("y" in seamless_axes) else "constant"
m.asymmetric_padding["y"] = (
0,
0,
m._reversed_padding_repeated_twice[2],
m._reversed_padding_repeated_twice[3],
)
m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
else:
m._conv_forward = nn.Conv2d._conv_forward.__get__(m, nn.Conv2d)
if hasattr(m, "asymmetric_padding_mode"):
del m.asymmetric_padding_mode
if hasattr(m, "asymmetric_padding"):
del m.asymmetric_padding

View File

@ -1,4 +1,5 @@
from math import ceil, floor, sqrt
from typing import Optional
import cv2
import numpy as np
@ -143,20 +144,21 @@ def resize_image_to_resolution(input_image: np.ndarray, resolution: int) -> np.n
h = float(input_image.shape[0])
w = float(input_image.shape[1])
scaling_factor = float(resolution) / min(h, w)
h *= scaling_factor
w *= scaling_factor
h = int(np.round(h / 64.0)) * 64
w = int(np.round(w / 64.0)) * 64
h = int(h * scaling_factor)
w = int(w * scaling_factor)
if scaling_factor > 1:
return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_LANCZOS4)
else:
return cv2.resize(input_image, (w, h), interpolation=cv2.INTER_AREA)
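Worked numbers for the scaling above, which no longer snaps the result to a multiple of 64 (values chosen so the arithmetic is exact):

# A 512x896 (HxW) image at resolution=384:
#   scaling_factor = 384 / min(512, 896) = 0.75
#   h = int(512 * 0.75) = 384,  w = int(896 * 0.75) = 672
# The previous rounding to multiples of 64 would have produced (384, 640) instead,
# since np.round(672 / 64) = np.round(10.5) rounds half-to-even, giving 10.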
def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float):
def nms(np_img: np.ndarray, threshold: Optional[int] = None, sigma: Optional[float] = None) -> np.ndarray:
"""
Apply non-maximum suppression to an image.
If both threshold and sigma are provided, the image will be blurred before the suppression and thresholded afterwards,
resulting in a binary output image.
This function is adapted from https://github.com/lllyasviel/ControlNet.
Args:
@ -166,23 +168,36 @@ def non_maximum_suppression(image: np.ndarray, threshold: int, sigma: float):
Returns:
The image after non-maximum suppression.
Raises:
ValueError: If only one of threshold and sigma provided.
"""
image = cv2.GaussianBlur(image.astype(np.float32), (0, 0), sigma)
# Raise a value error if only one of threshold and sigma is provided
if (threshold is None) != (sigma is None):
raise ValueError("Both threshold and sigma must be provided if one is provided.")
if sigma is not None and threshold is not None:
# Blurring the image can help to thin out features
np_img = cv2.GaussianBlur(np_img.astype(np.float32), (0, 0), sigma)
filter_1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
filter_2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
filter_3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
filter_4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
y = np.zeros_like(image)
nms_img = np.zeros_like(np_img)
for f in [filter_1, filter_2, filter_3, filter_4]:
np.putmask(y, cv2.dilate(image, kernel=f) == image, image)
np.putmask(nms_img, cv2.dilate(np_img, kernel=f) == np_img, np_img)
z = np.zeros_like(y, dtype=np.uint8)
z[y > threshold] = 255
return z
if sigma is not None and threshold is not None:
# We blurred - now threshold to get a binary image
thresholded = np.zeros_like(nms_img, dtype=np.uint8)
thresholded[nms_img > threshold] = 255
return thresholded
return nms_img
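A hypothetical check of both calling modes of the rewritten nms() above (assumes it is imported from this module):

import numpy as np

edge_map = np.zeros((64, 64), dtype=np.uint8)
edge_map[32, 10:50] = 255                          # a single horizontal edge

suppressed = nms(edge_map)                         # suppression only
binary = nms(edge_map, threshold=127, sigma=3.0)   # blur, suppress, then threshold (the HED scribble path uses these values)
assert binary.dtype == np.uint8

# Passing only one of the two raises ValueError:
# nms(edge_map, threshold=127)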
def safe_step(x: np.ndarray, step: int = 2) -> np.ndarray:

View File

@ -1,182 +0,0 @@
# copied from https://github.com/tencent-ailab/IP-Adapter (Apache License 2.0)
# and modified as needed
# tencent-ailab comment:
# modified from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from diffusers.models.attention_processor import AttnProcessor2_0 as DiffusersAttnProcessor2_0
from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionProcessorWeights
# Create a version of AttnProcessor2_0 that is a sub-class of nn.Module. This is required for IP-Adapter state_dict
# loading.
class AttnProcessor2_0(DiffusersAttnProcessor2_0, nn.Module):
def __init__(self):
DiffusersAttnProcessor2_0.__init__(self)
nn.Module.__init__(self)
def __call__(
self,
attn,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
temb=None,
ip_adapter_image_prompt_embeds=None,
):
"""Re-definition of DiffusersAttnProcessor2_0.__call__(...) that accepts and ignores the
ip_adapter_image_prompt_embeds parameter.
"""
return DiffusersAttnProcessor2_0.__call__(
self, attn, hidden_states, encoder_hidden_states, attention_mask, temb
)
class IPAttnProcessor2_0(torch.nn.Module):
r"""
Attention processor for IP-Adapater for PyTorch 2.0.
Args:
hidden_size (`int`):
The hidden size of the attention layer.
cross_attention_dim (`int`):
The number of channels in the `encoder_hidden_states`.
scale (`float`, defaults to 1.0):
the weight scale of image prompt.
"""
def __init__(self, weights: list[IPAttentionProcessorWeights], scales: list[float]):
super().__init__()
if not hasattr(F, "scaled_dot_product_attention"):
raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.")
assert len(weights) == len(scales)
self._weights = weights
self._scales = scales
def __call__(
self,
attn,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
temb=None,
ip_adapter_image_prompt_embeds=None,
):
"""Apply IP-Adapter attention.
Args:
ip_adapter_image_prompt_embeds (torch.Tensor): The image prompt embeddings.
Shape: (batch_size, num_ip_images, seq_len, ip_embedding_len).
"""
residual = hidden_states
if attn.spatial_norm is not None:
hidden_states = attn.spatial_norm(hidden_states, temb)
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
if attention_mask is not None:
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
# scaled_dot_product_attention expects attention_mask shape to be
# (batch, heads, source_length, target_length)
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
query = attn.to_q(hidden_states)
if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
key = attn.to_k(encoder_hidden_states)
value = attn.to_v(encoder_hidden_states)
inner_dim = key.shape[-1]
head_dim = inner_dim // attn.heads
query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# the output of sdp = (batch, num_heads, seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
hidden_states = F.scaled_dot_product_attention(
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
)
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
hidden_states = hidden_states.to(query.dtype)
if encoder_hidden_states is not None:
# If encoder_hidden_states is not None, then we are doing cross-attention, not self-attention. In this case,
# we will apply IP-Adapter conditioning. We validate the inputs for IP-Adapter conditioning here.
assert ip_adapter_image_prompt_embeds is not None
assert len(ip_adapter_image_prompt_embeds) == len(self._weights)
for ipa_embed, ipa_weights, scale in zip(
ip_adapter_image_prompt_embeds, self._weights, self._scales, strict=True
):
# The batch dimensions should match.
assert ipa_embed.shape[0] == encoder_hidden_states.shape[0]
# The token_len dimensions should match.
assert ipa_embed.shape[-1] == encoder_hidden_states.shape[-1]
ip_hidden_states = ipa_embed
# Expected ip_hidden_state shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding)
ip_key = ipa_weights.to_k_ip(ip_hidden_states)
ip_value = ipa_weights.to_v_ip(ip_hidden_states)
# Expected ip_key and ip_value shape: (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads)
ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# Expected ip_key and ip_value shape: (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
ip_hidden_states = F.scaled_dot_product_attention(
query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False
)
# Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim)
ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
ip_hidden_states = ip_hidden_states.to(query.dtype)
# Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim)
hidden_states = hidden_states + scale * ip_hidden_states
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
return hidden_states

View File

@ -1,53 +0,0 @@
from contextlib import contextmanager
from diffusers.models import UNet2DConditionModel
from invokeai.backend.ip_adapter.attention_processor import AttnProcessor2_0, IPAttnProcessor2_0
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
class UNetPatcher:
"""A class that contains multiple IP-Adapters and can apply them to a UNet."""
def __init__(self, ip_adapters: list[IPAdapter]):
self._ip_adapters = ip_adapters
self._scales = [1.0] * len(self._ip_adapters)
def set_scale(self, idx: int, value: float):
self._scales[idx] = value
def _prepare_attention_processors(self, unet: UNet2DConditionModel):
"""Prepare a dict of attention processors that can be injected into a unet, and load the IP-Adapter attention
weights into them.
Note that the `unet` param is only used to determine attention block dimensions and naming.
"""
# Construct a dict of attention processors based on the UNet's architecture.
attn_procs = {}
for idx, name in enumerate(unet.attn_processors.keys()):
if name.endswith("attn1.processor"):
attn_procs[name] = AttnProcessor2_0()
else:
# Collect the weights from each IP Adapter for the idx'th attention processor.
attn_procs[name] = IPAttnProcessor2_0(
[ip_adapter.attn_weights.get_attention_processor_weights(idx) for ip_adapter in self._ip_adapters],
self._scales,
)
return attn_procs
@contextmanager
def apply_ip_adapter_attention(self, unet: UNet2DConditionModel):
"""A context manager that patches `unet` with IP-Adapter attention processors."""
attn_procs = self._prepare_attention_processors(unet)
orig_attn_processors = unet.attn_processors
try:
# Note to future devs: set_attn_processor(...) does something slightly unexpected - it pops elements from the
# passed dict. So, if you wanted to keep the dict for future use, you'd have to make a moderately-shallow copy
# of it. E.g. `attn_procs_copy = {k: v for k, v in attn_procs.items()}`.
unet.set_attn_processor(attn_procs)
yield None
finally:
unet.set_attn_processor(orig_attn_processors)

View File

@ -301,12 +301,12 @@ class MainConfigBase(ModelConfigBase):
default_settings: Optional[MainModelDefaultSettings] = Field(
description="Default settings for this model", default=None
)
variant: ModelVariantType = ModelVariantType.Normal
class MainCheckpointConfig(CheckpointConfigBase, MainConfigBase):
"""Model config for main checkpoint models."""
variant: ModelVariantType = ModelVariantType.Normal
prediction_type: SchedulerPredictionType = SchedulerPredictionType.Epsilon
upcast_attention: bool = False

View File

@ -18,7 +18,7 @@ from invokeai.backend.model_manager.load.load_base import LoadedModel, ModelLoad
from invokeai.backend.model_manager.load.model_cache.model_cache_base import ModelCacheBase, ModelLockerBase
from invokeai.backend.model_manager.load.model_util import calc_model_size_by_data, calc_model_size_by_fs
from invokeai.backend.model_manager.load.optimizations import skip_torch_weight_init
from invokeai.backend.util.devices import choose_torch_device, torch_dtype
from invokeai.backend.util.devices import TorchDevice
# TO DO: The loader is not thread safe!
@ -37,7 +37,7 @@ class ModelLoader(ModelLoaderBase):
self._logger = logger
self._ram_cache = ram_cache
self._convert_cache = convert_cache
self._torch_dtype = torch_dtype(choose_torch_device(), app_config)
self._torch_dtype = TorchDevice.choose_torch_dtype()
def load_model(self, model_config: AnyModelConfig, submodel_type: Optional[SubModelType] = None) -> LoadedModel:
"""

View File

@ -30,15 +30,12 @@ import torch
from invokeai.backend.model_manager import AnyModel, SubModelType
from invokeai.backend.model_manager.load.memory_snapshot import MemorySnapshot, get_pretty_snapshot_diff
from invokeai.backend.util.devices import choose_torch_device
from invokeai.backend.util.devices import TorchDevice
from invokeai.backend.util.logging import InvokeAILogger
from .model_cache_base import CacheRecord, CacheStats, ModelCacheBase, ModelLockerBase
from .model_locker import ModelLocker
if choose_torch_device() == torch.device("mps"):
from torch import mps
# Maximum size of the cache, in gigs
# Default is roughly enough to hold three fp16 diffusers models in RAM simultaneously
DEFAULT_MAX_CACHE_SIZE = 6.0
@ -244,9 +241,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
f"Removing {cache_entry.key} from VRAM to free {(cache_entry.size/GIG):.2f}GB; vram free = {(torch.cuda.memory_allocated()/GIG):.2f}GB"
)
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
"""Move model into the indicated device.
@ -271,7 +266,12 @@ class ModelCache(ModelCacheBase[AnyModel]):
start_model_to_time = time.time()
snapshot_before = self._capture_memory_snapshot()
cache_entry.model.to(target_device)
try:
cache_entry.model.to(target_device)
except Exception as e: # blow away cache entry
self._delete_cache_entry(cache_entry)
raise e
snapshot_after = self._capture_memory_snapshot()
end_model_to_time = time.time()
self.logger.debug(
@ -389,8 +389,7 @@ class ModelCache(ModelCacheBase[AnyModel]):
)
current_size -= cache_entry.size
models_cleared += 1
del self._cache_stack[pos]
del self._cached_models[model_key]
self._delete_cache_entry(cache_entry)
del cache_entry
else:
@ -412,8 +411,9 @@ class ModelCache(ModelCacheBase[AnyModel]):
self.stats.cleared = models_cleared
gc.collect()
torch.cuda.empty_cache()
if choose_torch_device() == torch.device("mps"):
mps.empty_cache()
TorchDevice.empty_cache()
self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")
def _delete_cache_entry(self, cache_entry: CacheRecord[AnyModel]) -> None:
self._cache_stack.remove(cache_entry.key)
del self._cached_models[cache_entry.key]

View File

@ -17,7 +17,7 @@ from diffusers.utils import logging as dlogging
from invokeai.app.services.model_install import ModelInstallServiceBase
from invokeai.app.services.model_records.model_records_base import ModelRecordChanges
from invokeai.backend.util.devices import choose_torch_device, torch_dtype
from invokeai.backend.util.devices import TorchDevice
from . import (
AnyModelConfig,
@ -43,6 +43,7 @@ class ModelMerger(object):
Initialize a ModelMerger object with the model installer.
"""
self._installer = installer
self._dtype = TorchDevice.choose_torch_dtype()
def merge_diffusion_models(
self,
@ -68,7 +69,7 @@ class ModelMerger(object):
warnings.simplefilter("ignore")
verbosity = dlogging.get_verbosity()
dlogging.set_verbosity_error()
dtype = torch.float16 if variant == "fp16" else torch_dtype(choose_torch_device())
dtype = torch.float16 if variant == "fp16" else self._dtype
# Note that checkpoint_merger will not work with downloaded HuggingFace fp16 models
# until upstream https://github.com/huggingface/diffusers/pull/6670 is merged and released.
@ -151,7 +152,7 @@ class ModelMerger(object):
dump_path.mkdir(parents=True, exist_ok=True)
dump_path = dump_path / merged_model_name
dtype = torch.float16 if variant == "fp16" else torch_dtype(choose_torch_device())
dtype = torch.float16 if variant == "fp16" else self._dtype
merged_pipe.save_pretrained(dump_path.as_posix(), safe_serialization=True, torch_dtype=dtype, variant=variant)
# register model and get its unique key

View File

@ -51,6 +51,7 @@ LEGACY_CONFIGS: Dict[BaseModelType, Dict[ModelVariantType, Union[str, Dict[Sched
},
BaseModelType.StableDiffusionXL: {
ModelVariantType.Normal: "sd_xl_base.yaml",
ModelVariantType.Inpaint: "sd_xl_inpaint.yaml",
},
BaseModelType.StableDiffusionXLRefiner: {
ModelVariantType.Normal: "sd_xl_refiner.yaml",

View File

@ -155,7 +155,7 @@ STARTER_MODELS: list[StarterModel] = [
StarterModel(
name="IP Adapter",
base=BaseModelType.StableDiffusion1,
source="InvokeAI/ip_adapter_sd15",
source="https://huggingface.co/InvokeAI/ip_adapter_sd15/resolve/main/ip-adapter_sd15.safetensors",
description="IP-Adapter for SD 1.5 models",
type=ModelType.IPAdapter,
dependencies=[ip_adapter_sd_image_encoder],
@ -163,7 +163,7 @@ STARTER_MODELS: list[StarterModel] = [
StarterModel(
name="IP Adapter Plus",
base=BaseModelType.StableDiffusion1,
source="InvokeAI/ip_adapter_plus_sd15",
source="https://huggingface.co/InvokeAI/ip_adapter_plus_sd15/resolve/main/ip-adapter-plus_sd15.safetensors",
description="Refined IP-Adapter for SD 1.5 models",
type=ModelType.IPAdapter,
dependencies=[ip_adapter_sd_image_encoder],
@ -171,7 +171,7 @@ STARTER_MODELS: list[StarterModel] = [
StarterModel(
name="IP Adapter Plus Face",
base=BaseModelType.StableDiffusion1,
source="InvokeAI/ip_adapter_plus_face_sd15",
source="https://huggingface.co/InvokeAI/ip_adapter_plus_face_sd15/resolve/main/ip-adapter-plus-face_sd15.safetensors",
description="Refined IP-Adapter for SD 1.5 models, adapted for faces",
type=ModelType.IPAdapter,
dependencies=[ip_adapter_sd_image_encoder],
@ -179,7 +179,7 @@ STARTER_MODELS: list[StarterModel] = [
StarterModel(
name="IP Adapter SDXL",
base=BaseModelType.StableDiffusionXL,
source="InvokeAI/ip_adapter_sdxl",
source="https://huggingface.co/InvokeAI/ip_adapter_sdxl_vit_h/resolve/main/ip-adapter_sdxl_vit-h.safetensors",
description="IP-Adapter for SDXL models",
type=ModelType.IPAdapter,
dependencies=[ip_adapter_sdxl_image_encoder],

View File

@ -21,12 +21,11 @@ from pydantic import Field
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from invokeai.app.services.config.config_default import get_config
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.ip_adapter.unet_patcher import UNetPatcher
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ConditioningData
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import IPAdapterData, TextConditioningData
from invokeai.backend.stable_diffusion.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
from invokeai.backend.stable_diffusion.diffusion.unet_attention_patcher import UNetAttentionPatcher, UNetIPAdapterData
from invokeai.backend.util.attention import auto_detect_slice_size
from invokeai.backend.util.devices import normalize_device
from invokeai.backend.util.devices import TorchDevice
@dataclass
@ -149,16 +148,6 @@ class ControlNetData:
resize_mode: str = Field(default="just_resize")
@dataclass
class IPAdapterData:
ip_adapter_model: IPAdapter = Field(default=None)
# TODO: change to polymorphic so can do different weights per step (once implemented...)
weight: Union[float, List[float]] = Field(default=1.0)
# weight: float = Field(default=1.0)
begin_step_percent: float = Field(default=0.0)
end_step_percent: float = Field(default=1.0)
@dataclass
class T2IAdapterData:
"""A structure containing the information required to apply conditioning from a single T2I-Adapter model."""
@ -266,7 +255,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
if self.unet.device.type == "cpu" or self.unet.device.type == "mps":
mem_free = psutil.virtual_memory().free
elif self.unet.device.type == "cuda":
mem_free, _ = torch.cuda.mem_get_info(normalize_device(self.unet.device))
mem_free, _ = torch.cuda.mem_get_info(TorchDevice.normalize(self.unet.device))
else:
raise ValueError(f"unrecognized device {self.unet.device}")
# input tensor of [1, 4, h/8, w/8]
@ -295,7 +284,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
self,
latents: torch.Tensor,
num_inference_steps: int,
conditioning_data: ConditioningData,
scheduler_step_kwargs: dict[str, Any],
conditioning_data: TextConditioningData,
*,
noise: Optional[torch.Tensor],
timesteps: torch.Tensor,
@ -308,7 +298,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
mask: Optional[torch.Tensor] = None,
masked_latents: Optional[torch.Tensor] = None,
gradient_mask: Optional[bool] = False,
seed: Optional[int] = None,
seed: int,
) -> torch.Tensor:
if init_timestep.shape[0] == 0:
return latents
@ -326,20 +316,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
latents = self.scheduler.add_noise(latents, noise, batched_t)
if mask is not None:
# if no noise provided, noisify unmasked area based on seed(or 0 as fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
if is_inpainting_model(self.unet):
if masked_latents is None:
raise Exception("Source image required for inpaint mask when inpaint model used!")
@ -348,6 +324,15 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
self._unet_forward, mask, masked_latents
)
else:
# if no noise provided, noisify unmasked area based on seed
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise, gradient_mask))
try:
@ -355,6 +340,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
latents,
timesteps,
conditioning_data,
scheduler_step_kwargs=scheduler_step_kwargs,
additional_guidance=additional_guidance,
control_data=control_data,
ip_adapter_data=ip_adapter_data,
@ -380,7 +366,8 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
self,
latents: torch.Tensor,
timesteps,
conditioning_data: ConditioningData,
conditioning_data: TextConditioningData,
scheduler_step_kwargs: dict[str, Any],
*,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
@ -397,22 +384,22 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
if timesteps.shape[0] == 0:
return latents
ip_adapter_unet_patcher = None
extra_conditioning_info = conditioning_data.text_embeddings.extra_conditioning
if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control:
attn_ctx = self.invokeai_diffuser.custom_attention_context(
self.invokeai_diffuser.model,
extra_conditioning_info=extra_conditioning_info,
use_ip_adapter = ip_adapter_data is not None
use_regional_prompting = (
conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None
)
unet_attention_patcher = None
self.use_ip_adapter = use_ip_adapter
attn_ctx = nullcontext()
if use_ip_adapter or use_regional_prompting:
ip_adapters: Optional[List[UNetIPAdapterData]] = (
[{"ip_adapter": ipa.ip_adapter_model, "target_blocks": ipa.target_blocks} for ipa in ip_adapter_data]
if use_ip_adapter
else None
)
self.use_ip_adapter = False
elif ip_adapter_data is not None:
# TODO(ryand): Should we raise an exception if both custom attention and IP-Adapter attention are active?
# As it is now, the IP-Adapter will silently be skipped.
ip_adapter_unet_patcher = UNetPatcher([ipa.ip_adapter_model for ipa in ip_adapter_data])
attn_ctx = ip_adapter_unet_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model)
self.use_ip_adapter = True
else:
attn_ctx = nullcontext()
unet_attention_patcher = UNetAttentionPatcher(ip_adapters)
attn_ctx = unet_attention_patcher.apply_ip_adapter_attention(self.invokeai_diffuser.model)
with attn_ctx:
if callback is not None:
@ -435,11 +422,11 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
conditioning_data,
step_index=i,
total_step_count=len(timesteps),
scheduler_step_kwargs=scheduler_step_kwargs,
additional_guidance=additional_guidance,
control_data=control_data,
ip_adapter_data=ip_adapter_data,
t2i_adapter_data=t2i_adapter_data,
ip_adapter_unet_patcher=ip_adapter_unet_patcher,
)
latents = step_output.prev_sample
predicted_original = getattr(step_output, "pred_original_sample", None)
@ -463,14 +450,14 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
self,
t: torch.Tensor,
latents: torch.Tensor,
conditioning_data: ConditioningData,
conditioning_data: TextConditioningData,
step_index: int,
total_step_count: int,
scheduler_step_kwargs: dict[str, Any],
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
ip_adapter_data: Optional[list[IPAdapterData]] = None,
t2i_adapter_data: Optional[list[T2IAdapterData]] = None,
ip_adapter_unet_patcher: Optional[UNetPatcher] = None,
):
# invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value
timestep = t[0]
@ -485,23 +472,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# i.e. before or after passing it to InvokeAIDiffuserComponent
latent_model_input = self.scheduler.scale_model_input(latents, timestep)
# handle IP-Adapter
if self.use_ip_adapter and ip_adapter_data is not None: # somewhat redundant but logic is clearer
for i, single_ip_adapter_data in enumerate(ip_adapter_data):
first_adapter_step = math.floor(single_ip_adapter_data.begin_step_percent * total_step_count)
last_adapter_step = math.ceil(single_ip_adapter_data.end_step_percent * total_step_count)
weight = (
single_ip_adapter_data.weight[step_index]
if isinstance(single_ip_adapter_data.weight, List)
else single_ip_adapter_data.weight
)
if step_index >= first_adapter_step and step_index <= last_adapter_step:
# Only apply this IP-Adapter if the current step is within the IP-Adapter's begin/end step range.
ip_adapter_unet_patcher.set_scale(i, weight)
else:
# Otherwise, set the IP-Adapter's scale to 0, so it has no effect.
ip_adapter_unet_patcher.set_scale(i, 0.0)
# Handle ControlNet(s)
down_block_additional_residuals = None
mid_block_additional_residual = None
@ -550,6 +520,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
step_index=step_index,
total_step_count=total_step_count,
conditioning_data=conditioning_data,
ip_adapter_data=ip_adapter_data,
down_block_additional_residuals=down_block_additional_residuals, # for ControlNet
mid_block_additional_residual=mid_block_additional_residual, # for ControlNet
down_intrablock_additional_residuals=down_intrablock_additional_residuals, # for T2I-Adapter
@ -569,7 +540,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
)
# compute the previous noisy sample x_t -> x_t-1
step_output = self.scheduler.step(noise_pred, timestep, latents, **conditioning_data.scheduler_args)
step_output = self.scheduler.step(noise_pred, timestep, latents, **scheduler_step_kwargs)
# TODO: discuss injection point options. For now this is a patch to get progress images working with inpainting again.
for guidance in additional_guidance:

View File

@ -1,27 +1,17 @@
import dataclasses
import inspect
from dataclasses import dataclass, field
from typing import Any, List, Optional, Union
import math
from dataclasses import dataclass
from typing import List, Optional, Union
import torch
from .cross_attention_control import Arguments
@dataclass
class ExtraConditioningInfo:
tokens_count_including_eos_bos: int
cross_attention_control_args: Optional[Arguments] = None
@property
def wants_cross_attention_control(self):
return self.cross_attention_control_args is not None
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
@dataclass
class BasicConditioningInfo:
"""SD 1/2 text conditioning information produced by Compel."""
embeds: torch.Tensor
extra_conditioning: Optional[ExtraConditioningInfo]
def to(self, device, dtype=None):
self.embeds = self.embeds.to(device=device, dtype=dtype)
@ -35,6 +25,8 @@ class ConditioningFieldData:
@dataclass
class SDXLConditioningInfo(BasicConditioningInfo):
"""SDXL text conditioning information produced by Compel."""
pooled_embeds: torch.Tensor
add_time_ids: torch.Tensor
@ -57,37 +49,75 @@ class IPAdapterConditioningInfo:
@dataclass
class ConditioningData:
unconditioned_embeddings: BasicConditioningInfo
text_embeddings: BasicConditioningInfo
"""
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
`guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf).
Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
"""
guidance_scale: Union[float, List[float]]
""" for models trained using zero-terminal SNR ("ztsnr"), it's suggested to use guidance_rescale_multiplier of 0.7 .
ref [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf)
"""
guidance_rescale_multiplier: float = 0
scheduler_args: dict[str, Any] = field(default_factory=dict)
class IPAdapterData:
ip_adapter_model: IPAdapter
ip_adapter_conditioning: IPAdapterConditioningInfo
mask: torch.Tensor
target_blocks: List[str]
ip_adapter_conditioning: Optional[list[IPAdapterConditioningInfo]] = None
# Either a single weight applied to all steps, or a list of weights for each step.
weight: Union[float, List[float]] = 1.0
begin_step_percent: float = 0.0
end_step_percent: float = 1.0
@property
def dtype(self):
return self.text_embeddings.dtype
def scale_for_step(self, step_index: int, total_steps: int) -> float:
first_adapter_step = math.floor(self.begin_step_percent * total_steps)
last_adapter_step = math.ceil(self.end_step_percent * total_steps)
weight = self.weight[step_index] if isinstance(self.weight, List) else self.weight
if step_index >= first_adapter_step and step_index <= last_adapter_step:
# Only apply this IP-Adapter if the current step is within the IP-Adapter's begin/end step range.
return weight
# Otherwise, set the IP-Adapter's scale to 0, so it has no effect.
return 0.0
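The begin/end percentages map onto discrete step indices with floor/ceil; a small standalone check of that arithmetic, mirroring scale_for_step above (illustrative values):

import math

def scale_for_step(step_index: int, total_steps: int, weight: float = 0.8,
                   begin: float = 0.25, end: float = 0.75) -> float:
    first = math.floor(begin * total_steps)   # 7 for 30 steps
    last = math.ceil(end * total_steps)       # 23 for 30 steps
    return weight if first <= step_index <= last else 0.0

assert scale_for_step(5, 30) == 0.0    # before the adapter's range
assert scale_for_step(15, 30) == 0.8   # inside the range
assert scale_for_step(28, 30) == 0.0   # after the range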
def add_scheduler_args_if_applicable(self, scheduler, **kwargs):
scheduler_args = dict(self.scheduler_args)
step_method = inspect.signature(scheduler.step)
for name, value in kwargs.items():
try:
step_method.bind_partial(**{name: value})
except TypeError:
# FIXME: don't silently discard arguments
pass # debug("%s does not accept argument named %r", scheduler, name)
else:
scheduler_args[name] = value
return dataclasses.replace(self, scheduler_args=scheduler_args)
@dataclass
class Range:
start: int
end: int
class TextConditioningRegions:
def __init__(
self,
masks: torch.Tensor,
ranges: list[Range],
):
# A binary mask indicating the regions of the image that the prompt should be applied to.
# Shape: (1, num_prompts, height, width)
# Dtype: torch.bool
self.masks = masks
# A list of ranges indicating the start and end indices of the embeddings that the corresponding mask applies to.
# ranges[i] contains the embedding range for the i'th prompt / mask.
self.ranges = ranges
assert self.masks.shape[1] == len(self.ranges)
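A hypothetical regional-prompt setup for two sub-prompts on a 64x64 latent grid, using the Range and TextConditioningRegions definitions above (sizes and token ranges are illustrative):

import torch

masks = torch.zeros((1, 2, 64, 64), dtype=torch.bool)
masks[0, 0, :, :32] = True    # prompt 0 applies to the left half
masks[0, 1, :, 32:] = True    # prompt 1 applies to the right half
ranges = [Range(start=0, end=77), Range(start=77, end=154)]    # embedding rows per prompt
regions = TextConditioningRegions(masks=masks, ranges=ranges)  # satisfies the shape/length assert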
class TextConditioningData:
def __init__(
self,
uncond_text: Union[BasicConditioningInfo, SDXLConditioningInfo],
cond_text: Union[BasicConditioningInfo, SDXLConditioningInfo],
uncond_regions: Optional[TextConditioningRegions],
cond_regions: Optional[TextConditioningRegions],
guidance_scale: Union[float, List[float]],
guidance_rescale_multiplier: float = 0,
):
self.uncond_text = uncond_text
self.cond_text = cond_text
self.uncond_regions = uncond_regions
self.cond_regions = cond_regions
# Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
# `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf).
# Guidance scale is enabled by setting `guidance_scale > 1`. A higher guidance scale encourages the model to generate
# images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
self.guidance_scale = guidance_scale
# For models trained using zero-terminal SNR ("ztsnr"), it's suggested to use guidance_rescale_multiplier of 0.7.
# See [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
self.guidance_rescale_multiplier = guidance_rescale_multiplier
def is_sdxl(self):
assert isinstance(self.uncond_text, SDXLConditioningInfo) == isinstance(self.cond_text, SDXLConditioningInfo)
return isinstance(self.cond_text, SDXLConditioningInfo)

View File

@ -1,218 +0,0 @@
# adapted from bloc97's CrossAttentionControl colab
# https://github.com/bloc97/CrossAttentionControl
import enum
from dataclasses import dataclass, field
from typing import Optional
import torch
from compel.cross_attention_control import Arguments
from diffusers.models.attention_processor import Attention, SlicedAttnProcessor
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
from invokeai.backend.util.devices import torch_dtype
class CrossAttentionType(enum.Enum):
SELF = 1
TOKENS = 2
class CrossAttnControlContext:
def __init__(self, arguments: Arguments):
"""
:param arguments: Arguments for the cross-attention control process
"""
self.cross_attention_mask: Optional[torch.Tensor] = None
self.cross_attention_index_map: Optional[torch.Tensor] = None
self.arguments = arguments
def get_active_cross_attention_control_types_for_step(
self, percent_through: float = None
) -> list[CrossAttentionType]:
"""
Should cross-attention control be applied on the given step?
:param percent_through: How far through the step sequence are we (0.0=pure noise, 1.0=completely denoised image). Expected range 0.0..<1.0.
:return: A list of attention types that cross-attention control should be performed for on the given step. May be [].
"""
if percent_through is None:
return [CrossAttentionType.SELF, CrossAttentionType.TOKENS]
opts = self.arguments.edit_options
to_control = []
if opts["s_start"] <= percent_through < opts["s_end"]:
to_control.append(CrossAttentionType.SELF)
if opts["t_start"] <= percent_through < opts["t_end"]:
to_control.append(CrossAttentionType.TOKENS)
return to_control
def setup_cross_attention_control_attention_processors(unet: UNet2DConditionModel, context: CrossAttnControlContext):
"""
Inject attention parameters and functions into the passed in model to enable cross attention editing.
:param model: The unet model to inject into.
:return: None
"""
# adapted from init_attention_edit
device = context.arguments.edited_conditioning.device
# urgh. should this be hardcoded?
max_length = 77
# mask=1 means use base prompt attention, mask=0 means use edited prompt attention
mask = torch.zeros(max_length, dtype=torch_dtype(device))
indices_target = torch.arange(max_length, dtype=torch.long)
indices = torch.arange(max_length, dtype=torch.long)
for name, a0, a1, b0, b1 in context.arguments.edit_opcodes:
if b0 < max_length:
if name == "equal": # or (name == "replace" and a1 - a0 == b1 - b0):
# these tokens have not been edited
indices[b0:b1] = indices_target[a0:a1]
mask[b0:b1] = 1
context.cross_attention_mask = mask.to(device)
context.cross_attention_index_map = indices.to(device)
old_attn_processors = unet.attn_processors
if torch.backends.mps.is_available():
# see note in StableDiffusionGeneratorPipeline.__init__ about borked slicing on MPS
unet.set_attn_processor(SwapCrossAttnProcessor())
else:
# try to re-use an existing slice size
default_slice_size = 4
slice_size = next(
(p.slice_size for p in old_attn_processors.values() if type(p) is SlicedAttnProcessor), default_slice_size
)
unet.set_attn_processor(SlicedSwapCrossAttnProcesser(slice_size=slice_size))
@dataclass
class SwapCrossAttnContext:
modified_text_embeddings: torch.Tensor
index_map: torch.Tensor # maps from original prompt token indices to the equivalent tokens in the modified prompt
mask: torch.Tensor # in the target space of the index_map
cross_attention_types_to_do: list[CrossAttentionType] = field(default_factory=list)
def wants_cross_attention_control(self, attn_type: CrossAttentionType) -> bool:
return attn_type in self.cross_attention_types_to_do
@classmethod
def make_mask_and_index_map(
cls, edit_opcodes: list[tuple[str, int, int, int, int]], max_length: int
) -> tuple[torch.Tensor, torch.Tensor]:
# mask=1 means use original prompt attention, mask=0 means use modified prompt attention
mask = torch.zeros(max_length)
indices_target = torch.arange(max_length, dtype=torch.long)
indices = torch.arange(max_length, dtype=torch.long)
for name, a0, a1, b0, b1 in edit_opcodes:
if b0 < max_length:
if name == "equal":
# these tokens remain the same as in the original prompt
indices[b0:b1] = indices_target[a0:a1]
mask[b0:b1] = 1
return mask, indices
class SlicedSwapCrossAttnProcesser(SlicedAttnProcessor):
# TODO: dynamically pick slice size based on memory conditions
def __call__(
self,
attn: Attention,
hidden_states,
encoder_hidden_states=None,
attention_mask=None,
# kwargs
swap_cross_attn_context: SwapCrossAttnContext = None,
**kwargs,
):
attention_type = CrossAttentionType.SELF if encoder_hidden_states is None else CrossAttentionType.TOKENS
# if cross-attention control is not in play, just call through to the base implementation.
if (
attention_type is CrossAttentionType.SELF
or swap_cross_attn_context is None
or not swap_cross_attn_context.wants_cross_attention_control(attention_type)
):
# print(f"SwapCrossAttnContext for {attention_type} not active - passing request to superclass")
return super().__call__(attn, hidden_states, encoder_hidden_states, attention_mask)
# else:
# print(f"SwapCrossAttnContext for {attention_type} active")
batch_size, sequence_length, _ = hidden_states.shape
attention_mask = attn.prepare_attention_mask(
attention_mask=attention_mask,
target_length=sequence_length,
batch_size=batch_size,
)
query = attn.to_q(hidden_states)
dim = query.shape[-1]
query = attn.head_to_batch_dim(query)
original_text_embeddings = encoder_hidden_states
modified_text_embeddings = swap_cross_attn_context.modified_text_embeddings
original_text_key = attn.to_k(original_text_embeddings)
modified_text_key = attn.to_k(modified_text_embeddings)
original_value = attn.to_v(original_text_embeddings)
modified_value = attn.to_v(modified_text_embeddings)
original_text_key = attn.head_to_batch_dim(original_text_key)
modified_text_key = attn.head_to_batch_dim(modified_text_key)
original_value = attn.head_to_batch_dim(original_value)
modified_value = attn.head_to_batch_dim(modified_value)
# compute slices and prepare output tensor
batch_size_attention = query.shape[0]
hidden_states = torch.zeros(
(batch_size_attention, sequence_length, dim // attn.heads),
device=query.device,
dtype=query.dtype,
)
# do slices
for i in range(max(1, hidden_states.shape[0] // self.slice_size)):
start_idx = i * self.slice_size
end_idx = (i + 1) * self.slice_size
query_slice = query[start_idx:end_idx]
original_key_slice = original_text_key[start_idx:end_idx]
modified_key_slice = modified_text_key[start_idx:end_idx]
attn_mask_slice = attention_mask[start_idx:end_idx] if attention_mask is not None else None
original_attn_slice = attn.get_attention_scores(query_slice, original_key_slice, attn_mask_slice)
modified_attn_slice = attn.get_attention_scores(query_slice, modified_key_slice, attn_mask_slice)
# because the prompt modifications may result in token sequences shifted forwards or backwards,
# the original attention probabilities must be remapped to account for token index changes in the
# modified prompt
remapped_original_attn_slice = torch.index_select(
original_attn_slice, -1, swap_cross_attn_context.index_map
)
# only some tokens taken from the original attention probabilities. this is controlled by the mask.
mask = swap_cross_attn_context.mask
inverse_mask = 1 - mask
attn_slice = remapped_original_attn_slice * mask + modified_attn_slice * inverse_mask
del remapped_original_attn_slice, modified_attn_slice
attn_slice = torch.bmm(attn_slice, modified_value[start_idx:end_idx])
hidden_states[start_idx:end_idx] = attn_slice
# done
hidden_states = attn.batch_to_head_dim(hidden_states)
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
return hidden_states
class SwapCrossAttnProcessor(SlicedSwapCrossAttnProcesser):
def __init__(self):
super(SwapCrossAttnProcessor, self).__init__(slice_size=int(1e9)) # massive slice size = don't slice

View File

@ -0,0 +1,214 @@
from dataclasses import dataclass
from typing import List, Optional, cast
import torch
import torch.nn.functional as F
from diffusers.models.attention_processor import Attention, AttnProcessor2_0
from invokeai.backend.ip_adapter.ip_attention_weights import IPAttentionProcessorWeights
from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import RegionalIPData
from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData
@dataclass
class IPAdapterAttentionWeights:
ip_adapter_weights: IPAttentionProcessorWeights
skip: bool
class CustomAttnProcessor2_0(AttnProcessor2_0):
"""A custom implementation of AttnProcessor2_0 that supports additional Invoke features.
This implementation is based on
https://github.com/huggingface/diffusers/blame/fcfa270fbd1dc294e2f3a505bae6bcb791d721c3/src/diffusers/models/attention_processor.py#L1204
Supported custom features:
- IP-Adapter
- Regional prompt attention
"""
def __init__(
self,
ip_adapter_attention_weights: Optional[List[IPAdapterAttentionWeights]] = None,
):
"""Initialize a CustomAttnProcessor2_0.
Note: Arguments that are the same for all attention layers are passed to __call__(). Arguments that are
layer-specific are passed to __init__().
Args:
ip_adapter_weights: The IP-Adapter attention weights. ip_adapter_weights[i] contains the attention weights
for the i'th IP-Adapter.
"""
super().__init__()
self._ip_adapter_attention_weights = ip_adapter_attention_weights
def __call__(
self,
attn: Attention,
hidden_states: torch.Tensor,
encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
temb: Optional[torch.Tensor] = None,
# For Regional Prompting:
regional_prompt_data: Optional[RegionalPromptData] = None,
percent_through: Optional[torch.Tensor] = None,
# For IP-Adapter:
regional_ip_data: Optional[RegionalIPData] = None,
*args,
**kwargs,
) -> torch.FloatTensor:
"""Apply attention.
Args:
regional_prompt_data: The regional prompt data for the current batch. If not None, this will be used to
apply regional prompt masking.
regional_ip_data: The IP-Adapter data for the current batch.
"""
# If true, we are doing cross-attention, if false we are doing self-attention.
is_cross_attention = encoder_hidden_states is not None
# Start unmodified block from AttnProcessor2_0.
# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
residual = hidden_states
if attn.spatial_norm is not None:
hidden_states = attn.spatial_norm(hidden_states, temb)
input_ndim = hidden_states.ndim
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.view(batch_size, channel, height * width).transpose(1, 2)
batch_size, sequence_length, _ = (
hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape
)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# End unmodified block from AttnProcessor2_0.
_, query_seq_len, _ = hidden_states.shape
# Handle regional prompt attention masks.
if regional_prompt_data is not None and is_cross_attention:
assert percent_through is not None
prompt_region_attention_mask = regional_prompt_data.get_cross_attn_mask(
query_seq_len=query_seq_len, key_seq_len=sequence_length
)
if attention_mask is None:
attention_mask = prompt_region_attention_mask
else:
attention_mask = prompt_region_attention_mask + attention_mask
# Start unmodified block from AttnProcessor2_0.
# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
if attention_mask is not None:
attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size)
# scaled_dot_product_attention expects attention_mask shape to be
# (batch, heads, source_length, target_length)
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
query = attn.to_q(hidden_states)
if encoder_hidden_states is None:
encoder_hidden_states = hidden_states
elif attn.norm_cross:
encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
key = attn.to_k(encoder_hidden_states)
value = attn.to_v(encoder_hidden_states)
inner_dim = key.shape[-1]
head_dim = inner_dim // attn.heads
query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# the output of sdp = (batch, num_heads, seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
hidden_states = F.scaled_dot_product_attention(
query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_causal=False
)
hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim)
hidden_states = hidden_states.to(query.dtype)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# End unmodified block from AttnProcessor2_0.
# Apply IP-Adapter conditioning.
if is_cross_attention:
if self._ip_adapter_attention_weights:
assert regional_ip_data is not None
ip_masks = regional_ip_data.get_masks(query_seq_len=query_seq_len)
assert (
len(regional_ip_data.image_prompt_embeds)
== len(self._ip_adapter_attention_weights)
== len(regional_ip_data.scales)
== ip_masks.shape[1]
)
for ipa_index, ipa_embed in enumerate(regional_ip_data.image_prompt_embeds):
ipa_weights = self._ip_adapter_attention_weights[ipa_index].ip_adapter_weights
ipa_scale = regional_ip_data.scales[ipa_index]
ip_mask = ip_masks[0, ipa_index, ...]
# The batch dimensions should match.
assert ipa_embed.shape[0] == encoder_hidden_states.shape[0]
# The token_len dimensions should match.
assert ipa_embed.shape[-1] == encoder_hidden_states.shape[-1]
ip_hidden_states = ipa_embed
# Expected ip_hidden_states shape: (batch_size, num_ip_images, ip_seq_len, ip_image_embedding)
if not self._ip_adapter_attention_weights[ipa_index].skip:
ip_key = ipa_weights.to_k_ip(ip_hidden_states)
ip_value = ipa_weights.to_v_ip(ip_hidden_states)
# Expected ip_key and ip_value shape:
# (batch_size, num_ip_images, ip_seq_len, head_dim * num_heads)
ip_key = ip_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
ip_value = ip_value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
# Expected ip_key and ip_value shape:
# (batch_size, num_heads, num_ip_images * ip_seq_len, head_dim)
# TODO: add support for attn.scale when we move to Torch 2.1
ip_hidden_states = F.scaled_dot_product_attention(
query, ip_key, ip_value, attn_mask=None, dropout_p=0.0, is_causal=False
)
# Expected ip_hidden_states shape: (batch_size, num_heads, query_seq_len, head_dim)
ip_hidden_states = ip_hidden_states.transpose(1, 2).reshape(
batch_size, -1, attn.heads * head_dim
)
ip_hidden_states = ip_hidden_states.to(query.dtype)
# Expected ip_hidden_states shape: (batch_size, query_seq_len, num_heads * head_dim)
hidden_states = hidden_states + ipa_scale * ip_hidden_states * ip_mask
else:
# If IP-Adapter is not enabled, then regional_ip_data should not be passed in.
assert regional_ip_data is None
# Start unmodified block from AttnProcessor2_0.
# vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
# linear proj
hidden_states = attn.to_out[0](hidden_states)
# dropout
hidden_states = attn.to_out[1](hidden_states)
if input_ndim == 4:
batch_size, channel, height, width = hidden_states.shape
hidden_states = hidden_states.transpose(-1, -2).reshape(batch_size, channel, height, width)
if attn.residual_connection:
hidden_states = hidden_states + residual
hidden_states = hidden_states / attn.rescale_output_factor
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# End of unmodified block from AttnProcessor2_0
# casting torch.Tensor to torch.FloatTensor to avoid type issues
return cast(torch.FloatTensor, hidden_states)
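The IP-Adapter contribution at the end of the cross-attention branch is a masked, scaled residual add. A toy illustration of the broadcasting involved (shapes chosen arbitrarily):

import torch

batch_size, query_seq_len, inner_dim = 2, 4096, 640
hidden_states = torch.randn(batch_size, query_seq_len, inner_dim)
ip_hidden_states = torch.randn(batch_size, query_seq_len, inner_dim)

# Per-adapter spatial mask of shape (query_seq_len, 1); it broadcasts over batch and channels.
ip_mask = torch.ones(query_seq_len, 1)
ipa_scale = 0.75

out = hidden_states + ipa_scale * ip_hidden_states * ip_mask
assert out.shape == (batch_size, query_seq_len, inner_dim)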

View File

@ -0,0 +1,72 @@
import torch
class RegionalIPData:
"""A class to manage the data for regional IP-Adapter conditioning."""
def __init__(
self,
image_prompt_embeds: list[torch.Tensor],
scales: list[float],
masks: list[torch.Tensor],
dtype: torch.dtype,
device: torch.device,
max_downscale_factor: int = 8,
):
"""Initialize a `IPAdapterConditioningData` object."""
assert len(image_prompt_embeds) == len(scales) == len(masks)
# The image prompt embeddings.
# image_prompt_embeds[i] contains the image prompt embeddings for the i'th IP-Adapter. Each tensor
# has shape (batch_size, num_ip_images, seq_len, ip_embedding_len).
self.image_prompt_embeds = image_prompt_embeds
# The scales for the IP-Adapter attention.
# scales[i] contains the attention scale for the i'th IP-Adapter.
self.scales = scales
# The IP-Adapter masks.
# self._masks_by_seq_len[s] contains the spatial masks for the downsampling level with query sequence length of
# s. It has shape (batch_size, num_ip_images, query_seq_len, 1). The masks have values of 1.0 for included
# regions and 0.0 for excluded regions.
self._masks_by_seq_len = self._prepare_masks(masks, max_downscale_factor, device, dtype)
def _prepare_masks(
self, masks: list[torch.Tensor], max_downscale_factor: int, device: torch.device, dtype: torch.dtype
) -> dict[int, torch.Tensor]:
"""Prepare the masks for the IP-Adapter attention."""
# Concatenate the masks so that they can be processed more efficiently.
mask_tensor = torch.cat(masks, dim=1)
mask_tensor = mask_tensor.to(device=device, dtype=dtype)
masks_by_seq_len: dict[int, torch.Tensor] = {}
# Downsample the spatial dimensions by factors of 2 until max_downscale_factor is reached.
downscale_factor = 1
while downscale_factor <= max_downscale_factor:
b, num_ip_adapters, h, w = mask_tensor.shape
# Assert that the batch size is 1, because I haven't thought through batch handling for this feature yet.
assert b == 1
# The IP-Adapters are applied in the cross-attention layers, where the query sequence length is the h * w of
# the spatial features.
query_seq_len = h * w
masks_by_seq_len[query_seq_len] = mask_tensor.view((b, num_ip_adapters, -1, 1))
downscale_factor *= 2
if downscale_factor <= max_downscale_factor:
# We use max pooling because we downscale to a pretty low resolution, so we don't want small mask
# regions to be lost entirely.
#
# ceil_mode=True is set to mirror the downsampling behavior of SD and SDXL.
#
# TODO(ryand): In the future, we may want to experiment with other downsampling methods.
mask_tensor = torch.nn.functional.max_pool2d(mask_tensor, kernel_size=2, stride=2, ceil_mode=True)
return masks_by_seq_len
def get_masks(self, query_seq_len: int) -> torch.Tensor:
"""Get the mask for the given query sequence length."""
return self._masks_by_seq_len[query_seq_len]
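A usage sketch for a single IP-Adapter on a 64x64 latent; the embedding dimensions below are illustrative:

import torch

regional_ip_data = RegionalIPData(
    image_prompt_embeds=[torch.randn(1, 1, 4, 768)],  # (batch_size, num_ip_images, seq_len, ip_embedding_len)
    scales=[1.0],
    masks=[torch.ones(1, 1, 64, 64)],
    dtype=torch.float32,
    device=torch.device("cpu"),
)

# Masks are pre-flattened for each attention resolution: 64*64, 32*32, 16*16, 8*8.
mask = regional_ip_data.get_masks(query_seq_len=64 * 64)
assert mask.shape == (1, 1, 64 * 64, 1)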

View File

@ -0,0 +1,105 @@
import torch
import torch.nn.functional as F
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
TextConditioningRegions,
)
class RegionalPromptData:
"""A class to manage the prompt data for regional conditioning."""
def __init__(
self,
regions: list[TextConditioningRegions],
device: torch.device,
dtype: torch.dtype,
max_downscale_factor: int = 8,
):
"""Initialize a `RegionalPromptData` object.
Args:
regions (list[TextConditioningRegions]): regions[i] contains the prompt regions for the i'th sample in the
batch.
device (torch.device): The device to use for the attention masks.
dtype (torch.dtype): The data type to use for the attention masks.
max_downscale_factor: Spatial masks will be prepared for downscale factors from 1 to max_downscale_factor
in steps of 2x.
"""
self._regions = regions
self._device = device
self._dtype = dtype
# self._spatial_masks_by_seq_len[b][s] contains the spatial masks for the b'th batch sample with a query
# sequence length of s.
self._spatial_masks_by_seq_len: list[dict[int, torch.Tensor]] = self._prepare_spatial_masks(
regions, max_downscale_factor
)
self._negative_cross_attn_mask_score = -10000.0
def _prepare_spatial_masks(
self, regions: list[TextConditioningRegions], max_downscale_factor: int = 8
) -> list[dict[int, torch.Tensor]]:
"""Prepare the spatial masks for all downscaling factors."""
# batch_sample_masks_by_seq_len[b][s] contains the spatial masks for the b'th batch sample with a query sequence
# length of s.
batch_sample_masks_by_seq_len: list[dict[int, torch.Tensor]] = []
for batch_sample_regions in regions:
batch_sample_masks_by_seq_len.append({})
batch_sample_masks = batch_sample_regions.masks.to(device=self._device, dtype=self._dtype)
# Downsample the spatial dimensions by factors of 2 until max_downscale_factor is reached.
downscale_factor = 1
while downscale_factor <= max_downscale_factor:
b, _num_prompts, h, w = batch_sample_masks.shape
assert b == 1
query_seq_len = h * w
batch_sample_masks_by_seq_len[-1][query_seq_len] = batch_sample_masks
downscale_factor *= 2
if downscale_factor <= max_downscale_factor:
# We use max pooling because we downscale to a pretty low resolution, so we don't want small prompt
# regions to be lost entirely.
#
# ceil_mode=True is set to mirror the downsampling behavior of SD and SDXL.
#
# TODO(ryand): In the future, we may want to experiment with other downsampling methods (e.g.
# nearest interpolation), and could potentially use a weighted mask rather than a binary mask.
batch_sample_masks = F.max_pool2d(batch_sample_masks, kernel_size=2, stride=2, ceil_mode=True)
return batch_sample_masks_by_seq_len
def get_cross_attn_mask(self, query_seq_len: int, key_seq_len: int) -> torch.Tensor:
"""Get the cross-attention mask for the given query sequence length.
Args:
query_seq_len: The length of the flattened spatial features at the current downscaling level.
key_seq_len (int): The sequence length of the prompt embeddings (which act as the key in the cross-attention
layers). This is most likely equal to the max embedding range end, but we pass it explicitly to be sure.
Returns:
torch.Tensor: The cross-attention score mask.
shape: (batch_size, query_seq_len, key_seq_len).
dtype: float
"""
batch_size = len(self._spatial_masks_by_seq_len)
batch_spatial_masks = [self._spatial_masks_by_seq_len[b][query_seq_len] for b in range(batch_size)]
# Create an empty attention mask with the correct shape.
attn_mask = torch.zeros((batch_size, query_seq_len, key_seq_len), dtype=self._dtype, device=self._device)
for batch_idx in range(batch_size):
batch_sample_spatial_masks = batch_spatial_masks[batch_idx]
batch_sample_regions = self._regions[batch_idx]
# Flatten the spatial dimensions of the mask by reshaping to (1, num_prompts, query_seq_len, 1).
_, num_prompts, _, _ = batch_sample_spatial_masks.shape
batch_sample_query_masks = batch_sample_spatial_masks.view((1, num_prompts, query_seq_len, 1))
for prompt_idx, embedding_range in enumerate(batch_sample_regions.ranges):
batch_sample_query_scores = batch_sample_query_masks[0, prompt_idx, :, :].clone()
batch_sample_query_mask = batch_sample_query_scores > 0.5
batch_sample_query_scores[batch_sample_query_mask] = 0.0
batch_sample_query_scores[~batch_sample_query_mask] = self._negative_cross_attn_mask_score
attn_mask[batch_idx, :, embedding_range.start : embedding_range.end] = batch_sample_query_scores
return attn_mask
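A rough usage sketch, reusing the `TextConditioningRegions` and `Range` types shown earlier; scores stay at 0.0 inside a prompt's region and drop to -10000.0 outside it:

import torch

masks = torch.zeros((1, 2, 64, 64), dtype=torch.bool)
masks[0, 0, :, :32] = True   # prompt 0: left half
masks[0, 1, :, 32:] = True   # prompt 1: right half
regions = TextConditioningRegions(masks=masks, ranges=[Range(start=0, end=77), Range(start=77, end=154)])

prompt_data = RegionalPromptData(regions=[regions], device=torch.device("cpu"), dtype=torch.float32)
attn_mask = prompt_data.get_cross_attn_mask(query_seq_len=64 * 64, key_seq_len=154)
assert attn_mask.shape == (1, 64 * 64, 154)  # (batch_size, query_seq_len, key_seq_len)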

View File

@ -1,26 +1,20 @@
from __future__ import annotations
import math
from contextlib import contextmanager
from typing import Any, Callable, Optional, Union
import torch
from diffusers import UNet2DConditionModel
from typing_extensions import TypeAlias
from invokeai.app.services.config.config_default import get_config
from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
ConditioningData,
ExtraConditioningInfo,
SDXLConditioningInfo,
)
from .cross_attention_control import (
CrossAttentionType,
CrossAttnControlContext,
SwapCrossAttnContext,
setup_cross_attention_control_attention_processors,
IPAdapterData,
Range,
TextConditioningData,
TextConditioningRegions,
)
from invokeai.backend.stable_diffusion.diffusion.regional_ip_data import RegionalIPData
from invokeai.backend.stable_diffusion.diffusion.regional_prompt_data import RegionalPromptData
ModelForwardCallback: TypeAlias = Union[
# x, t, conditioning, Optional[cross-attention kwargs]
@ -58,31 +52,8 @@ class InvokeAIDiffuserComponent:
self.conditioning = None
self.model = model
self.model_forward_callback = model_forward_callback
self.cross_attention_control_context = None
self.sequential_guidance = config.sequential_guidance
@contextmanager
def custom_attention_context(
self,
unet: UNet2DConditionModel,
extra_conditioning_info: Optional[ExtraConditioningInfo],
):
old_attn_processors = unet.attn_processors
try:
self.cross_attention_control_context = CrossAttnControlContext(
arguments=extra_conditioning_info.cross_attention_control_args,
)
setup_cross_attention_control_attention_processors(
unet,
self.cross_attention_control_context,
)
yield None
finally:
self.cross_attention_control_context = None
unet.set_attn_processor(old_attn_processors)
def do_controlnet_step(
self,
control_data,
@ -90,7 +61,7 @@ class InvokeAIDiffuserComponent:
timestep: torch.Tensor,
step_index: int,
total_step_count: int,
conditioning_data,
conditioning_data: TextConditioningData,
):
down_block_res_samples, mid_block_res_sample = None, None
@ -123,28 +94,28 @@ class InvokeAIDiffuserComponent:
added_cond_kwargs = None
if cfg_injection: # only applying ControlNet to conditional instead of in unconditioned
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
if conditioning_data.is_sdxl():
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
"time_ids": conditioning_data.text_embeddings.add_time_ids,
"text_embeds": conditioning_data.cond_text.pooled_embeds,
"time_ids": conditioning_data.cond_text.add_time_ids,
}
encoder_hidden_states = conditioning_data.text_embeddings.embeds
encoder_hidden_states = conditioning_data.cond_text.embeds
encoder_attention_mask = None
else:
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
if conditioning_data.is_sdxl():
added_cond_kwargs = {
"text_embeds": torch.cat(
[
# TODO: how to pad? just by zeros? or even truncate?
conditioning_data.unconditioned_embeddings.pooled_embeds,
conditioning_data.text_embeddings.pooled_embeds,
conditioning_data.uncond_text.pooled_embeds,
conditioning_data.cond_text.pooled_embeds,
],
dim=0,
),
"time_ids": torch.cat(
[
conditioning_data.unconditioned_embeddings.add_time_ids,
conditioning_data.text_embeddings.add_time_ids,
conditioning_data.uncond_text.add_time_ids,
conditioning_data.cond_text.add_time_ids,
],
dim=0,
),
@ -153,8 +124,8 @@ class InvokeAIDiffuserComponent:
encoder_hidden_states,
encoder_attention_mask,
) = self._concat_conditionings_for_batch(
conditioning_data.unconditioned_embeddings.embeds,
conditioning_data.text_embeddings.embeds,
conditioning_data.uncond_text.embeds,
conditioning_data.cond_text.embeds,
)
if isinstance(control_datum.weight, list):
# if controlnet has multiple weights, use the weight for the current step
@ -198,24 +169,15 @@ class InvokeAIDiffuserComponent:
self,
sample: torch.Tensor,
timestep: torch.Tensor,
conditioning_data: ConditioningData,
conditioning_data: TextConditioningData,
ip_adapter_data: Optional[list[IPAdapterData]],
step_index: int,
total_step_count: int,
down_block_additional_residuals: Optional[torch.Tensor] = None, # for ControlNet
mid_block_additional_residual: Optional[torch.Tensor] = None, # for ControlNet
down_intrablock_additional_residuals: Optional[torch.Tensor] = None, # for T2I-Adapter
):
cross_attention_control_types_to_do = []
if self.cross_attention_control_context is not None:
percent_through = step_index / total_step_count
cross_attention_control_types_to_do = (
self.cross_attention_control_context.get_active_cross_attention_control_types_for_step(percent_through)
)
wants_cross_attention_control = len(cross_attention_control_types_to_do) > 0
if wants_cross_attention_control or self.sequential_guidance:
# If wants_cross_attention_control is True, we force the sequential mode to be used, because cross-attention
# control is currently only supported in sequential mode.
if self.sequential_guidance:
(
unconditioned_next_x,
conditioned_next_x,
@ -223,7 +185,9 @@ class InvokeAIDiffuserComponent:
x=sample,
sigma=timestep,
conditioning_data=conditioning_data,
cross_attention_control_types_to_do=cross_attention_control_types_to_do,
ip_adapter_data=ip_adapter_data,
step_index=step_index,
total_step_count=total_step_count,
down_block_additional_residuals=down_block_additional_residuals,
mid_block_additional_residual=mid_block_additional_residual,
down_intrablock_additional_residuals=down_intrablock_additional_residuals,
@ -236,6 +200,9 @@ class InvokeAIDiffuserComponent:
x=sample,
sigma=timestep,
conditioning_data=conditioning_data,
ip_adapter_data=ip_adapter_data,
step_index=step_index,
total_step_count=total_step_count,
down_block_additional_residuals=down_block_additional_residuals,
mid_block_additional_residual=mid_block_additional_residual,
down_intrablock_additional_residuals=down_intrablock_additional_residuals,
@ -294,53 +261,84 @@ class InvokeAIDiffuserComponent:
def _apply_standard_conditioning(
self,
x,
sigma,
conditioning_data: ConditioningData,
x: torch.Tensor,
sigma: torch.Tensor,
conditioning_data: TextConditioningData,
ip_adapter_data: Optional[list[IPAdapterData]],
step_index: int,
total_step_count: int,
down_block_additional_residuals: Optional[torch.Tensor] = None, # for ControlNet
mid_block_additional_residual: Optional[torch.Tensor] = None, # for ControlNet
down_intrablock_additional_residuals: Optional[torch.Tensor] = None, # for T2I-Adapter
):
) -> tuple[torch.Tensor, torch.Tensor]:
"""Runs the conditioned and unconditioned UNet forward passes in a single batch for faster inference speed at
the cost of higher memory usage.
"""
x_twice = torch.cat([x] * 2)
sigma_twice = torch.cat([sigma] * 2)
cross_attention_kwargs = None
if conditioning_data.ip_adapter_conditioning is not None:
cross_attention_kwargs = {}
if ip_adapter_data is not None:
ip_adapter_conditioning = [ipa.ip_adapter_conditioning for ipa in ip_adapter_data]
# Note that we 'stack' to produce tensors of shape (batch_size, num_ip_images, seq_len, token_len).
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": [
torch.stack(
[ipa_conditioning.uncond_image_prompt_embeds, ipa_conditioning.cond_image_prompt_embeds]
)
for ipa_conditioning in conditioning_data.ip_adapter_conditioning
]
}
image_prompt_embeds = [
torch.stack([ipa_conditioning.uncond_image_prompt_embeds, ipa_conditioning.cond_image_prompt_embeds])
for ipa_conditioning in ip_adapter_conditioning
]
scales = [ipa.scale_for_step(step_index, total_step_count) for ipa in ip_adapter_data]
ip_masks = [ipa.mask for ipa in ip_adapter_data]
regional_ip_data = RegionalIPData(
image_prompt_embeds=image_prompt_embeds, scales=scales, masks=ip_masks, dtype=x.dtype, device=x.device
)
cross_attention_kwargs["regional_ip_data"] = regional_ip_data
added_cond_kwargs = None
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
if conditioning_data.is_sdxl():
added_cond_kwargs = {
"text_embeds": torch.cat(
[
# TODO: how to pad? just by zeros? or even truncate?
conditioning_data.unconditioned_embeddings.pooled_embeds,
conditioning_data.text_embeddings.pooled_embeds,
conditioning_data.uncond_text.pooled_embeds,
conditioning_data.cond_text.pooled_embeds,
],
dim=0,
),
"time_ids": torch.cat(
[
conditioning_data.unconditioned_embeddings.add_time_ids,
conditioning_data.text_embeddings.add_time_ids,
conditioning_data.uncond_text.add_time_ids,
conditioning_data.cond_text.add_time_ids,
],
dim=0,
),
}
if conditioning_data.cond_regions is not None or conditioning_data.uncond_regions is not None:
# TODO(ryand): We currently initialize RegionalPromptData for every denoising step. The text conditionings
# and masks are not changing from step-to-step, so this really only needs to be done once. While this seems
# painfully inefficient, the time spent is typically negligible compared to the forward inference pass of
# the UNet. The main reason that this hasn't been moved up to eliminate redundancy is that it is slightly
# awkward to handle both standard conditioning and sequential conditioning further up the stack.
regions = []
for c, r in [
(conditioning_data.uncond_text, conditioning_data.uncond_regions),
(conditioning_data.cond_text, conditioning_data.cond_regions),
]:
if r is None:
# Create a dummy mask and range for text conditioning that doesn't have region masks.
_, _, h, w = x.shape
r = TextConditioningRegions(
masks=torch.ones((1, 1, h, w), dtype=x.dtype),
ranges=[Range(start=0, end=c.embeds.shape[1])],
)
regions.append(r)
cross_attention_kwargs["regional_prompt_data"] = RegionalPromptData(
regions=regions, device=x.device, dtype=x.dtype
)
cross_attention_kwargs["percent_through"] = step_index / total_step_count
both_conditionings, encoder_attention_mask = self._concat_conditionings_for_batch(
conditioning_data.unconditioned_embeddings.embeds, conditioning_data.text_embeddings.embeds
conditioning_data.uncond_text.embeds, conditioning_data.cond_text.embeds
)
both_results = self.model_forward_callback(
x_twice,
@ -360,8 +358,10 @@ class InvokeAIDiffuserComponent:
self,
x: torch.Tensor,
sigma,
conditioning_data: ConditioningData,
cross_attention_control_types_to_do: list[CrossAttentionType],
conditioning_data: TextConditioningData,
ip_adapter_data: Optional[list[IPAdapterData]],
step_index: int,
total_step_count: int,
down_block_additional_residuals: Optional[torch.Tensor] = None, # for ControlNet
mid_block_additional_residual: Optional[torch.Tensor] = None, # for ControlNet
down_intrablock_additional_residuals: Optional[torch.Tensor] = None, # for T2I-Adapter
@ -391,53 +391,48 @@ class InvokeAIDiffuserComponent:
if mid_block_additional_residual is not None:
uncond_mid_block, cond_mid_block = mid_block_additional_residual.chunk(2)
# If cross-attention control is enabled, prepare the SwapCrossAttnContext.
cross_attn_processor_context = None
if self.cross_attention_control_context is not None:
# Note that the SwapCrossAttnContext is initialized with an empty list of cross_attention_types_to_do.
# This list is empty because cross-attention control is not applied in the unconditioned pass. This field
# will be populated before the conditioned pass.
cross_attn_processor_context = SwapCrossAttnContext(
modified_text_embeddings=self.cross_attention_control_context.arguments.edited_conditioning,
index_map=self.cross_attention_control_context.cross_attention_index_map,
mask=self.cross_attention_control_context.cross_attention_mask,
cross_attention_types_to_do=[],
)
#####################
# Unconditioned pass
#####################
cross_attention_kwargs = None
cross_attention_kwargs = {}
# Prepare IP-Adapter cross-attention kwargs for the unconditioned pass.
if conditioning_data.ip_adapter_conditioning is not None:
if ip_adapter_data is not None:
ip_adapter_conditioning = [ipa.ip_adapter_conditioning for ipa in ip_adapter_data]
# Note that we 'unsqueeze' to produce tensors of shape (batch_size=1, num_ip_images, seq_len, token_len).
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": [
torch.unsqueeze(ipa_conditioning.uncond_image_prompt_embeds, dim=0)
for ipa_conditioning in conditioning_data.ip_adapter_conditioning
]
}
image_prompt_embeds = [
torch.unsqueeze(ipa_conditioning.uncond_image_prompt_embeds, dim=0)
for ipa_conditioning in ip_adapter_conditioning
]
# Prepare cross-attention control kwargs for the unconditioned pass.
if cross_attn_processor_context is not None:
cross_attention_kwargs = {"swap_cross_attn_context": cross_attn_processor_context}
scales = [ipa.scale_for_step(step_index, total_step_count) for ipa in ip_adapter_data]
ip_masks = [ipa.mask for ipa in ip_adapter_data]
regional_ip_data = RegionalIPData(
image_prompt_embeds=image_prompt_embeds, scales=scales, masks=ip_masks, dtype=x.dtype, device=x.device
)
cross_attention_kwargs["regional_ip_data"] = regional_ip_data
# Prepare SDXL conditioning kwargs for the unconditioned pass.
added_cond_kwargs = None
is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo
if is_sdxl:
if conditioning_data.is_sdxl():
added_cond_kwargs = {
"text_embeds": conditioning_data.unconditioned_embeddings.pooled_embeds,
"time_ids": conditioning_data.unconditioned_embeddings.add_time_ids,
"text_embeds": conditioning_data.uncond_text.pooled_embeds,
"time_ids": conditioning_data.uncond_text.add_time_ids,
}
# Prepare prompt regions for the unconditioned pass.
if conditioning_data.uncond_regions is not None:
cross_attention_kwargs["regional_prompt_data"] = RegionalPromptData(
regions=[conditioning_data.uncond_regions], device=x.device, dtype=x.dtype
)
cross_attention_kwargs["percent_through"] = step_index / total_step_count
# Run unconditioned UNet denoising (i.e. negative prompt).
unconditioned_next_x = self.model_forward_callback(
x,
sigma,
conditioning_data.unconditioned_embeddings.embeds,
conditioning_data.uncond_text.embeds,
cross_attention_kwargs=cross_attention_kwargs,
down_block_additional_residuals=uncond_down_block,
mid_block_additional_residual=uncond_mid_block,
@ -449,36 +444,43 @@ class InvokeAIDiffuserComponent:
# Conditioned pass
###################
cross_attention_kwargs = None
cross_attention_kwargs = {}
# Prepare IP-Adapter cross-attention kwargs for the conditioned pass.
if conditioning_data.ip_adapter_conditioning is not None:
if ip_adapter_data is not None:
ip_adapter_conditioning = [ipa.ip_adapter_conditioning for ipa in ip_adapter_data]
# Note that we 'unsqueeze' to produce tensors of shape (batch_size=1, num_ip_images, seq_len, token_len).
cross_attention_kwargs = {
"ip_adapter_image_prompt_embeds": [
torch.unsqueeze(ipa_conditioning.cond_image_prompt_embeds, dim=0)
for ipa_conditioning in conditioning_data.ip_adapter_conditioning
]
}
image_prompt_embeds = [
torch.unsqueeze(ipa_conditioning.cond_image_prompt_embeds, dim=0)
for ipa_conditioning in ip_adapter_conditioning
]
# Prepare cross-attention control kwargs for the conditioned pass.
if cross_attn_processor_context is not None:
cross_attn_processor_context.cross_attention_types_to_do = cross_attention_control_types_to_do
cross_attention_kwargs = {"swap_cross_attn_context": cross_attn_processor_context}
scales = [ipa.scale_for_step(step_index, total_step_count) for ipa in ip_adapter_data]
ip_masks = [ipa.mask for ipa in ip_adapter_data]
regional_ip_data = RegionalIPData(
image_prompt_embeds=image_prompt_embeds, scales=scales, masks=ip_masks, dtype=x.dtype, device=x.device
)
cross_attention_kwargs["regional_ip_data"] = regional_ip_data
# Prepare SDXL conditioning kwargs for the conditioned pass.
added_cond_kwargs = None
if is_sdxl:
if conditioning_data.is_sdxl():
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
"time_ids": conditioning_data.text_embeddings.add_time_ids,
"text_embeds": conditioning_data.cond_text.pooled_embeds,
"time_ids": conditioning_data.cond_text.add_time_ids,
}
# Prepare prompt regions for the conditioned pass.
if conditioning_data.cond_regions is not None:
cross_attention_kwargs["regional_prompt_data"] = RegionalPromptData(
regions=[conditioning_data.cond_regions], device=x.device, dtype=x.dtype
)
cross_attention_kwargs["percent_through"] = step_index / total_step_count
# Run conditioned UNet denoising (i.e. positive prompt).
conditioned_next_x = self.model_forward_callback(
x,
sigma,
conditioning_data.text_embeddings.embeds,
conditioning_data.cond_text.embeds,
cross_attention_kwargs=cross_attention_kwargs,
down_block_additional_residuals=cond_down_block,
mid_block_additional_residual=cond_mid_block,

View File

@ -0,0 +1,68 @@
from contextlib import contextmanager
from typing import List, Optional, TypedDict
from diffusers.models import UNet2DConditionModel
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.stable_diffusion.diffusion.custom_atttention import (
CustomAttnProcessor2_0,
IPAdapterAttentionWeights,
)
class UNetIPAdapterData(TypedDict):
ip_adapter: IPAdapter
target_blocks: List[str]
class UNetAttentionPatcher:
"""A class for patching a UNet with CustomAttnProcessor2_0 attention layers."""
def __init__(self, ip_adapter_data: Optional[List[UNetIPAdapterData]]):
self._ip_adapters = ip_adapter_data
def _prepare_attention_processors(self, unet: UNet2DConditionModel):
"""Prepare a dict of attention processors that can be injected into a unet, and load the IP-Adapter attention
weights into them (if IP-Adapters are being applied).
Note that the `unet` param is only used to determine attention block dimensions and naming.
"""
# Construct a dict of attention processors based on the UNet's architecture.
attn_procs = {}
for idx, name in enumerate(unet.attn_processors.keys()):
if name.endswith("attn1.processor") or self._ip_adapters is None:
# "attn1" processors do not use IP-Adapters.
attn_procs[name] = CustomAttnProcessor2_0()
else:
# Collect the weights from each IP Adapter for the idx'th attention processor.
ip_adapter_attention_weights_collection: list[IPAdapterAttentionWeights] = []
for ip_adapter in self._ip_adapters:
ip_adapter_weights = ip_adapter["ip_adapter"].attn_weights.get_attention_processor_weights(idx)
skip = True
for block in ip_adapter["target_blocks"]:
if block in name:
skip = False
break
ip_adapter_attention_weights: IPAdapterAttentionWeights = IPAdapterAttentionWeights(
ip_adapter_weights=ip_adapter_weights, skip=skip
)
ip_adapter_attention_weights_collection.append(ip_adapter_attention_weights)
attn_procs[name] = CustomAttnProcessor2_0(ip_adapter_attention_weights_collection)
return attn_procs
@contextmanager
def apply_ip_adapter_attention(self, unet: UNet2DConditionModel):
"""A context manager that patches `unet` with CustomAttnProcessor2_0 attention layers."""
attn_procs = self._prepare_attention_processors(unet)
orig_attn_processors = unet.attn_processors
try:
# Note to future devs: set_attn_processor(...) does something slightly unexpected - it pops elements from
# the passed dict. So, if you wanted to keep the dict for future use, you'd have to make a
# moderately-shallow copy of it. E.g. `attn_procs_copy = {k: v for k, v in attn_procs.items()}`.
unet.set_attn_processor(attn_procs)
yield None
finally:
unet.set_attn_processor(orig_attn_processors)
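A hedged usage sketch; the `ip_adapter` and `unet` objects are assumed to have been loaded elsewhere, and the `target_blocks` values are illustrative:

# ip_adapter: IPAdapter and unet: UNet2DConditionModel are assumed to be loaded already.
ip_adapters: list[UNetIPAdapterData] = [
    {"ip_adapter": ip_adapter, "target_blocks": ["down_blocks", "mid_block", "up_blocks"]},
]
patcher = UNetAttentionPatcher(ip_adapters)

with patcher.apply_ip_adapter_attention(unet):
    # Every attention layer now runs CustomAttnProcessor2_0; the original processors
    # are restored when the block exits, even if denoising raises.
    ...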

View File

@ -13,6 +13,7 @@ from diffusers import (
LCMScheduler,
LMSDiscreteScheduler,
PNDMScheduler,
TCDScheduler,
UniPCMultistepScheduler,
)
@ -40,4 +41,5 @@ SCHEDULER_MAP = {
"dpmpp_sde_k": (DPMSolverSDEScheduler, {"use_karras_sigmas": True, "noise_sampler_seed": 0}),
"unipc": (UniPCMultistepScheduler, {"cpu_only": True}),
"lcm": (LCMScheduler, {}),
"tcd": (TCDScheduler, {}),
}
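Each SCHEDULER_MAP entry pairs a scheduler class with its default constructor overrides. A rough sketch of how an entry might be consumed (the `pipeline` object here is an assumption, not the exact call site):

scheduler_class, extra_config = SCHEDULER_MAP["tcd"]  # (TCDScheduler, {})

# Typical diffusers pattern: start from the current scheduler's config, then apply the overrides.
scheduler = scheduler_class.from_config({**pipeline.scheduler.config, **extra_config})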

View File

@ -1,89 +1,51 @@
from __future__ import annotations
from contextlib import contextmanager
from typing import Callable, List, Union
from typing import Callable, List, Optional, Tuple, Union
import torch
import torch.nn as nn
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
from diffusers.models.lora import LoRACompatibleConv
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
def _conv_forward_asymmetric(self, input, weight, bias):
"""
Patch for Conv2d._conv_forward that supports asymmetric padding
"""
working = nn.functional.pad(input, self.asymmetric_padding["x"], mode=self.asymmetric_padding_mode["x"])
working = nn.functional.pad(working, self.asymmetric_padding["y"], mode=self.asymmetric_padding_mode["y"])
return nn.functional.conv2d(
working,
weight,
bias,
self.stride,
nn.modules.utils._pair(0),
self.dilation,
self.groups,
)
@contextmanager
def set_seamless(model: Union[UNet2DConditionModel, AutoencoderKL, AutoencoderTiny], seamless_axes: List[str]):
if not seamless_axes:
yield
return
# Callable: (input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor
to_restore: list[tuple[nn.Conv2d | nn.ConvTranspose2d, Callable]] = []
# override conv_forward
# https://github.com/huggingface/diffusers/issues/556#issuecomment-1993287019
def _conv_forward_asymmetric(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
self.paddingX = (self._reversed_padding_repeated_twice[0], self._reversed_padding_repeated_twice[1], 0, 0)
self.paddingY = (0, 0, self._reversed_padding_repeated_twice[2], self._reversed_padding_repeated_twice[3])
working = torch.nn.functional.pad(input, self.paddingX, mode=x_mode)
working = torch.nn.functional.pad(working, self.paddingY, mode=y_mode)
return torch.nn.functional.conv2d(
working, weight, bias, self.stride, torch.nn.modules.utils._pair(0), self.dilation, self.groups
)
original_layers: List[Tuple[nn.Conv2d, Callable]] = []
try:
# Hard coded to skip down block layers, allowing for seamless tiling at the expense of prompt adherence
skipped_layers = 1
for m_name, m in model.named_modules():
if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
continue
x_mode = "circular" if "x" in seamless_axes else "constant"
y_mode = "circular" if "y" in seamless_axes else "constant"
if isinstance(model, UNet2DConditionModel) and m_name.startswith("down_blocks.") and ".resnets." in m_name:
# down_blocks.1.resnets.1.conv1
_, block_num, _, resnet_num, submodule_name = m_name.split(".")
block_num = int(block_num)
resnet_num = int(resnet_num)
conv_layers: List[torch.nn.Conv2d] = []
if block_num >= len(model.down_blocks) - skipped_layers:
continue
for module in model.modules():
if isinstance(module, torch.nn.Conv2d):
conv_layers.append(module)
# Skip the second resnet (could be configurable)
if resnet_num > 0:
continue
# Skip Conv2d layers (could be configurable)
if submodule_name == "conv2":
continue
m.asymmetric_padding_mode = {}
m.asymmetric_padding = {}
m.asymmetric_padding_mode["x"] = "circular" if ("x" in seamless_axes) else "constant"
m.asymmetric_padding["x"] = (
m._reversed_padding_repeated_twice[0],
m._reversed_padding_repeated_twice[1],
0,
0,
)
m.asymmetric_padding_mode["y"] = "circular" if ("y" in seamless_axes) else "constant"
m.asymmetric_padding["y"] = (
0,
0,
m._reversed_padding_repeated_twice[2],
m._reversed_padding_repeated_twice[3],
)
to_restore.append((m, m._conv_forward))
m._conv_forward = _conv_forward_asymmetric.__get__(m, nn.Conv2d)
for layer in conv_layers:
if isinstance(layer, LoRACompatibleConv) and layer.lora_layer is None:
layer.lora_layer = lambda *x: 0
original_layers.append((layer, layer._conv_forward))
layer._conv_forward = _conv_forward_asymmetric.__get__(layer, torch.nn.Conv2d)
yield
finally:
for module, orig_conv_forward in to_restore:
module._conv_forward = orig_conv_forward
if hasattr(module, "asymmetric_padding_mode"):
del module.asymmetric_padding_mode
if hasattr(module, "asymmetric_padding"):
del module.asymmetric_padding
for layer, orig_conv_forward in original_layers:
layer._conv_forward = orig_conv_forward
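A usage sketch of the rewritten context manager, assuming a UNet2DConditionModel has already been loaded as `unet`:

# Tile seamlessly along the x axis only.
with set_seamless(unet, ["x"]):
    # Inside the block every Conv2d pads circularly along x, so outputs tile horizontally;
    # the original _conv_forward implementations are restored when the block exits.
    ...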

View File

@ -2,7 +2,6 @@
Initialization file for invokeai.backend.util
"""
from .devices import choose_precision, choose_torch_device
from .logging import InvokeAILogger
from .util import GIG, Chdir, directory_size
@ -11,6 +10,4 @@ __all__ = [
"directory_size",
"Chdir",
"InvokeAILogger",
"choose_precision",
"choose_torch_device",
]

View File

@ -0,0 +1,29 @@
"""
This module defines a context manager `catch_sigint()` which temporarily replaces
the SIGINT handler installed by the ASGI server so that the user can ^C the application
and shut it down immediately. This was implemented to allow the user to interrupt
slow model hashing during startup.
Use like this:
from invokeai.backend.util.catch_sigint import catch_sigint
with catch_sigint():
run_some_hard_to_interrupt_process()
"""
import signal
from contextlib import contextmanager
from typing import Generator
def sigint_handler(signum, frame): # type: ignore
signal.signal(signal.SIGINT, signal.SIG_DFL)
signal.raise_signal(signal.SIGINT)
@contextmanager
def catch_sigint() -> Generator[None, None, None]:
original_handler = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, sigint_handler)
yield
signal.signal(signal.SIGINT, original_handler)

View File

@ -1,91 +1,110 @@
from __future__ import annotations
from contextlib import nullcontext
from typing import Literal, Optional, Union
from typing import Dict, Literal, Optional, Union
import torch
from torch import autocast
from deprecated import deprecated
from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.app.services.config.config_default import get_config
# legacy APIs
TorchPrecisionNames = Literal["float32", "float16", "bfloat16"]
CPU_DEVICE = torch.device("cpu")
CUDA_DEVICE = torch.device("cuda")
MPS_DEVICE = torch.device("mps")
@deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore
def choose_precision(device: torch.device) -> TorchPrecisionNames:
"""Return the string representation of the recommended torch device."""
torch_dtype = TorchDevice.choose_torch_dtype(device)
return PRECISION_TO_NAME[torch_dtype]
@deprecated("Use TorchDevice.choose_torch_device() instead.") # type: ignore
def choose_torch_device() -> torch.device:
"""Convenience routine for guessing which GPU device to run model on"""
config = get_config()
if config.device == "auto":
if torch.cuda.is_available():
return torch.device("cuda")
if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
return torch.device("mps")
"""Return the torch.device to use for accelerated inference."""
return TorchDevice.choose_torch_device()
@deprecated("Use TorchDevice.choose_torch_dtype() instead.") # type: ignore
def torch_dtype(device: torch.device) -> torch.dtype:
"""Return the torch precision for the recommended torch device."""
return TorchDevice.choose_torch_dtype(device)
NAME_TO_PRECISION: Dict[TorchPrecisionNames, torch.dtype] = {
"float32": torch.float32,
"float16": torch.float16,
"bfloat16": torch.bfloat16,
}
PRECISION_TO_NAME: Dict[torch.dtype, TorchPrecisionNames] = {v: k for k, v in NAME_TO_PRECISION.items()}
class TorchDevice:
"""Abstraction layer for torch devices."""
@classmethod
def choose_torch_device(cls) -> torch.device:
"""Return the torch.device to use for accelerated inference."""
app_config = get_config()
if app_config.device != "auto":
device = torch.device(app_config.device)
elif torch.cuda.is_available():
device = CUDA_DEVICE
elif torch.backends.mps.is_available():
device = MPS_DEVICE
else:
return CPU_DEVICE
else:
return torch.device(config.device)
device = CPU_DEVICE
return cls.normalize(device)
def get_torch_device_name() -> str:
device = choose_torch_device()
return torch.cuda.get_device_name(device) if device.type == "cuda" else device.type.upper()
# We are in transition here from using a single global AppConfig to allowing multiple
# configurations. It is strongly recommended to pass the app_config to this function.
def choose_precision(
device: torch.device, app_config: Optional[InvokeAIAppConfig] = None
) -> Literal["float32", "float16", "bfloat16"]:
"""Return an appropriate precision for the given torch device."""
app_config = app_config or get_config()
if device.type == "cuda":
device_name = torch.cuda.get_device_name(device)
if not ("GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name):
if app_config.precision == "float32":
return "float32"
elif app_config.precision == "bfloat16":
return "bfloat16"
@classmethod
def choose_torch_dtype(cls, device: Optional[torch.device] = None) -> torch.dtype:
"""Return the precision to use for accelerated inference."""
device = device or cls.choose_torch_device()
config = get_config()
if device.type == "cuda" and torch.cuda.is_available():
device_name = torch.cuda.get_device_name(device)
if "GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name:
# These GPUs have limited support for float16
return cls._to_dtype("float32")
elif config.precision == "auto":
# Default to float16 for CUDA devices
return cls._to_dtype("float16")
else:
return "float16"
elif device.type == "mps":
return "float16"
return "float32"
# Use the user-defined precision
return cls._to_dtype(config.precision)
elif device.type == "mps" and torch.backends.mps.is_available():
if config.precision == "auto":
# Default to float16 for MPS devices
return cls._to_dtype("float16")
else:
# Use the user-defined precision
return cls._to_dtype(config.precision)
# CPU / safe fallback
return cls._to_dtype("float32")
# We are in transition here from using a single global AppConfig to allowing multiple
# configurations. It is strongly recommended to pass the app_config to this function.
def torch_dtype(
device: Optional[torch.device] = None,
app_config: Optional[InvokeAIAppConfig] = None,
) -> torch.dtype:
device = device or choose_torch_device()
precision = choose_precision(device, app_config)
if precision == "float16":
return torch.float16
if precision == "bfloat16":
return torch.bfloat16
else:
# "auto", "autocast", "float32"
return torch.float32
@classmethod
def get_torch_device_name(cls) -> str:
"""Return the device name for the current torch device."""
device = cls.choose_torch_device()
return torch.cuda.get_device_name(device) if device.type == "cuda" else device.type.upper()
def choose_autocast(precision):
"""Returns an autocast context or nullcontext for the given precision string"""
# float16 currently requires autocast to avoid errors like:
# 'expected scalar type Half but found Float'
if precision == "autocast" or precision == "float16":
return autocast
return nullcontext
def normalize_device(device: Union[str, torch.device]) -> torch.device:
"""Ensure device has a device index defined, if appropriate."""
device = torch.device(device)
if device.index is None:
# cuda might be the only torch backend that currently uses the device index?
# I don't see anything like `current_device` for cpu or mps.
if device.type == "cuda":
@classmethod
def normalize(cls, device: Union[str, torch.device]) -> torch.device:
"""Add the device index to CUDA devices."""
device = torch.device(device)
if device.index is None and device.type == "cuda" and torch.cuda.is_available():
device = torch.device(device.type, torch.cuda.current_device())
return device
return device
@classmethod
def empty_cache(cls) -> None:
"""Clear the GPU device cache."""
if torch.backends.mps.is_available():
torch.mps.empty_cache()
if torch.cuda.is_available():
torch.cuda.empty_cache()
@classmethod
def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
return NAME_TO_PRECISION[precision_name]
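A brief usage sketch of the new TorchDevice abstraction alongside the behaviour it encapsulates:

import torch

device = TorchDevice.choose_torch_device()      # config "device" setting, else cuda > mps > cpu
dtype = TorchDevice.choose_torch_dtype(device)  # float16 on cuda/mps by default, float32 on cpu

x = torch.zeros(1, device=device, dtype=dtype)
TorchDevice.empty_cache()  # clears the CUDA or MPS cache, if one is active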

View File

@ -0,0 +1,53 @@
import torch
def to_standard_mask_dim(mask: torch.Tensor) -> torch.Tensor:
"""Standardize the dimensions of a mask tensor.
Args:
mask (torch.Tensor): A mask tensor. The shape can be (1, h, w) or (h, w).
Returns:
torch.Tensor: The output mask tensor. The shape is (1, h, w).
"""
# Normalize the mask to a (1, h, w) shape.
if mask.ndim == 2:
mask = mask.unsqueeze(0)
elif mask.ndim == 3 and mask.shape[0] == 1:
pass
else:
raise ValueError(f"Unsupported mask shape: {mask.shape}. Expected (1, h, w) or (h, w).")
return mask
def to_standard_float_mask(mask: torch.Tensor, out_dtype: torch.dtype) -> torch.Tensor:
"""Standardize the format of a mask tensor.
Args:
mask (torch.Tensor): A mask tensor. The dtype can be any bool, float, or int type. The shape must be (1, h, w)
or (h, w).
out_dtype (torch.dtype): The dtype of the output mask tensor. Must be a float type.
Returns:
torch.Tensor: The output mask tensor. The dtype is out_dtype. The shape is (1, h, w). All values are either 0.0
or 1.0.
"""
if not out_dtype.is_floating_point:
raise ValueError(f"out_dtype must be a float type, but got {out_dtype}")
mask = to_standard_mask_dim(mask)
# Set masked regions to 1.0 and unmasked regions to 0.0.
if mask.dtype == torch.bool:
mask = mask.to(out_dtype)
else:
# Cast to the float dtype first, then binarize around 0.5.
mask = mask.to(out_dtype)
mask_region = mask > 0.5
mask[mask_region] = 1.0
mask[~mask_region] = 0.0
return mask
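For example, a boolean (h, w) mask is normalized like this (a minimal sketch):

import torch

mask = torch.zeros((64, 64), dtype=torch.bool)
mask[16:48, 16:48] = True

float_mask = to_standard_float_mask(mask, out_dtype=torch.float16)
assert float_mask.shape == (1, 64, 64)
assert float_mask.dtype == torch.float16
assert set(float_mask.unique().tolist()) == {0.0, 1.0}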

View File

@ -0,0 +1,98 @@
model:
  target: sgm.models.diffusion.DiffusionEngine
  params:
    scale_factor: 0.13025
    disable_first_stage_autocast: True

    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    network_config:
      target: sgm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: True
        in_channels: 9
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: True
        use_linear_in_transformer: True
        transformer_depth: [1, 2, 10]  # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: False

    conditioner_config:
      target: sgm.modules.GeneralConditioner
      params:
        emb_models:
          # crossattn cond
          - is_trainable: False
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
          # crossattn and vector cond
          - is_trainable: False
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              freeze: True
              layer: penultimate
              always_return_pooled: True
              legacy: False
          # vector cond
          - is_trainable: False
            input_key: original_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: False
            input_key: crop_coords_top_left
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # vector cond
          - is_trainable: False
            input_key: target_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
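
This config describes the SDXL inpainting DiffusionEngine; the UNet's 9 input channels correspond to the usual latent-inpainting layout (4 noisy-latent + 4 masked-image-latent + 1 mask channels). A minimal sketch of how a config like this is typically consumed, assuming OmegaConf is available — the file name here is hypothetical:

from omegaconf import OmegaConf

# Hypothetical file name for this config; adjust to the actual path in the repo.
cfg = OmegaConf.load("sd_xl_inpaint.yaml")
print(cfg.model.target)                                     # sgm.models.diffusion.DiffusionEngine
print(cfg.model.params.network_config.params.in_channels)   # 9 (4 latent + 4 masked-image latent + 1 mask)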

View File

@ -10,6 +10,8 @@ module.exports = {
'path/no-relative-imports': ['error', { maxDepth: 0 }],
// https://github.com/edvardchen/eslint-plugin-i18next/blob/HEAD/docs/rules/no-literal-string.md
'i18next/no-literal-string': 'error',
// https://eslint.org/docs/latest/rules/no-console
'no-console': 'error',
},
overrides: [
/**

View File

@ -43,4 +43,5 @@ stats.html
yalc.lock
# vitest
tsconfig.vitest-temp.json
coverage/

View File

@ -11,6 +11,7 @@ import { createStore } from '../src/app/store/store';
// @ts-ignore
import translationEN from '../public/locales/en.json';
import { ReduxInit } from './ReduxInit';
import { $store } from 'app/store/nanostores/store';
i18n.use(initReactI18next).init({
lng: 'en',
@ -25,6 +26,7 @@ i18n.use(initReactI18next).init({
});
const store = createStore(undefined, false);
$store.set(store);
$baseUrl.set('http://localhost:9090');
const preview: Preview = {

View File

@ -8,7 +8,7 @@
<meta http-equiv="Pragma" content="no-cache">
<meta http-equiv="Expires" content="0">
<title>Invoke - Community Edition</title>
<link rel="icon" type="icon" href="assets/images/invoke-favicon.svg" />
<link id="invoke-favicon" rel="icon" type="icon" href="assets/images/invoke-favicon.svg" />
<style>
html,
body {
@ -23,4 +23,4 @@
<script type="module" src="/src/main.tsx"></script>
</body>
</html>

View File

@ -1,6 +1,7 @@
import type { KnipConfig } from 'knip';
const config: KnipConfig = {
project: ['src/**/*.{ts,tsx}!'],
ignore: [
// This file is only used during debugging
'src/app/store/middleware/debugLoggerMiddleware.ts',
@ -10,6 +11,9 @@ const config: KnipConfig = {
'src/features/nodes/types/v2/**',
],
ignoreBinaries: ['only-allow'],
paths: {
'public/*': ['public/*'],
},
};
export default config;

View File

@ -24,7 +24,7 @@
"build": "pnpm run lint && vite build",
"typegen": "node scripts/typegen.js",
"preview": "vite preview",
"lint:knip": "knip --tags=-@knipignore",
"lint:knip": "knip",
"lint:dpdm": "dpdm --no-warning --no-tree --transform --exit-code circular:1 src/main.tsx",
"lint:eslint": "eslint --max-warnings=0 .",
"lint:prettier": "prettier --check .",
@ -35,6 +35,7 @@
"storybook": "storybook dev -p 6006",
"build-storybook": "storybook build",
"test": "vitest",
"test:ui": "vitest --coverage --ui",
"test:no-watch": "vitest --no-watch"
},
"madge": {
@ -52,57 +53,61 @@
},
"dependencies": {
"@chakra-ui/react-use-size": "^2.1.0",
"@dagrejs/graphlib": "^2.2.1",
"@dagrejs/dagre": "^1.1.2",
"@dagrejs/graphlib": "^2.2.2",
"@dnd-kit/core": "^6.1.0",
"@dnd-kit/sortable": "^8.0.0",
"@dnd-kit/utilities": "^3.2.2",
"@fontsource-variable/inter": "^5.0.17",
"@invoke-ai/ui-library": "^0.0.21",
"@fontsource-variable/inter": "^5.0.18",
"@invoke-ai/ui-library": "^0.0.25",
"@nanostores/react": "^0.7.2",
"@reduxjs/toolkit": "2.2.2",
"@reduxjs/toolkit": "2.2.3",
"@roarr/browser-log-writer": "^1.3.0",
"chakra-react-select": "^4.7.6",
"compare-versions": "^6.1.0",
"dateformat": "^5.0.3",
"framer-motion": "^11.0.22",
"i18next": "^23.10.1",
"i18next-http-backend": "^2.5.0",
"fracturedjsonjs": "^4.0.1",
"framer-motion": "^11.1.8",
"i18next": "^23.11.3",
"i18next-http-backend": "^2.5.1",
"idb-keyval": "^6.2.1",
"jsondiffpatch": "^0.6.0",
"konva": "^9.3.6",
"lodash-es": "^4.17.21",
"nanostores": "^0.10.0",
"nanostores": "^0.10.3",
"new-github-issue-url": "^1.0.0",
"overlayscrollbars": "^2.6.1",
"overlayscrollbars-react": "^0.5.5",
"overlayscrollbars": "^2.7.3",
"overlayscrollbars-react": "^0.5.6",
"query-string": "^9.0.0",
"react": "^18.2.0",
"react": "^18.3.1",
"react-colorful": "^5.6.1",
"react-dom": "^18.2.0",
"react-dom": "^18.3.1",
"react-dropzone": "^14.2.3",
"react-error-boundary": "^4.0.13",
"react-hook-form": "^7.51.2",
"react-hook-form": "^7.51.4",
"react-hotkeys-hook": "4.5.0",
"react-i18next": "^14.1.0",
"react-icons": "^5.0.1",
"react-i18next": "^14.1.1",
"react-icons": "^5.2.0",
"react-konva": "^18.2.10",
"react-redux": "9.1.0",
"react-resizable-panels": "^2.0.16",
"react-redux": "9.1.2",
"react-resizable-panels": "^2.0.19",
"react-select": "5.8.0",
"react-use": "^17.5.0",
"react-virtuoso": "^4.7.5",
"reactflow": "^11.10.4",
"react-virtuoso": "^4.7.10",
"reactflow": "^11.11.3",
"redux-dynamic-middlewares": "^2.2.0",
"redux-remember": "^5.1.0",
"redux-undo": "^1.1.0",
"rfdc": "^1.3.1",
"roarr": "^7.21.1",
"serialize-error": "^11.0.3",
"socket.io-client": "^4.7.5",
"use-debounce": "^10.0.0",
"use-device-pixel-ratio": "^1.1.2",
"use-image": "^1.1.1",
"uuid": "^9.0.1",
"zod": "^3.22.4",
"zod-validation-error": "^3.0.3"
"zod": "^3.23.6",
"zod-validation-error": "^3.2.0"
},
"peerDependencies": {
"@chakra-ui/react": "^2.8.2",
@ -113,40 +118,42 @@
"devDependencies": {
"@invoke-ai/eslint-config-react": "^0.0.14",
"@invoke-ai/prettier-config-react": "^0.0.7",
"@storybook/addon-essentials": "^8.0.4",
"@storybook/addon-interactions": "^8.0.4",
"@storybook/addon-links": "^8.0.4",
"@storybook/addon-storysource": "^8.0.4",
"@storybook/manager-api": "^8.0.4",
"@storybook/react": "^8.0.4",
"@storybook/react-vite": "^8.0.4",
"@storybook/theming": "^8.0.4",
"@storybook/addon-essentials": "^8.0.10",
"@storybook/addon-interactions": "^8.0.10",
"@storybook/addon-links": "^8.0.10",
"@storybook/addon-storysource": "^8.0.10",
"@storybook/manager-api": "^8.0.10",
"@storybook/react": "^8.0.10",
"@storybook/react-vite": "^8.0.10",
"@storybook/theming": "^8.0.10",
"@types/dateformat": "^5.0.2",
"@types/lodash-es": "^4.17.12",
"@types/node": "^20.11.30",
"@types/react": "^18.2.73",
"@types/react-dom": "^18.2.22",
"@types/node": "^20.12.10",
"@types/react": "^18.3.1",
"@types/react-dom": "^18.3.0",
"@types/uuid": "^9.0.8",
"@vitejs/plugin-react-swc": "^3.6.0",
"@vitest/coverage-v8": "^1.5.0",
"@vitest/ui": "^1.5.0",
"concurrently": "^8.2.2",
"dpdm": "^3.14.0",
"eslint": "^8.57.0",
"eslint-plugin-i18next": "^6.0.3",
"eslint-plugin-path": "^1.3.0",
"knip": "^5.6.1",
"knip": "^5.12.3",
"openapi-types": "^12.1.3",
"openapi-typescript": "^6.7.5",
"prettier": "^3.2.5",
"rollup-plugin-visualizer": "^5.12.0",
"storybook": "^8.0.4",
"storybook": "^8.0.10",
"ts-toolbelt": "^9.6.0",
"tsafe": "^1.6.6",
"typescript": "^5.4.3",
"vite": "^5.2.6",
"vite-plugin-css-injected-by-js": "^3.5.0",
"vite-plugin-dts": "^3.8.0",
"typescript": "^5.4.5",
"vite": "^5.2.11",
"vite-plugin-css-injected-by-js": "^3.5.1",
"vite-plugin-dts": "^3.9.1",
"vite-plugin-eslint": "^1.8.1",
"vite-tsconfig-paths": "^4.3.2",
"vitest": "^1.4.0"
"vitest": "^1.6.0"
}
}

File diff suppressed because it is too large.

View File

@ -0,0 +1,5 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="16" height="16" rx="2" fill="#E6FD13"/>
<path d="M9.61889 5.45H12.5V3.5H3.5V5.45H6.38111L9.61889 10.55H12.5V12.5H3.5V10.55H6.38111" stroke="black"/>
<circle cx="12" cy="4" r="3" fill="#f5480c" stroke="#0d1117" stroke-width="1"/>
</svg>


Binary file not shown (new image asset, 1.7 KiB).

View File

@ -76,7 +76,9 @@
"aboutHeading": "Nutzen Sie Ihre kreative Energie",
"toResolve": "Lösen",
"add": "Hinzufügen",
"loglevel": "Protokoll Stufe"
"loglevel": "Protokoll Stufe",
"selected": "Ausgewählt",
"beta": "Beta"
},
"gallery": {
"galleryImageSize": "Bildgröße",
@ -85,7 +87,8 @@
"loadMore": "Mehr laden",
"noImagesInGallery": "Keine Bilder in der Galerie",
"loading": "Lade",
"deleteImage": "Lösche Bild",
"deleteImage_one": "Lösche Bild",
"deleteImage_other": "Lösche {{count}} Bilder",
"copy": "Kopieren",
"download": "Runterladen",
"setCurrentImage": "Setze aktuelle Bild",
@ -396,7 +399,14 @@
"cancel": "Stornieren",
"defaultSettingsSaved": "Standardeinstellungen gespeichert",
"addModels": "Model hinzufügen",
"deleteModelImage": "Lösche Model Bild"
"deleteModelImage": "Lösche Model Bild",
"hfTokenInvalidErrorMessage": "Falscher oder fehlender HuggingFace Schlüssel.",
"huggingFaceRepoID": "HuggingFace Repo ID",
"hfToken": "HuggingFace Schlüssel",
"hfTokenInvalid": "Falscher oder fehlender HF Schlüssel",
"huggingFacePlaceholder": "besitzer/model-name",
"hfTokenSaved": "HF Schlüssel gespeichert",
"hfTokenUnableToVerify": "Konnte den HF Schlüssel nicht validieren"
},
"parameters": {
"images": "Bilder",
@ -685,7 +695,11 @@
"hands": "Hände",
"dwOpenpose": "DW Openpose",
"dwOpenposeDescription": "Posenschätzung mit DW Openpose",
"selectCLIPVisionModel": "Wähle ein CLIP Vision Model aus"
"selectCLIPVisionModel": "Wähle ein CLIP Vision Model aus",
"ipAdapterMethod": "Methode",
"composition": "Nur Komposition",
"full": "Voll",
"style": "Nur Style"
},
"queue": {
"status": "Status",
@ -716,7 +730,6 @@
"resume": "Wieder aufnehmen",
"item": "Auftrag",
"notReady": "Warteschlange noch nicht bereit",
"queueCountPrediction": "{{promptsCount}} Prompts × {{iterations}} Iterationen -> {{count}} Generationen",
"clearQueueAlertDialog": "\"Die Warteschlange leeren\" stoppt den aktuellen Prozess und leert die Warteschlange komplett.",
"completedIn": "Fertig in",
"cancelBatchSucceeded": "Stapel abgebrochen",

View File

@ -69,6 +69,7 @@
"auto": "Auto",
"back": "Back",
"batch": "Batch Manager",
"beta": "Beta",
"cancel": "Cancel",
"copy": "Copy",
"copyError": "$t(gallery.copy) Error",
@ -83,13 +84,17 @@
"direction": "Direction",
"ipAdapter": "IP Adapter",
"t2iAdapter": "T2I Adapter",
"positivePrompt": "Positive Prompt",
"negativePrompt": "Negative Prompt",
"discordLabel": "Discord",
"dontAskMeAgain": "Don't ask me again",
"editor": "Editor",
"error": "Error",
"file": "File",
"folder": "Folder",
"format": "format",
"githubLabel": "Github",
"goTo": "Go to",
"hotkeysLabel": "Hotkeys",
"imageFailedToLoad": "Unable to Load Image",
"img2img": "Image To Image",
@ -135,7 +140,13 @@
"red": "Red",
"green": "Green",
"blue": "Blue",
"alpha": "Alpha"
"alpha": "Alpha",
"selected": "Selected",
"tab": "Tab",
"viewing": "Viewing",
"viewingDesc": "Review images in a large gallery view",
"editing": "Editing",
"editingDesc": "Edit on the Control Layers canvas"
},
"controlnet": {
"controlAdapter_one": "Control Adapter",
@ -151,6 +162,7 @@
"balanced": "Balanced",
"base": "Base",
"beginEndStepPercent": "Begin / End Step Percentage",
"beginEndStepPercentShort": "Begin/End %",
"bgth": "bg_th",
"canny": "Canny",
"cannyDescription": "Canny edge detection",
@ -213,12 +225,17 @@
"resize": "Resize",
"resizeSimple": "Resize (Simple)",
"resizeMode": "Resize Mode",
"ipAdapterMethod": "Method",
"full": "Full",
"style": "Style Only",
"composition": "Composition Only",
"safe": "Safe",
"saveControlImage": "Save Control Image",
"scribble": "scribble",
"scribble": "Scribble",
"selectModel": "Select a model",
"selectCLIPVisionModel": "Select a CLIP Vision model",
"setControlImageDimensions": "Set Control Image Dimensions To W/H",
"setControlImageDimensions": "Copy size to W/H (optimize for model)",
"setControlImageDimensionsForce": "Copy size to W/H (ignore model)",
"showAdvanced": "Show Advanced",
"small": "Small",
"toggleControlNet": "Toggle this ControlNet",
@ -244,7 +261,6 @@
"queue": "Queue",
"queueFront": "Add to Front of Queue",
"queueBack": "Add to Queue",
"queueCountPrediction": "{{promptsCount}} prompts \u00d7 {{iterations}} iterations -> {{count}} generations",
"queueEmpty": "Queue Empty",
"enqueueing": "Queueing Batch",
"resume": "Resume",
@ -297,7 +313,13 @@
"batchFailedToQueue": "Failed to Queue Batch",
"graphQueued": "Graph queued",
"graphFailedToQueue": "Failed to queue graph",
"openQueue": "Open Queue"
"openQueue": "Open Queue",
"prompts_one": "Prompt",
"prompts_other": "Prompts",
"iterations_one": "Iteration",
"iterations_other": "Iterations",
"generations_one": "Generation",
"generations_other": "Generations"
},
"invocationCache": {
"invocationCache": "Invocation Cache",
@ -326,7 +348,8 @@
"drop": "Drop",
"dropOrUpload": "$t(gallery.drop) or Upload",
"dropToUpload": "$t(gallery.drop) to Upload",
"deleteImage": "Delete Image",
"deleteImage_one": "Delete Image",
"deleteImage_other": "Delete {{count}} Images",
"deleteImageBin": "Deleted images will be sent to your operating system's Bin.",
"deleteImagePermanent": "Deleted images cannot be restored.",
"download": "Download",
@ -572,6 +595,10 @@
"upscale": {
"desc": "Upscale the current image",
"title": "Upscale"
},
"toggleViewer": {
"desc": "Switches between the Image Viewer and workspace for the current tab.",
"title": "Toggle Image Viewer"
}
},
"metadata": {
@ -747,6 +774,7 @@
"cannotConnectOutputToOutput": "Cannot connect output to output",
"cannotConnectToSelf": "Cannot connect to self",
"cannotDuplicateConnection": "Cannot create duplicate connections",
"cannotMixAndMatchCollectionItemTypes": "Cannot mix and match collection item types",
"nodePack": "Node pack",
"collection": "Collection",
"collectionFieldType": "{{name}} Collection",
@ -769,6 +797,8 @@
"float": "Float",
"fullyContainNodes": "Fully Contain Nodes to Select",
"fullyContainNodesHelp": "Nodes must be fully inside the selection box to be selected",
"showEdgeLabels": "Show Edge Labels",
"showEdgeLabelsHelp": "Show labels on edges, indicating the connected nodes",
"hideLegendNodes": "Hide Field Type Legend",
"hideMinimapnodes": "Hide MiniMap",
"inputMayOnlyHaveOneConnection": "Input may only have one connection",
@ -849,6 +879,7 @@
"version": "Version",
"versionUnknown": " Version Unknown",
"workflow": "Workflow",
"graph": "Graph",
"workflowAuthor": "Author",
"workflowContact": "Contact",
"workflowDescription": "Short Description",
@ -884,6 +915,7 @@
"denoisingStrength": "Denoising Strength",
"downloadImage": "Download Image",
"general": "General",
"globalSettings": "Global Settings",
"height": "Height",
"imageFit": "Fit Initial Image To Output Size",
"images": "Images",
@ -901,23 +933,37 @@
"missingInputForField": "{{nodeLabel}} -> {{fieldLabel}} missing input",
"missingNodeTemplate": "Missing node template",
"noControlImageForControlAdapter": "Control Adapter #{{number}} has no control image",
"imageNotProcessedForControlAdapter": "Control Adapter #{{number}}'s image is not processed",
"noInitialImageSelected": "No initial image selected",
"noModelForControlAdapter": "Control Adapter #{{number}} has no model selected.",
"incompatibleBaseModelForControlAdapter": "Control Adapter #{{number}} model is incompatible with main model.",
"noModelSelected": "No model selected",
"noPrompts": "No prompts generated",
"noNodesInGraph": "No nodes in graph",
"systemDisconnected": "System disconnected"
"systemDisconnected": "System disconnected",
"layer": {
"initialImageNoImageSelected": "no initial image selected",
"controlAdapterNoModelSelected": "no Control Adapter model selected",
"controlAdapterIncompatibleBaseModel": "incompatible Control Adapter base model",
"controlAdapterNoImageSelected": "no Control Adapter image selected",
"controlAdapterImageNotProcessed": "Control Adapter image not processed",
"t2iAdapterIncompatibleDimensions": "T2I Adapter requires image dimension to be multiples of 64",
"ipAdapterNoModelSelected": "no IP adapter selected",
"ipAdapterIncompatibleBaseModel": "incompatible IP Adapter base model",
"ipAdapterNoImageSelected": "no IP Adapter image selected",
"rgNoPromptsOrIPAdapters": "no text prompts or IP Adapters",
"rgNoRegion": "no region selected"
}
},
"maskBlur": "Mask Blur",
"negativePromptPlaceholder": "Negative Prompt",
"globalNegativePromptPlaceholder": "Global Negative Prompt",
"noiseThreshold": "Noise Threshold",
"patchmatchDownScaleSize": "Downscale",
"perlinNoise": "Perlin Noise",
"positivePromptPlaceholder": "Positive Prompt",
"globalPositivePromptPlaceholder": "Global Positive Prompt",
"iterations": "Iterations",
"iterationsWithCount_one": "{{count}} Iteration",
"iterationsWithCount_other": "{{count}} Iterations",
"scale": "Scale",
"scaleBeforeProcessing": "Scale Before Processing",
"scaledHeight": "Scaled H",
@ -1174,6 +1220,10 @@
"heading": "Resize Mode",
"paragraphs": ["Method to fit Control Adapter's input image size to the output generation size."]
},
"ipAdapterMethod": {
"heading": "Method",
"paragraphs": ["Method by which to apply the current IP Adapter."]
},
"controlNetWeight": {
"heading": "Weight",
"paragraphs": [
@ -1423,6 +1473,7 @@
"eraseBoundingBox": "Erase Bounding Box",
"eraser": "Eraser",
"fillBoundingBox": "Fill Bounding Box",
"hideBoundingBox": "Hide Bounding Box",
"initialFitImageSize": "Fit Image Size on Drop",
"invertBrushSizeScrollDirection": "Invert Scroll for Brush Size",
"layer": "Layer",
@ -1440,6 +1491,7 @@
"saveMask": "Save $t(unifiedCanvas.mask)",
"saveToGallery": "Save To Gallery",
"scaledBoundingBox": "Scaled Bounding Box",
"showBoundingBox": "Show Bounding Box",
"showCanvasDebugInfo": "Show Additional Canvas Info",
"showGrid": "Show Grid",
"showResultsOn": "Show Results (On)",
@ -1482,9 +1534,63 @@
"workflowName": "Workflow Name",
"newWorkflowCreated": "New Workflow Created",
"workflowCleared": "Workflow Cleared",
"workflowEditorMenu": "Workflow Editor Menu"
"workflowEditorMenu": "Workflow Editor Menu",
"loadFromGraph": "Load Workflow from Graph",
"convertGraph": "Convert Graph",
"loadWorkflow": "$t(common.load) Workflow",
"autoLayout": "Auto Layout"
},
"app": {
"storeNotInitialized": "Store is not initialized"
},
"controlLayers": {
"deleteAll": "Delete All",
"addLayer": "Add Layer",
"moveToFront": "Move to Front",
"moveToBack": "Move to Back",
"moveForward": "Move Forward",
"moveBackward": "Move Backward",
"brushSize": "Brush Size",
"controlLayers": "Control Layers",
"globalMaskOpacity": "Global Mask Opacity",
"autoNegative": "Auto Negative",
"toggleVisibility": "Toggle Layer Visibility",
"deletePrompt": "Delete Prompt",
"resetRegion": "Reset Region",
"debugLayers": "Debug Layers",
"rectangle": "Rectangle",
"maskPreviewColor": "Mask Preview Color",
"addPositivePrompt": "Add $t(common.positivePrompt)",
"addNegativePrompt": "Add $t(common.negativePrompt)",
"addIPAdapter": "Add $t(common.ipAdapter)",
"regionalGuidance": "Regional Guidance",
"regionalGuidanceLayer": "$t(controlLayers.regionalGuidance) $t(unifiedCanvas.layer)",
"opacity": "Opacity",
"globalControlAdapter": "Global $t(controlnet.controlAdapter_one)",
"globalControlAdapterLayer": "Global $t(controlnet.controlAdapter_one) $t(unifiedCanvas.layer)",
"globalIPAdapter": "Global $t(common.ipAdapter)",
"globalIPAdapterLayer": "Global $t(common.ipAdapter) $t(unifiedCanvas.layer)",
"globalInitialImage": "Global Initial Image",
"globalInitialImageLayer": "$t(controlLayers.globalInitialImage) $t(unifiedCanvas.layer)",
"opacityFilter": "Opacity Filter",
"clearProcessor": "Clear Processor",
"resetProcessor": "Reset Processor to Defaults",
"noLayersAdded": "No Layers Added",
"layers_one": "Layer",
"layers_other": "Layers"
},
"ui": {
"tabs": {
"generation": "Generation",
"generationTab": "$t(ui.tabs.generation) $t(common.tab)",
"canvas": "Canvas",
"canvasTab": "$t(ui.tabs.canvas) $t(common.tab)",
"workflows": "Workflows",
"workflowsTab": "$t(ui.tabs.workflows) $t(common.tab)",
"models": "Models",
"modelsTab": "$t(ui.tabs.models) $t(common.tab)",
"queue": "Queue",
"queueTab": "$t(ui.tabs.queue) $t(common.tab)"
}
}
}

View File

@ -25,7 +25,24 @@
"areYouSure": "¿Estas seguro?",
"batch": "Administrador de lotes",
"modelManager": "Administrador de modelos",
"communityLabel": "Comunidad"
"communityLabel": "Comunidad",
"direction": "Dirección",
"ai": "Ia",
"add": "Añadir",
"auto": "Automático",
"copyError": "Error $t(gallery.copy)",
"details": "Detalles",
"or": "o",
"checkpoint": "Punto de control",
"controlNet": "ControlNet",
"aboutHeading": "Sea dueño de su poder creativo",
"advanced": "Avanzado",
"data": "Fecha",
"delete": "Borrar",
"copy": "Copiar",
"beta": "Beta",
"on": "En",
"aboutDesc": "¿Utilizas Invoke para trabajar? Mira aquí:"
},
"gallery": {
"galleryImageSize": "Tamaño de la imagen",
@ -33,7 +50,9 @@
"autoSwitchNewImages": "Auto seleccionar Imágenes nuevas",
"loadMore": "Cargar más",
"noImagesInGallery": "No hay imágenes para mostrar",
"deleteImage": "Eliminar Imagen",
"deleteImage_one": "Eliminar Imagen",
"deleteImage_many": "",
"deleteImage_other": "",
"deleteImageBin": "Las imágenes eliminadas se enviarán a la papelera de tu sistema operativo.",
"deleteImagePermanent": "Las imágenes eliminadas no se pueden restaurar.",
"assets": "Activos",
@ -441,7 +460,13 @@
"previousImage": "Imagen anterior",
"nextImage": "Siguiente imagen",
"showOptionsPanel": "Mostrar el panel lateral",
"menu": "Menú"
"menu": "Menú",
"showGalleryPanel": "Mostrar panel de galería",
"loadMore": "Cargar más",
"about": "Acerca de",
"createIssue": "Crear un problema",
"resetUI": "Interfaz de usuario $t(accessibility.reset)",
"mode": "Modo"
},
"nodes": {
"zoomInNodes": "Acercar",
@ -454,5 +479,68 @@
"reloadNodeTemplates": "Recargar las plantillas de nodos",
"loadWorkflow": "Cargar el flujo de trabajo",
"downloadWorkflow": "Descargar el flujo de trabajo en un archivo JSON"
},
"boards": {
"autoAddBoard": "Agregar panel automáticamente",
"changeBoard": "Cambiar el panel",
"clearSearch": "Borrar la búsqueda",
"deleteBoard": "Borrar el panel",
"selectBoard": "Seleccionar un panel",
"uncategorized": "Sin categoría",
"cancel": "Cancelar",
"addBoard": "Agregar un panel",
"movingImagesToBoard_one": "Moviendo {{count}} imagen al panel:",
"movingImagesToBoard_many": "Moviendo {{count}} imágenes al panel:",
"movingImagesToBoard_other": "Moviendo {{count}} imágenes al panel:",
"bottomMessage": "Al eliminar este panel y las imágenes que contiene, se restablecerán las funciones que los estén utilizando actualmente.",
"deleteBoardAndImages": "Borrar el panel y las imágenes",
"loading": "Cargando...",
"deletedBoardsCannotbeRestored": "Los paneles eliminados no se pueden restaurar",
"move": "Mover",
"menuItemAutoAdd": "Agregar automáticamente a este panel",
"searchBoard": "Buscando paneles…",
"topMessage": "Este panel contiene imágenes utilizadas en las siguientes funciones:",
"downloadBoard": "Descargar panel",
"deleteBoardOnly": "Borrar solo el panel",
"myBoard": "Mi panel",
"noMatching": "No hay paneles que coincidan"
},
"accordions": {
"compositing": {
"title": "Composición",
"infillTab": "Relleno"
},
"generation": {
"title": "Generación"
},
"image": {
"title": "Imagen"
},
"control": {
"title": "Control"
},
"advanced": {
"options": "$t(accordions.advanced.title) opciones",
"title": "Avanzado"
}
},
"ui": {
"tabs": {
"generationTab": "$t(ui.tabs.generation) $t(common.tab)",
"canvas": "Lienzo",
"generation": "Generación",
"queue": "Cola",
"queueTab": "$t(ui.tabs.queue) $t(common.tab)",
"workflows": "Flujos de trabajo",
"models": "Modelos",
"modelsTab": "$t(ui.tabs.models) $t(common.tab)",
"canvasTab": "$t(ui.tabs.canvas) $t(common.tab)",
"workflowsTab": "$t(ui.tabs.workflows) $t(common.tab)"
}
},
"controlLayers": {
"layers_one": "Capa",
"layers_many": "Capas",
"layers_other": "Capas"
}
}

View File

@ -5,7 +5,7 @@
"reportBugLabel": "Segnala un errore",
"settingsLabel": "Impostazioni",
"img2img": "Immagine a Immagine",
"unifiedCanvas": "Tela unificata",
"unifiedCanvas": "Tela",
"nodes": "Flussi di lavoro",
"upload": "Caricamento",
"load": "Carica",
@ -74,7 +74,18 @@
"file": "File",
"toResolve": "Da risolvere",
"add": "Aggiungi",
"loglevel": "Livello di log"
"loglevel": "Livello di log",
"beta": "Beta",
"positivePrompt": "Prompt positivo",
"negativePrompt": "Prompt negativo",
"selected": "Selezionato",
"goTo": "Vai a",
"editor": "Editor",
"tab": "Scheda",
"viewing": "Visualizza",
"viewingDesc": "Rivedi le immagini in un'ampia vista della galleria",
"editing": "Modifica",
"editingDesc": "Modifica nell'area Livelli di controllo"
},
"gallery": {
"galleryImageSize": "Dimensione dell'immagine",
@ -82,7 +93,9 @@
"autoSwitchNewImages": "Passaggio automatico a nuove immagini",
"loadMore": "Carica altro",
"noImagesInGallery": "Nessuna immagine da visualizzare",
"deleteImage": "Elimina l'immagine",
"deleteImage_one": "Elimina l'immagine",
"deleteImage_many": "Elimina {{count}} immagini",
"deleteImage_other": "Elimina {{count}} immagini",
"deleteImagePermanent": "Le immagini eliminate non possono essere ripristinate.",
"deleteImageBin": "Le immagini eliminate verranno spostate nel cestino del tuo sistema operativo.",
"assets": "Risorse",
@ -178,8 +191,8 @@
"desc": "Mostra le informazioni sui metadati dell'immagine corrente"
},
"sendToImageToImage": {
"title": "Invia a Immagine a Immagine",
"desc": "Invia l'immagine corrente a da Immagine a Immagine"
"title": "Invia a Generazione da immagine",
"desc": "Invia l'immagine corrente a Generazione da immagine"
},
"deleteImage": {
"title": "Elimina immagine",
@ -332,6 +345,10 @@
"remixImage": {
"desc": "Utilizza tutti i parametri tranne il seme dell'immagine corrente",
"title": "Remixa l'immagine"
},
"toggleViewer": {
"title": "Attiva/disattiva il visualizzatore di immagini",
"desc": "Passa dal Visualizzatore immagini all'area di lavoro per la scheda corrente."
}
},
"modelManager": {
@ -444,7 +461,8 @@
"hfTokenInvalidErrorMessage2": "Aggiornalo in ",
"main": "Principali",
"noModelsInstalledDesc1": "Installa i modelli con",
"ipAdapters": "Adattatori IP"
"ipAdapters": "Adattatori IP",
"noMatchingModels": "Nessun modello corrispondente"
},
"parameters": {
"images": "Immagini",
@ -468,8 +486,8 @@
"scaledHeight": "Altezza ridimensionata",
"infillMethod": "Metodo di riempimento",
"tileSize": "Dimensione piastrella",
"sendToImg2Img": "Invia a Immagine a Immagine",
"sendToUnifiedCanvas": "Invia a Tela Unificata",
"sendToImg2Img": "Invia a Generazione da immagine",
"sendToUnifiedCanvas": "Invia alla Tela",
"downloadImage": "Scarica l'immagine",
"usePrompt": "Usa Prompt",
"useSeed": "Usa Seme",
@ -505,13 +523,11 @@
"incompatibleBaseModelForControlAdapter": "Il modello dell'adattatore di controllo #{{number}} non è compatibile con il modello principale.",
"missingNodeTemplate": "Modello di nodo mancante",
"missingInputForField": "{{nodeLabel}} -> {{fieldLabel}} ingresso mancante",
"missingFieldTemplate": "Modello di campo mancante"
"missingFieldTemplate": "Modello di campo mancante",
"imageNotProcessedForControlAdapter": "L'immagine dell'adattatore di controllo #{{number}} non è stata elaborata"
},
"useCpuNoise": "Usa la CPU per generare rumore",
"iterations": "Iterazioni",
"iterationsWithCount_one": "{{count}} Iterazione",
"iterationsWithCount_many": "{{count}} Iterazioni",
"iterationsWithCount_other": "{{count}} Iterazioni",
"isAllowedToUpscale": {
"useX2Model": "L'immagine è troppo grande per l'ampliamento con il modello x4, utilizza il modello x2",
"tooLarge": "L'immagine è troppo grande per l'ampliamento, seleziona un'immagine più piccola"
@ -526,7 +542,15 @@
"aspect": "Aspetto",
"setToOptimalSizeTooLarge": "$t(parameters.setToOptimalSize) (potrebbe essere troppo grande)",
"remixImage": "Remixa l'immagine",
"coherenceEdgeSize": "Dim. bordo"
"coherenceEdgeSize": "Dim. bordo",
"infillMosaicTileWidth": "Larghezza piastrella",
"infillMosaicMinColor": "Colore minimo",
"infillMosaicMaxColor": "Colore massimo",
"infillMosaicTileHeight": "Altezza piastrella",
"infillColorValue": "Colore di riempimento",
"globalSettings": "Impostazioni globali",
"globalPositivePromptPlaceholder": "Prompt positivo globale",
"globalNegativePromptPlaceholder": "Prompt negativo globale"
},
"settings": {
"models": "Modelli",
@ -551,7 +575,7 @@
"intermediatesCleared_one": "Cancellata {{count}} immagine intermedia",
"intermediatesCleared_many": "Cancellate {{count}} immagini intermedie",
"intermediatesCleared_other": "Cancellate {{count}} immagini intermedie",
"clearIntermediatesDesc1": "La cancellazione delle immagini intermedie ripristinerà lo stato di Tela Unificata e ControlNet.",
"clearIntermediatesDesc1": "La cancellazione delle immagini intermedie ripristinerà lo stato della Tela e degli Adattatori di Controllo.",
"intermediatesClearedFailed": "Problema con la cancellazione delle immagini intermedie",
"clearIntermediatesWithCount_one": "Cancella {{count}} immagine intermedia",
"clearIntermediatesWithCount_many": "Cancella {{count}} immagini intermedie",
@ -567,8 +591,8 @@
"imageCopied": "Immagine copiata",
"imageNotLoadedDesc": "Impossibile trovare l'immagine",
"canvasMerged": "Tela unita",
"sentToImageToImage": "Inviato a Immagine a Immagine",
"sentToUnifiedCanvas": "Inviato a Tela Unificata",
"sentToImageToImage": "Inviato a Generazione da immagine",
"sentToUnifiedCanvas": "Inviato alla Tela",
"parametersNotSet": "Parametri non impostati",
"metadataLoadFailed": "Impossibile caricare i metadati",
"serverError": "Errore del Server",
@ -620,7 +644,8 @@
"uploadInitialImage": "Carica l'immagine iniziale",
"problemDownloadingImage": "Impossibile scaricare l'immagine",
"prunedQueue": "Coda ripulita",
"modelImportCanceled": "Importazione del modello annullata"
"modelImportCanceled": "Importazione del modello annullata",
"parameters": "Parametri"
},
"tooltip": {
"feature": {
@ -689,7 +714,10 @@
"coherenceModeBoxBlur": "Sfocatura Box",
"coherenceModeStaged": "Maschera espansa",
"invertBrushSizeScrollDirection": "Inverti scorrimento per dimensione pennello",
"discardCurrent": "Scarta l'attuale"
"discardCurrent": "Scarta l'attuale",
"initialFitImageSize": "Adatta dimensione immagine al rilascio",
"hideBoundingBox": "Nascondi il rettangolo di selezione",
"showBoundingBox": "Mostra il rettangolo di selezione"
},
"accessibility": {
"invokeProgressBar": "Barra di avanzamento generazione",
@ -783,7 +811,7 @@
"float": "In virgola mobile",
"currentImageDescription": "Visualizza l'immagine corrente nell'editor dei nodi",
"fieldTypesMustMatch": "I tipi di campo devono corrispondere",
"edge": "Bordo",
"edge": "Collegamento",
"currentImage": "Immagine corrente",
"integer": "Numero Intero",
"inputMayOnlyHaveOneConnection": "L'ingresso può avere solo una connessione",
@ -832,7 +860,10 @@
"editMode": "Modifica nell'editor del flusso di lavoro",
"resetToDefaultValue": "Ripristina il valore predefinito",
"noFieldsViewMode": "Questo flusso di lavoro non ha campi selezionati da visualizzare. Visualizza il flusso di lavoro completo per configurare i valori.",
"edit": "Modifica"
"edit": "Modifica",
"graph": "Grafico",
"showEdgeLabelsHelp": "Mostra etichette sui collegamenti, che indicano i nodi collegati",
"showEdgeLabels": "Mostra le etichette del collegamento"
},
"boards": {
"autoAddBoard": "Aggiungi automaticamente bacheca",
@ -909,7 +940,7 @@
"colorMapTileSize": "Dimensione piastrella",
"mediapipeFaceDescription": "Rilevamento dei volti tramite Mediapipe",
"hedDescription": "Rilevamento dei bordi nidificati olisticamente",
"setControlImageDimensions": "Imposta le dimensioni dell'immagine di controllo su L/A",
"setControlImageDimensions": "Copia le dimensioni in L/A (ottimizza per il modello)",
"maxFaces": "Numero massimo di volti",
"addT2IAdapter": "Aggiungi $t(common.t2iAdapter)",
"addControlNet": "Aggiungi $t(common.controlNet)",
@ -938,12 +969,17 @@
"mediapipeFace": "Mediapipe Volto",
"ip_adapter": "$t(controlnet.controlAdapter_one) #{{number}} ($t(common.ipAdapter))",
"t2i_adapter": "$t(controlnet.controlAdapter_one) #{{number}} ($t(common.t2iAdapter))",
"selectCLIPVisionModel": "Seleziona un modello CLIP Vision"
"selectCLIPVisionModel": "Seleziona un modello CLIP Vision",
"ipAdapterMethod": "Metodo",
"full": "Completo",
"composition": "Solo la composizione",
"style": "Solo lo stile",
"beginEndStepPercentShort": "Inizio/Fine %",
"setControlImageDimensionsForce": "Copia le dimensioni in L/A (ignora il modello)"
},
"queue": {
"queueFront": "Aggiungi all'inizio della coda",
"queueBack": "Aggiungi alla coda",
"queueCountPrediction": "{{promptsCount}} prompt × {{iterations}} iterazioni -> {{count}} generazioni",
"queue": "Coda",
"status": "Stato",
"pruneSucceeded": "Rimossi {{item_count}} elementi completati dalla coda",
@ -980,7 +1016,7 @@
"cancelBatchSucceeded": "Lotto annullato",
"clearTooltip": "Annulla e cancella tutti gli elementi",
"current": "Attuale",
"pauseTooltip": "Sospende l'elaborazione",
"pauseTooltip": "Sospendi l'elaborazione",
"failed": "Falliti",
"cancelItem": "Annulla l'elemento",
"next": "Prossimo",
@ -1346,13 +1382,13 @@
]
},
"seamlessTilingXAxis": {
"heading": "Asse X di piastrellatura senza cuciture",
"heading": "Piastrella senza giunte sull'asse X",
"paragraphs": [
"Affianca senza soluzione di continuità un'immagine lungo l'asse orizzontale."
]
},
"seamlessTilingYAxis": {
"heading": "Asse Y di piastrellatura senza cuciture",
"heading": "Piastrella senza giunte sull'asse Y",
"paragraphs": [
"Affianca senza soluzione di continuità un'immagine lungo l'asse verticale."
]
@ -1381,6 +1417,12 @@
"paragraphs": [
"La dimensione del bordo del passaggio di coerenza."
]
},
"ipAdapterMethod": {
"heading": "Metodo",
"paragraphs": [
"Metodo con cui applicare l'adattatore IP corrente."
]
}
},
"sdxl": {
@ -1476,7 +1518,11 @@
"name": "Nome",
"updated": "Aggiornato",
"projectWorkflows": "Flussi di lavoro del progetto",
"opened": "Aperto"
"opened": "Aperto",
"convertGraph": "Converti grafico",
"loadWorkflow": "$t(common.load) Flusso di lavoro",
"autoLayout": "Disposizione automatica",
"loadFromGraph": "Carica il flusso di lavoro dal grafico"
},
"app": {
"storeNotInitialized": "Il negozio non è inizializzato"
@ -1505,5 +1551,56 @@
"compatibleEmbeddings": "Incorporamenti compatibili",
"addPromptTrigger": "Aggiungi Trigger nel prompt",
"noMatchingTriggers": "Nessun Trigger corrispondente"
},
"controlLayers": {
"opacityFilter": "Filtro opacità",
"deleteAll": "Cancella tutto",
"addLayer": "Aggiungi Livello",
"moveToFront": "Sposta in primo piano",
"moveToBack": "Sposta in fondo",
"moveForward": "Sposta avanti",
"moveBackward": "Sposta indietro",
"brushSize": "Dimensioni del pennello",
"globalMaskOpacity": "Opacità globale della maschera",
"autoNegative": "Auto Negativo",
"toggleVisibility": "Attiva/disattiva la visibilità dei livelli",
"deletePrompt": "Cancella il prompt",
"debugLayers": "Debug dei Livelli",
"rectangle": "Rettangolo",
"maskPreviewColor": "Colore anteprima maschera",
"addPositivePrompt": "Aggiungi $t(common.positivePrompt)",
"addNegativePrompt": "Aggiungi $t(common.negativePrompt)",
"addIPAdapter": "Aggiungi $t(common.ipAdapter)",
"regionalGuidance": "Guida regionale",
"regionalGuidanceLayer": "$t(unifiedCanvas.layer) $t(controlLayers.regionalGuidance)",
"opacity": "Opacità",
"globalControlAdapter": "$t(controlnet.controlAdapter_one) Globale",
"globalControlAdapterLayer": "$t(controlnet.controlAdapter_one) - $t(unifiedCanvas.layer) Globale",
"globalIPAdapter": "$t(common.ipAdapter) Globale",
"globalIPAdapterLayer": "$t(common.ipAdapter) - $t(unifiedCanvas.layer) Globale",
"globalInitialImage": "Immagine iniziale",
"globalInitialImageLayer": "$t(controlLayers.globalInitialImage) - $t(unifiedCanvas.layer) Globale",
"clearProcessor": "Cancella processore",
"resetProcessor": "Ripristina il processore alle impostazioni predefinite",
"noLayersAdded": "Nessun livello aggiunto",
"resetRegion": "Reimposta la regione",
"controlLayers": "Livelli di controllo",
"layers_one": "Livello",
"layers_many": "Livelli",
"layers_other": "Livelli"
},
"ui": {
"tabs": {
"generation": "Generazione",
"generationTab": "$t(ui.tabs.generation) $t(common.tab)",
"canvas": "Tela",
"canvasTab": "$t(ui.tabs.canvas) $t(common.tab)",
"workflows": "Flussi di lavoro",
"workflowsTab": "$t(ui.tabs.workflows) $t(common.tab)",
"models": "Modelli",
"modelsTab": "$t(ui.tabs.models) $t(common.tab)",
"queue": "Coda",
"queueTab": "$t(ui.tabs.queue) $t(common.tab)"
}
}
}

View File

@ -90,7 +90,7 @@
"problemDeletingImages": "画像の削除中に問題が発生",
"drop": "ドロップ",
"dropOrUpload": "$t(gallery.drop) またはアップロード",
"deleteImage": "画像を削除",
"deleteImage_other": "画像を削除",
"deleteImageBin": "削除された画像はOSのゴミ箱に送られます。",
"deleteImagePermanent": "削除された画像は復元できません。",
"download": "ダウンロード",
@ -570,7 +570,6 @@
"pauseSucceeded": "処理が一時停止されました",
"queueFront": "キューの先頭へ追加",
"queueBack": "キューに追加",
"queueCountPrediction": "{{promptsCount}} プロンプト × {{iterations}} イテレーション -> {{count}} 枚生成",
"pause": "一時停止",
"queue": "キュー",
"pauseTooltip": "処理を一時停止",

View File

@ -82,7 +82,7 @@
"drop": "드랍",
"problemDeletingImages": "이미지 삭제 중 발생한 문제",
"downloadSelection": "선택 항목 다운로드",
"deleteImage": "이미지 삭제",
"deleteImage_other": "이미지 삭제",
"currentlyInUse": "이 이미지는 현재 다음 기능에서 사용되고 있습니다:",
"dropOrUpload": "$t(gallery.drop) 또는 업로드",
"copy": "복사",
@ -505,7 +505,6 @@
"completed": "완성된",
"queueBack": "Queue에 추가",
"cancelFailed": "항목 취소 중 발생한 문제",
"queueCountPrediction": "Queue에 {{predicted}} 추가",
"batchQueued": "Batch Queued",
"pauseFailed": "프로세서 중지 중 발생한 문제",
"clearFailed": "Queue 제거 중 발생한 문제",

View File

@ -42,7 +42,8 @@
"autoSwitchNewImages": "Wissel autom. naar nieuwe afbeeldingen",
"loadMore": "Laad meer",
"noImagesInGallery": "Geen afbeeldingen om te tonen",
"deleteImage": "Verwijder afbeelding",
"deleteImage_one": "Verwijder afbeelding",
"deleteImage_other": "",
"deleteImageBin": "Verwijderde afbeeldingen worden naar de prullenbak van je besturingssysteem gestuurd.",
"deleteImagePermanent": "Verwijderde afbeeldingen kunnen niet worden hersteld.",
"assets": "Eigen onderdelen",
@ -382,8 +383,6 @@
"useCpuNoise": "Gebruik CPU-ruis",
"imageActions": "Afbeeldingshandeling",
"iterations": "Iteraties",
"iterationsWithCount_one": "{{count}} iteratie",
"iterationsWithCount_other": "{{count}} iteraties",
"coherenceMode": "Modus"
},
"settings": {
@ -939,7 +938,6 @@
"completed": "Voltooid",
"queueBack": "Voeg toe aan wachtrij",
"cancelFailed": "Fout bij annuleren onderdeel",
"queueCountPrediction": "Voeg {{predicted}} toe aan wachtrij",
"batchQueued": "Reeks in wachtrij geplaatst",
"pauseFailed": "Fout bij onderbreken verwerker",
"clearFailed": "Fout bij wissen van wachtrij",

View File

@ -76,7 +76,18 @@
"localSystem": "Локальная система",
"aboutDesc": "Используя Invoke для работы? Проверьте это:",
"add": "Добавить",
"loglevel": "Уровень логов"
"loglevel": "Уровень логов",
"beta": "Бета",
"selected": "Выбрано",
"positivePrompt": "Позитивный запрос",
"negativePrompt": "Негативный запрос",
"editor": "Редактор",
"goTo": "Перейти к",
"tab": "Вкладка",
"viewing": "Просмотр",
"editing": "Редактирование",
"viewingDesc": "Просмотр изображений в режиме большой галереи",
"editingDesc": "Редактировать на холсте слоёв управления"
},
"gallery": {
"galleryImageSize": "Размер изображений",
@ -86,7 +97,9 @@
"noImagesInGallery": "Изображений нет",
"deleteImagePermanent": "Удаленные изображения невозможно восстановить.",
"deleteImageBin": "Удаленные изображения будут отправлены в корзину вашей операционной системы.",
"deleteImage": "Удалить изображение",
"deleteImage_one": "Удалить изображение",
"deleteImage_few": "Удалить {{count}} изображения",
"deleteImage_many": "Удалить {{count}} изображений",
"assets": "Ресурсы",
"autoAssignBoardOnClick": "Авто-назначение доски по клику",
"deleteSelection": "Удалить выделенное",
@ -334,6 +347,10 @@
"remixImage": {
"desc": "Используйте все параметры, кроме сида из текущего изображения",
"title": "Ремикс изображения"
},
"toggleViewer": {
"title": "Переключить просмотр изображений",
"desc": "Переключение между средством просмотра изображений и рабочей областью для текущей вкладки."
}
},
"modelManager": {
@ -448,7 +465,9 @@
"loraModels": "LoRAs",
"main": "Основные",
"noModelsInstalled": "Нет установленных моделей",
"noModelsInstalledDesc1": "Установите модели с помощью"
"noModelsInstalledDesc1": "Установите модели с помощью",
"noMatchingModels": "Нет подходящих моделей",
"ipAdapters": "IP адаптеры"
},
"parameters": {
"images": "Изображения",
@ -508,7 +527,8 @@
"missingNodeTemplate": "Отсутствует шаблон узла",
"missingFieldTemplate": "Отсутствует шаблон поля",
"addingImagesTo": "Добавление изображений в",
"invoke": "Создать"
"invoke": "Создать",
"imageNotProcessedForControlAdapter": "Изображение адаптера контроля №{{number}} не обрабатывается"
},
"isAllowedToUpscale": {
"useX2Model": "Изображение слишком велико для увеличения с помощью модели x4. Используйте модель x2",
@ -519,9 +539,6 @@
"useCpuNoise": "Использовать шум CPU",
"imageActions": "Действия с изображениями",
"iterations": "Кол-во",
"iterationsWithCount_one": "{{count}} Интеграция",
"iterationsWithCount_few": "{{count}} Итерации",
"iterationsWithCount_many": "{{count}} Итераций",
"useSize": "Использовать размер",
"coherenceMode": "Режим",
"aspect": "Соотношение",
@ -532,7 +549,15 @@
"lockAspectRatio": "Заблокировать соотношение",
"remixImage": "Ремикс изображения",
"coherenceMinDenoise": "Мин. шумоподавление",
"coherenceEdgeSize": "Размер края"
"coherenceEdgeSize": "Размер края",
"infillMosaicTileWidth": "Ширина плиток",
"infillMosaicTileHeight": "Высота плиток",
"infillMosaicMinColor": "Мин цвет",
"infillMosaicMaxColor": "Макс цвет",
"infillColorValue": "Цвет заливки",
"globalSettings": "Глобальные настройки",
"globalNegativePromptPlaceholder": "Глобальный негативный запрос",
"globalPositivePromptPlaceholder": "Глобальный запрос"
},
"settings": {
"models": "Модели",
@ -626,7 +651,8 @@
"uploadInitialImage": "Загрузить начальное изображение",
"resetInitialImage": "Сбросить начальное изображение",
"prunedQueue": "Урезанная очередь",
"modelImportCanceled": "Импорт модели отменен"
"modelImportCanceled": "Импорт модели отменен",
"parameters": "Параметры"
},
"tooltip": {
"feature": {
@ -695,7 +721,10 @@
"coherenceModeGaussianBlur": "Размытие по Гауссу",
"coherenceModeBoxBlur": "коробчатое размытие",
"discardCurrent": "Отбросить текущее",
"invertBrushSizeScrollDirection": "Инвертировать прокрутку для размера кисти"
"invertBrushSizeScrollDirection": "Инвертировать прокрутку для размера кисти",
"initialFitImageSize": "Подогнать размер изображения при перебросе",
"hideBoundingBox": "Скрыть ограничительную рамку",
"showBoundingBox": "Показать ограничительную рамку"
},
"accessibility": {
"uploadImage": "Загрузить изображение",
@ -838,7 +867,10 @@
"editMode": "Открыть в редакторе узлов",
"resetToDefaultValue": "Сбросить к стандартному значкнию",
"edit": "Редактировать",
"noFieldsViewMode": "В этом рабочем процессе нет выбранных полей для отображения. Просмотрите полный рабочий процесс для настройки значений."
"noFieldsViewMode": "В этом рабочем процессе нет выбранных полей для отображения. Просмотрите полный рабочий процесс для настройки значений.",
"graph": "График",
"showEdgeLabels": "Показать метки на ребрах",
"showEdgeLabelsHelp": "Показать метки на ребрах, указывающие на соединенные узлы"
},
"controlnet": {
"amult": "a_mult",
@ -906,8 +938,8 @@
"lineartAnime": "Контурный рисунок в стиле аниме",
"mediapipeFaceDescription": "Обнаружение лиц с помощью Mediapipe",
"hedDescription": "Целостное обнаружение границ",
"setControlImageDimensions": "Установите размеры контрольного изображения на Ш/В",
"scribble": "каракули",
"setControlImageDimensions": "Скопируйте размер в Ш/В (оптимизируйте для модели)",
"scribble": "Штрихи",
"maxFaces": "Макс Лица",
"mlsdDescription": "Минималистичный детектор отрезков линии",
"resizeSimple": "Изменить размер (простой)",
@ -921,7 +953,19 @@
"modelSize": "Размер модели",
"small": "Маленький",
"body": "Тело",
"hands": "Руки"
"hands": "Руки",
"selectCLIPVisionModel": "Выбрать модель CLIP Vision",
"ipAdapterMethod": "Метод",
"full": "Всё",
"mlsd": "M-LSD",
"h": "H",
"style": "Только стиль",
"dwOpenpose": "DW Openpose",
"pidi": "PIDI",
"composition": "Только композиция",
"hed": "HED",
"beginEndStepPercentShort": "Начало/конец %",
"setControlImageDimensionsForce": "Скопируйте размер в Ш/В (игнорируйте модель)"
},
"boards": {
"autoAddBoard": "Авто добавление Доски",
@ -1300,6 +1344,12 @@
"paragraphs": [
"Плавно укладывайте изображение вдоль вертикальной оси."
]
},
"ipAdapterMethod": {
"heading": "Метод",
"paragraphs": [
"Метод, с помощью которого применяется текущий IP-адаптер."
]
}
},
"metadata": {
@ -1347,7 +1397,6 @@
"completed": "Выполнено",
"queueBack": "Добавить в очередь",
"cancelFailed": "Проблема с отменой элемента",
"queueCountPrediction": "{{promptsCount}} запросов × {{iterations}} изображений -> {{count}} генераций",
"batchQueued": "Пакетная очередь",
"pauseFailed": "Проблема с приостановкой рендеринга",
"clearFailed": "Проблема с очисткой очереди",
@ -1463,7 +1512,11 @@
"projectWorkflows": "Рабочие процессы проекта",
"defaultWorkflows": "Стандартные рабочие процессы",
"name": "Имя",
"noRecentWorkflows": "Нет последних рабочих процессов"
"noRecentWorkflows": "Нет последних рабочих процессов",
"loadWorkflow": "Рабочий процесс $t(common.load)",
"convertGraph": "Конвертировать график",
"loadFromGraph": "Загрузка рабочего процесса из графика",
"autoLayout": "Автоматическое расположение"
},
"hrf": {
"enableHrf": "Включить исправление высокого разрешения",
@ -1516,5 +1569,56 @@
"addPromptTrigger": "Добавить триггер запроса",
"compatibleEmbeddings": "Совместимые встраивания",
"noMatchingTriggers": "Нет соответствующих триггеров"
},
"controlLayers": {
"moveToBack": "На задний план",
"moveForward": "Переместить вперёд",
"moveBackward": "Переместить назад",
"brushSize": "Размер кисти",
"controlLayers": "Слои управления",
"globalMaskOpacity": "Глобальная непрозрачность маски",
"autoNegative": "Авто негатив",
"deletePrompt": "Удалить запрос",
"resetRegion": "Сбросить регион",
"debugLayers": "Слои отладки",
"rectangle": "Прямоугольник",
"maskPreviewColor": "Цвет предпросмотра маски",
"addNegativePrompt": "Добавить $t(common.negativePrompt)",
"regionalGuidance": "Региональная точность",
"opacity": "Непрозрачность",
"globalControlAdapter": "Глобальный $t(controlnet.controlAdapter_one)",
"globalControlAdapterLayer": "Глобальный $t(controlnet.controlAdapter_one) $t(unifiedCanvas.layer)",
"globalIPAdapter": "Глобальный $t(common.ipAdapter)",
"globalIPAdapterLayer": "Глобальный $t(common.ipAdapter) $t(unifiedCanvas.layer)",
"opacityFilter": "Фильтр непрозрачности",
"deleteAll": "Удалить всё",
"addLayer": "Добавить слой",
"moveToFront": "На передний план",
"toggleVisibility": "Переключить видимость слоя",
"addPositivePrompt": "Добавить $t(common.positivePrompt)",
"addIPAdapter": "Добавить $t(common.ipAdapter)",
"regionalGuidanceLayer": "$t(controlLayers.regionalGuidance) $t(unifiedCanvas.layer)",
"resetProcessor": "Сброс процессора по умолчанию",
"clearProcessor": "Чистый процессор",
"globalInitialImage": "Глобальное исходное изображение",
"globalInitialImageLayer": "$t(controlLayers.globalInitialImage) $t(unifiedCanvas.layer)",
"noLayersAdded": "Без слоев",
"layers_one": "Слой",
"layers_few": "Слоя",
"layers_many": "Слоев"
},
"ui": {
"tabs": {
"generation": "Генерация",
"canvas": "Холст",
"workflowsTab": "$t(ui.tabs.workflows) $t(common.tab)",
"models": "Модели",
"generationTab": "$t(ui.tabs.generation) $t(common.tab)",
"workflows": "Рабочие процессы",
"canvasTab": "$t(ui.tabs.canvas) $t(common.tab)",
"queueTab": "$t(ui.tabs.queue) $t(common.tab)",
"modelsTab": "$t(ui.tabs.models) $t(common.tab)",
"queue": "Очередь"
}
}
}

View File

@ -298,7 +298,8 @@
"noImagesInGallery": "Gösterilecek Görsel Yok",
"autoSwitchNewImages": "Yeni Görseli Biter Bitmez Gör",
"currentlyInUse": "Bu görsel şurada kullanımda:",
"deleteImage": "Görseli Sil",
"deleteImage_one": "Görseli Sil",
"deleteImage_other": "",
"loadMore": "Daha Getir",
"setCurrentImage": "Çalışma Görseli Yap",
"unableToLoad": "Galeri Yüklenemedi",

View File

@ -65,7 +65,12 @@
"nextPage": "下一页",
"saveAs": "保存为",
"ai": "ai",
"or": "或"
"or": "或",
"aboutDesc": "使用 Invoke 工作?来看看:",
"add": "添加",
"loglevel": "日志级别",
"copy": "复制",
"localSystem": "本地系统"
},
"gallery": {
"galleryImageSize": "预览大小",
@ -73,7 +78,7 @@
"autoSwitchNewImages": "自动切换到新图像",
"loadMore": "加载更多",
"noImagesInGallery": "无图像可用于显示",
"deleteImage": "删除图片",
"deleteImage_other": "删除图片",
"deleteImageBin": "被删除的图片会发送到你操作系统的回收站。",
"deleteImagePermanent": "删除的图片无法被恢复。",
"assets": "素材",
@ -440,7 +445,6 @@
"useX2Model": "图像太大,无法使用 x4 模型,使用 x2 模型作为替代",
"tooLarge": "图像太大无法进行放大,请选择更小的图像"
},
"iterationsWithCount_other": "{{count}} 次迭代生成",
"cfgRescaleMultiplier": "CFG 重缩放倍数",
"useSize": "使用尺寸",
"setToOptimalSize": "优化模型大小",
@ -599,7 +603,8 @@
"loadMore": "加载更多",
"mode": "模式",
"resetUI": "$t(accessibility.reset) UI",
"createIssue": "创建问题"
"createIssue": "创建问题",
"about": "关于"
},
"tooltip": {
"feature": {
@ -847,7 +852,6 @@
"pruneSucceeded": "从队列修剪 {{item_count}} 个已完成的项目",
"notReady": "无法排队",
"batchFailedToQueue": "批次加入队列失败",
"queueCountPrediction": "{{promptsCount}} 提示词 × {{iterations}} 迭代次数 -> {{count}} 次生成",
"batchQueued": "加入队列的批次",
"front": "前",
"pruneTooltip": "修剪 {{item_count}} 个已完成的项目",
@ -1201,7 +1205,16 @@
"workflows": "工作流",
"noDescription": "无描述",
"uploadWorkflow": "从文件中加载",
"newWorkflowCreated": "已创建新的工作流"
"newWorkflowCreated": "已创建新的工作流",
"name": "名称",
"defaultWorkflows": "默认工作流",
"created": "已创建",
"ascending": "升序",
"descending": "降序",
"updated": "已更新",
"userWorkflows": "我的工作流",
"projectWorkflows": "项目工作流",
"opened": "已打开"
},
"app": {
"storeNotInitialized": "商店尚未初始化"
@ -1219,7 +1232,8 @@
"title": "生成"
},
"advanced": {
"title": "高级"
"title": "高级",
"options": "$t(accordions.advanced.title) 选项"
},
"image": {
"title": "图像"

View File

@ -1,3 +1,4 @@
/* eslint-disable no-console */
import fs from 'node:fs';
import openapiTS from 'openapi-typescript';

View File

@ -1,5 +1,6 @@
import { Box, useGlobalModifiersInit } from '@invoke-ai/ui-library';
import { useSocketIO } from 'app/hooks/useSocketIO';
import { useSyncQueueStatus } from 'app/hooks/useSyncQueueStatus';
import { useLogger } from 'app/logging/useLogger';
import { appStarted } from 'app/store/middleware/listenerMiddleware/listeners/appStarted';
import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
@ -70,6 +71,7 @@ const App = ({ config = DEFAULT_CONFIG, selectedImage }: Props) => {
}, [dispatch]);
useStarterModelsToast();
useSyncQueueStatus();
return (
<ErrorBoundary onReset={handleReset} FallbackComponent={AppErrorBoundaryFallback}>

View File

@ -67,6 +67,8 @@ export const useSocketIO = () => {
if ($isDebugging.get() || import.meta.env.MODE === 'development') {
window.$socketOptions = $socketOptions;
// This is only enabled manually for debugging, console is allowed.
/* eslint-disable-next-line no-console */
console.log('Socket initialized', socket);
}
@ -75,6 +77,8 @@ export const useSocketIO = () => {
return () => {
if ($isDebugging.get() || import.meta.env.MODE === 'development') {
window.$socketOptions = undefined;
// This is only enabled manually for debugging, console is allowed.
/* eslint-disable-next-line no-console */
console.log('Socket teardown', socket);
}
socket.disconnect();

View File

@ -0,0 +1,25 @@
import { useEffect } from 'react';
import { useGetQueueStatusQuery } from 'services/api/endpoints/queue';

const baseTitle = document.title;
const invokeLogoSVG = 'assets/images/invoke-favicon.svg';
const invokeAlertLogoSVG = 'assets/images/invoke-alert-favicon.svg';

/**
 * This hook synchronizes the queue status with the page's title and favicon.
 * It should be considered a singleton and only used once in the component tree.
 */
export const useSyncQueueStatus = () => {
  const { queueSize } = useGetQueueStatusQuery(undefined, {
    selectFromResult: (res) => ({
      queueSize: res.data ? res.data.queue.pending + res.data.queue.in_progress : 0,
    }),
  });

  useEffect(() => {
    document.title = queueSize > 0 ? `(${queueSize}) ${baseTitle}` : baseTitle;
    const faviconEl = document.getElementById('invoke-favicon');
    if (faviconEl instanceof HTMLLinkElement) {
      faviconEl.href = queueSize > 0 ? invokeAlertLogoSVG : invokeLogoSVG;
    }
  }, [queueSize]);
};

View File

@ -20,14 +20,14 @@ export type LoggerNamespace =
| 'models'
| 'config'
| 'canvas'
| 'txt2img'
| 'img2img'
| 'generation'
| 'nodes'
| 'system'
| 'socketio'
| 'session'
| 'queue'
| 'dnd';
| 'dnd'
| 'controlLayers';
export const logger = (namespace: LoggerNamespace) => $logger.get().child({ namespace });

View File

@ -1,3 +1,6 @@
/* eslint-disable no-console */
// This is only enabled manually for debugging, console is allowed.
import type { Middleware, MiddlewareAPI } from '@reduxjs/toolkit';
import { diff } from 'jsondiffpatch';

View File

@ -1,7 +1,6 @@
import type { UnknownAction } from '@reduxjs/toolkit';
import { deepClone } from 'common/util/deepClone';
import { isAnyGraphBuilt } from 'features/nodes/store/actions';
import { nodeTemplatesBuilt } from 'features/nodes/store/nodesSlice';
import { appInfoApi } from 'services/api/endpoints/appInfo';
import type { Graph } from 'services/api/types';
import { socketGeneratorProgress } from 'services/events/actions';
@ -25,13 +24,6 @@ export const actionSanitizer = <A extends UnknownAction>(action: A): A => {
};
}
if (nodeTemplatesBuilt.match(action)) {
return {
...action,
payload: '<Node templates omitted>',
};
}
if (socketGeneratorProgress.match(action)) {
const sanitized = deepClone(action);
if (sanitized.payload.data.progress_image) {

Some files were not shown because too many files have changed in this diff.