Compare commits

...

523 Commits

Author SHA1 Message Date
9d3cd85bdd chore: black 2023-08-14 13:02:33 +10:00
46a8eed33e Merge branch 'main' into feat/refactor_generation_backend 2023-08-14 13:01:28 +10:00
9fee3f7b66 Revert "Add magic to debug"
This reverts commit 511da59793.
2023-08-14 12:58:08 +10:00
9217a217d4 fix(ui): refiner uses steps directly, no math 2023-08-14 12:56:37 +10:00
b2700ffde4 Update post processing docs 2023-08-13 22:25:49 -04:00
511da59793 Add magic to debug 2023-08-14 05:14:24 +03:00
409e5d01ba Fix cpu_only schedulers(unipc) 2023-08-14 05:14:05 +03:00
58d5c61c79 fix: SDXL Inpaint & Outpaint using regular Img2Img strength 2023-08-14 12:55:18 +12:00
3d8da67be3 Remove callback-generator wrapper 2023-08-14 03:35:15 +03:00
957ee6d370 fix: SDXL Canvas Inpaint & Outpaint not respecting SDXL Refiner start value 2023-08-14 12:13:29 +12:00
fecad2c014 fix: SDXL Denoising Strength not plugged in correctly 2023-08-14 11:59:11 +12:00
550e6ef27a re: Set the image denoise str back to 0
Bug has been fixed. No longer needed.
2023-08-14 10:27:07 +12:00
cc85c98bf3 feat: Upgrade Diffusers to 0.19.3
Needed for some schedulers
2023-08-14 09:26:28 +12:00
75fb3f429f re: Readd Refiner Step Math but cap max steps to 1000 2023-08-14 09:26:01 +12:00
d63bb39475 Make dpmpp_sde(_k) use not random seed 2023-08-14 00:24:38 +03:00
096333ba3f Fix error on zero timesteps 2023-08-14 00:20:01 +03:00
7a8f14d595 Clean-up code a bit 2023-08-13 19:50:48 +03:00
59ba9fc0f6 Flip bits in seed for sde/ancestral schedulers to have different noise from initial 2023-08-13 19:50:16 +03:00
6e0beb1ed4 Fixes for second order scheduler timesteps 2023-08-13 19:31:47 +03:00
94636ddb03 Fix empty prompt handling 2023-08-13 19:31:14 +03:00
746e099f0d fix: Do not do step math for refinerSteps
This is probably better done on the backend or in a different way. This can cause steps to go above 1000 which is more than the set number for the model.
2023-08-14 04:04:15 +12:00
499e89d6f6 feat: Add SDXL Negative Aesthetic Score 2023-08-14 04:02:36 +12:00
250d530260 Fixed import issue in invokeai/frontend/install/model_install.py (#4259)
This fixes an import issue introduced in commit 1bfe983. The change made
'invokeai_configure' into a module but this line still tries to call it
as if it's a function. This will result in a `'module' not callable`
error.

## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [x] No


## Description

imic from discord ask that I submit a PR to fix this bug.

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-08-14 02:40:08 +12:00
90fa3eebb3 feat: Make SDXL Style Prompt not take spaces 2023-08-14 02:25:39 +12:00
0aba105a8f Missed a spot in configure_invokeai.py 2023-08-13 05:32:35 -07:00
9e2e82a752 Fixed import issue in invokeai/frontend/install/model_install.py
This fixes an import issue introduced in commit 1bfe983.
The change made 'invokeai_configure' into a module but this line still tries to call it as if it's a function. This will result in a `'module' not callable` error.
2023-08-13 05:15:55 -07:00
561951ad98 chore: Black linting 2023-08-13 21:28:39 +12:00
3ff9961bda fix: Circular dependency in Mask Blur Method 2023-08-13 21:26:20 +12:00
33779b6339 chore: Remove shouldFitToWidthHeight from Inpaint Graphs
Was never used for inpainting but was fed to the node anyway.
2023-08-13 21:16:37 +12:00
b35cdc05a5 feat: Scaled Processing to Inpainting & Outpainting / 1.x & SDXL 2023-08-13 20:17:23 +12:00
9afb5d6ace Update version to 3.0.2post1 2023-08-12 19:49:33 -04:00
50177b8ed9 Update frontend JS files 2023-08-12 19:49:33 -04:00
c8864e475b fix: SDXL Lora's not working on Canvas Image To Image 2023-08-13 04:34:15 +12:00
fcf7f4ac77 feat: Add SDXL ControlNet To Linear UI 2023-08-13 04:27:38 +12:00
29f1c6dc82 fix: Image To Image FP32 Fix for Canvas SDXL 2023-08-13 04:23:52 +12:00
28208e6f49 fix: Fix VAE Precision not working for SDXL Canvas Modes 2023-08-13 04:09:51 +12:00
c33acf951e feat: Make Refiner work with Canvas 2023-08-13 03:53:40 +12:00
500cd552bc feat: Make SDXL work across the board + Custom VAE Support
Also a major cleanup pass to the SDXL graphs to ensure there's no ID overlap
2023-08-13 01:45:03 +12:00
55d27f71a3 feat: Give each graph its own unique id 2023-08-13 00:51:10 +12:00
746c7c59ff fix: remove extra node for canvas output catch 2023-08-12 22:39:30 +12:00
ad96c41156 feat: Add Canvas Output node to all Canvas Graphs 2023-08-12 22:04:43 +12:00
27bd127fb0 fix: Do not add anything but final output to staging area 2023-08-12 21:10:30 +12:00
f296e5c41e wip: Remove MaskBlur / Adjust color correction 2023-08-12 20:54:30 +12:00
a67d8376c7 fix missed spot for autoAddBoardId none 2023-08-12 18:07:01 +10:00
9f6221fe8c Merge branch 'main' into feat/refactor_generation_backend 2023-08-12 18:37:47 +12:00
7587b54787 chore: Cleanup, comment and organize Node Graphs
Before it gets too chaotic
2023-08-12 17:17:46 +12:00
7254ffc3e7 chore: Split Inpaint and Outpaint Graphs 2023-08-12 16:30:20 +12:00
6034fa12de feat: Add Mask Blur node 2023-08-12 16:20:58 +12:00
ce3675fc14 Apply denoising_start/end according on timestep value 2023-08-12 03:19:49 +03:00
8acd7eeca5 feat: Disable clip skip for SDXL Canvas 2023-08-12 08:18:30 +12:00
7293a6036a feat(wip): Add SDXL To Canvas 2023-08-12 08:16:05 +12:00
0b11f309ca instead of crashing when a corrupted model is detected, warn and move on 2023-08-11 15:05:14 -04:00
6a8eb392b2 Add support for loading SDXL LoRA weights in diffusers format. 2023-08-11 14:40:22 -04:00
f343ab0302 wip: Port Outpainting to new backend 2023-08-12 06:15:59 +12:00
824ca92760 fix maximum python version instructions 2023-08-11 13:49:39 -04:00
d7d6298ec0 feat: Add Infill Method support 2023-08-12 05:32:11 +12:00
58a48bf197 fix: LoRA list name sorting 2023-08-12 04:47:15 +12:00
5629d8fa37 fix; Key issue in Lora List 2023-08-12 04:43:40 +12:00
1affb7f647 feat: Add Paste / Mask Blur / Color Correction to Inpainting
Seam options are now removed. They are replaced by two options --Mask Blur and Mask Blur Method .. which control the softness of the mask that is being painted.
2023-08-12 03:28:19 +12:00
69a9dc7b36 wip: Add initial Inpaint Graph 2023-08-12 02:42:13 +12:00
f3ae52ff97 Fix error at high denoising_start, fix unipc(cpu_only) 2023-08-11 15:46:16 +03:00
7479f9cc02 feat: Update LinearUI to use new backend (except Inpaint) 2023-08-11 22:22:01 +12:00
87ce4ab27c fix: Update default_graph to use new DenoiseLatents 2023-08-11 22:21:13 +12:00
7c0023ad9e feat: Remove TextToLatents / Rename Latents To Latents -> DenoiseLatents 2023-08-11 22:20:37 +12:00
231e665675 Merge branch 'main' into feat/refactor_generation_backend 2023-08-11 20:53:38 +12:00
80fd4c2176 undo lint changes 2023-08-11 14:26:09 +10:00
3b6e425e17 fix error detail in toast 2023-08-11 14:26:09 +10:00
50415450d8 invalidate board total when images deleted, only run date range logic if board has less than 20 images 2023-08-11 14:26:09 +10:00
06296896a9 Update invokeai version 2023-08-10 22:23:41 -04:00
a7399aca0c Add new JS files for 3.0.2 build 2023-08-10 22:23:41 -04:00
d1ea8b1e98 Two changes to command-line scripts (#4235)
During install testing I discovered two small problems in the
command-line scripts. These are fixed.

## What type of PR is this? (check all applicable)

- [X Bug Fix

## Have you discussed this change with the InvokeAI team?
- [X] Yes
- 
      
## Have you updated all relevant documentation?
- [X] Yes


## Description

- installer - use correct entry point for invokeai-configure
- model merge script - prevent error when `--root` not provided
2023-08-10 21:11:45 -04:00
f851ad7ba0 Two changes to command-line scripts
- installer - use correct entry point for invokeai-configure
- model merge script - prevent error when `--root` not provided
2023-08-10 20:59:22 -04:00
591838a84b Add support for LyCORIS IA3 format (#4234)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [x] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [x] No

      
## Have you updated all relevant documentation?
- [ ] Yes
- [x] No


## Description
Add support for LyCORIS IA3 format

## Related Tickets & Documents
- Closes #4229 

## Added/updated tests?

- [ ] Yes
- [x] No
2023-08-11 03:30:35 +03:00
c0c2ab3dcf Format by black 2023-08-11 03:20:56 +03:00
56023bc725 Add support for LyCORIS IA3 format 2023-08-11 02:08:08 +03:00
2ef6a8995b Temporary force set vae to same precision as unet 2023-08-10 18:01:58 -04:00
d0fee93aac round slider values to nice numbers 2023-08-10 18:00:45 -04:00
1bfe9835cf clip cache settings to permissible values; remove redundant imports in install __init__ file 2023-08-10 18:00:45 -04:00
8e7eae6cc7 Probe LoRAs that do not have the text encoder (#4181)
## What type of PR is this? (check all applicable)

- [X] Bug Fix

## Have you discussed this change with the InvokeAI team?
- [X] No - minor fix

      
## Have you updated all relevant documentation?
- [X] Yes

## Description

It turns out that some LoRAs do not have the text encoder model, and
this was causing the code that distinguishes the model base type during
model import to reject them as having an unknown base model. This PR
enables detection of these cases.
2023-08-10 17:50:20 -04:00
f6522c8971 Merge branch 'main' into fix/detect-more-loras 2023-08-10 17:33:16 -04:00
a969707e45 prevent vae: '' from crashing model 2023-08-10 17:33:04 -04:00
6c8e898f09 Update scripts/verify_checkpoint_template.py
Co-authored-by: Eugene Brodsky <ebr@users.noreply.github.com>
2023-08-10 16:00:33 -04:00
7bad9bcf53 update dependencies and docs to cu118 2023-08-10 15:19:12 -04:00
d42b45116f fix(ui): fix lora sort (#4222)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [s] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

      

## Description

was sorting with disabled at top of list instead of bottom

fixes #4217

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #4217

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

![image](https://github.com/invoke-ai/InvokeAI/assets/4822129/dd895b86-05de-4303-8674-9b181037abaa)
2023-08-10 21:04:28 +12:00
d4812bbc8d Merge branch 'main' into fix/ui/fix-lora-sort 2023-08-10 19:00:26 +10:00
3cd05cf6bf fix(ui): fix lora sort
was sorting with disabled at top of list instead of bottom

fixes #4217
2023-08-10 15:31:29 +10:00
2564301aeb fix(ui): fix canvas model switching (#4221)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

## Description

There was no check at all to see if the canvas had a valid model already
selected. The first model in the list was selected every time.

Now, we check if its valid. If not, we go through the logic to try and
pick the first valid model.

If there are no valid models, or there was a problem listing models, the
model selection is cleared.

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->


- Closes #4125

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

- Go to Canvas tab
- Select a model other than the first one in the list
- Go to a different tab
- Go back to Canvas tab
- The model should be the same as you selected
2023-08-10 17:29:41 +12:00
da0efeaa7f fix(ui): fix canvas model switching
There was no check at all to see if the canvas had a valid model already selected. The first model in the list was selected every time.

Now, we check if its valid. If not, we go through the logic to try and pick the first valid model.

If there are no valid models, or there was a problem listing models, the model selection is cleared.
2023-08-10 15:20:37 +10:00
49cce1eec6 feat: add app_version to image metadata 2023-08-10 14:22:39 +10:00
e9ec5ab85c Apply requested changes
Co-Authored-By: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
2023-08-10 06:19:22 +03:00
17fed1c870 Fix merge conflict errors 2023-08-10 05:03:33 +03:00
ade78b9591 Merge branch 'main' into feat/refactor_generation_backend 2023-08-10 04:32:16 +03:00
c8fbaf54b6 Add self.min, not self.max 2023-08-10 09:59:22 +10:00
f86d388786 refactor(diffusers_pipeline): remove unused pipeline methods 🚮 (#4175) 2023-08-09 15:19:27 -07:00
cd2c688562 Merge branch 'main' into refactor/remove_unused_pipeline_methods 2023-08-09 17:26:09 -04:00
2d29ac6f0d Add techjedi's image import script (#4171)
## What type of PR is this? (check all applicable)

- [X ] Feature

## Have you discussed this change with the InvokeAI team?
- [X] Yes

## Have you updated all relevant documentation?
- [X] Yes

## Description

This PR adds the `invokeai-import-images` script, which imports a
directory of 2.*.* -generated images into the current InvokeAI root
directory, preserving and converting their metadata. The script also
handles 3.* images.

Many thanks to @techjedi for writing this. This version differs from the
original in two minor respects:

1. It is installed as an `invokeai-import-images` command.
2. The prompts for image and database paths use file completion provided
by the `prompt_toolkit` library.
## To Test

1. Activate the virtual environment for the destination root to import
INTO
2. Run `invokeai-import-images`
3. Follow the prompts

## Related Tickets & Documents

This is a frequently-requested feature on Discord, but I couldn't find
an Issue.

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [X] No : but should in the future
2023-08-09 13:17:08 -04:00
2c2b731386 fix typo 2023-08-09 13:08:59 -04:00
2f68a1a76c use Stalker's simplified LoRA vector-length detection code 2023-08-09 09:21:29 -04:00
930e7bc754 Merge branch 'main' into feat/image-import-script 2023-08-09 08:54:56 -04:00
7d4ace962a Merge branch 'main' into fix/detect-more-loras 2023-08-09 08:48:27 -04:00
06842f8e0a Update to 3.0.2rc1 2023-08-09 00:29:43 -04:00
c82da330db Pin safetensors to 0.3.1
Safetensors 0.3.2 does not ship an ARM64 wheel so install on macOS fails
2023-08-09 00:29:43 -04:00
628df4ec98 Add updated frontend html file 2023-08-09 00:29:43 -04:00
16b956616f Update version to 3.0.2 2023-08-09 00:29:43 -04:00
604cc17a3a Yarn build JS files 2023-08-09 00:29:43 -04:00
37c9b85549 Add slider for VRAM cache in configure script (#4133)
## What type of PR is this? (check all applicable)

- [X ] Feature

## Have you discussed this change with the InvokeAI team?
- [X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [X] No - will be in release notes

## Description

On CUDA systems, this PR adds a new slider to the install-time configure
script for adjusting the VRAM cache and suggests a good starting value
based on the user's max VRAM (this is subject to verification).

On non-CUDA systems this slider is suppressed.

Please test on both CUDA and non-CUDA systems using:
```
invokeai-configure --root ~/invokeai-main/ --skip-sd --skip-support
```

To see and test the default values, move `invokeai.yaml` out of the way
before running.

**Note added 8 August 2023**

This PR also fixes the configure and model install scripts so that if
the window is too small to fit the user interface, the user will be
prompted to interactively resize the window and/or change font size
(with the option to give up). This will prevent `npyscreen` from
generating its horrible tracebacks.

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-08-09 12:27:54 +10:00
8b39b67ec7 Merge branch 'main' into feat/select-vram-in-config 2023-08-09 12:17:27 +10:00
a933977861 Pick correct config file for sdxl models (#4191)
## What type of PR is this? (check all applicable)

- [X] Bug Fix

## Have you discussed this change with the InvokeAI team?
- [X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [X Yes
- [ ] No


## Description

If `models.yaml` is cleared out for some reason, the model manager will
repopulate it by scanning `models`. However, this would fail with a
pydantic validation error if any SDXL checkpoint models were present
because the lack of logic to pick the correct configuration file. This
has now been added.
2023-08-09 11:16:48 +10:00
dfb41d8461 Merge branch 'main' into bugfix/autodetect-sdxl-ckpt-config 2023-08-09 03:57:44 +03:00
e98f7eda2e Fix total_steps in generation event, order field added 2023-08-09 03:34:25 +03:00
b4a74f6523 Add MaskEdge and ColorCorrect nodes
Co-Authored-By: Kent Keirsey <31807370+hipsterusername@users.noreply.github.com>
2023-08-08 23:57:02 +03:00
f7aec3b934 Move conditioning class to backend 2023-08-08 23:33:52 +03:00
4d5169e16d Merge branch 'main' into feat/select-vram-in-config 2023-08-08 13:50:02 -04:00
a7e44678fb Remove legacy/unused code 2023-08-08 20:49:01 +03:00
da0184a786 Invert mask, fix l2l on no mask conntected, remove zeroing latents on zero start 2023-08-08 20:01:49 +03:00
f56f19710d allow user to interactively resize screen before UI runs 2023-08-08 12:27:25 -04:00
96b7248051 Add mask to l2l 2023-08-08 18:50:36 +03:00
e77400ab62 remove deprecated options from config 2023-08-08 08:33:30 -07:00
13347f6aec blackified 2023-08-08 08:33:30 -07:00
a9bf387e5e turned on Pydantic validate_assignment 2023-08-08 08:33:30 -07:00
8258c87a9f refrain from writing deprecated legacy options to invokeai.yaml 2023-08-08 08:33:30 -07:00
1b1b399fd0 Fix crash when attempting to update a model (#4192)
## What type of PR is this? (check all applicable)

- [X] Bug Fix


## Have you discussed this change with the InvokeAI team?
- [X No, because small fix

      
## Have you updated all relevant documentation?
- [X] Yes

## Description

A logic bug was introduced in PR #4109 that caused Web-based model
updates to fail with a pydantic validation error. This corrects the
problem.

## Related Tickets & Documents

PR #4109
2023-08-08 10:54:27 -04:00
a8d3e078c0 Merge branch 'main' into fix/detect-more-loras 2023-08-08 10:42:45 -04:00
6ed7ba57dd Merge branch 'main' into bugfix/fix-model-updates 2023-08-08 09:05:25 -04:00
2b3b77a276 api(images): allow HEAD request on image/full (#4193) 2023-08-08 00:08:48 -07:00
8b8ec68b30 Merge branch 'main' into feat/image_http_head 2023-08-08 00:02:48 -07:00
e20af5aef0 feat(ui): add LoRA support to SDXL linear UI
new graph modifier `addSDXLLoRasToGraph()` handles adding LoRA to the SDXL t2i and i2i graphs.
2023-08-08 15:02:00 +10:00
57e8ec9488 chore(ui): lint/format 2023-08-08 12:53:47 +10:00
734a9e4271 invalidate board total when images deleted, only run date range logic if board has less than 20 images 2023-08-08 12:53:47 +10:00
fe924daee3 add option to disable multiselect 2023-08-08 12:53:47 +10:00
750f09fbed blackify 2023-08-07 21:01:59 -04:00
4df581811e add template verification script 2023-08-07 21:01:48 -04:00
eb70bc2ae4 add scripts to create model templates and check whether they match 2023-08-07 21:00:47 -04:00
5f29526a8e Add seed to latents field 2023-08-08 04:00:33 +03:00
492bfe002a Remove sdxl t2l/l2l nodes 2023-08-08 03:38:42 +03:00
809705c30d api(images): allow HEAD request on image/full 2023-08-07 15:11:47 -07:00
f0918edf98 improve error reporting on unrecognized lora models 2023-08-07 16:38:58 -04:00
a846d82fa1 Add techedi code to avoid rendering prompt/seed with null
- Added techjedi github and real names
2023-08-07 16:29:46 -04:00
22f7cf0638 add stalker's complicated but effective code for finding token vector length in LoRAs 2023-08-07 16:19:57 -04:00
25c669b1d6 Merge remote-tracking branch 'origin/main' into refactor/remove_unused_pipeline_methods 2023-08-07 13:03:10 -07:00
4367061b19 fix(ModelManager): fix overridden VAE with relative path (#4059) 2023-08-07 12:57:32 -07:00
0fd13d3604 Merge branch 'main' into feat/select-vram-in-config 2023-08-07 15:51:59 -04:00
72a3e776b2 fix logic error introduced in PR 4109 2023-08-07 15:38:22 -04:00
af044007d5 pick correct config file for sdxl models 2023-08-07 15:19:49 -04:00
1db2c93f75 Fix preview, inpaint 2023-08-07 21:27:32 +03:00
f272a44feb Merge branch 'main' into refactor/model_manager_instantiate 2023-08-07 10:59:28 -07:00
2539e26c18 Apply denoising_start/end, add torch-sdp to memory effictiend attention func 2023-08-07 19:57:11 +03:00
b0738b7f70 Fixes, zero tensor for empty negative prompt, remove raw prompt node 2023-08-07 18:37:06 +03:00
8469d3e95a chore: black 2023-08-07 10:05:52 +10:00
ae17d01e1d Fix hue adjustment (#4182)
* Fix hue adjustment

Hue adjustment wasn't working correctly because color channels got swapped. This has now been fixed and we're using PIL rather than cv2 to do the RGBA->HSV->RGBA conversion. The range of hue adjustment is also the more typical 0..360 degrees.
2023-08-06 23:23:51 +00:00
f3d3316558 probe LoRAs that do not have the text encoder 2023-08-06 16:00:53 -04:00
5a6cefb0ea add backslash to end of incomplete windows paths 2023-08-06 12:34:35 -04:00
1a6f5f0860 use backslash on Windows systems for autoadded delimiter 2023-08-06 12:29:31 -04:00
5bfd6cb66f Merge remote-tracking branch 'origin/main' into refactor/model_manager_instantiate
# Conflicts:
#	invokeai/backend/model_management/model_manager.py
2023-08-05 22:02:28 -07:00
59caff7ff0 refactor(diffusers_pipeline): remove unused img2img wrappers 🚮
invokeai.app no longer needs this as a single method, as it builds on latents2latents instead.
2023-08-05 21:50:52 -07:00
6487e7d906 refactor(diffusers_pipeline): remove unused ModelGroup 🚮
orphaned since #3550 removed the LazilyLoadedModelGroup code, probably unused since ModelCache took over responsibility for sequential offload somewhere around #3335.
2023-08-05 21:50:52 -07:00
77033eabd3 refactor(diffusers_pipeline): remove unused precision 🚮 2023-08-05 21:50:52 -07:00
b80abdd101 refactor(diffusers_pipeline): remove unused image_from_embeddings 🚮 2023-08-05 21:50:52 -07:00
006d782cc8 refactor(diffusers_pipeline): tidy imports 🚮 2023-08-05 21:50:52 -07:00
d09dfc3e9b fix(api): use db_location instead of db_path_string
This may just be the SQLite memory sentinel value.
2023-08-06 14:09:04 +10:00
66f524cae7 fix(mm): fix a lot of typing issues
Most fixes are just things being typed as `str` but having default values of `None`, but there are some minor logic changes.
2023-08-06 14:09:04 +10:00
9ba50130a1 fix(api): fix db location types
The services all want strings instead of `Path`s; create variable for the string representation of the path provided by the config services.
2023-08-06 14:09:04 +10:00
d4cf2d2666 fix(api): fix ApiDependencies.invoker types
ApiDependencies.invoker` provides typing for the API's services layer. Marking it `Optional` results in all the routes seeing it as optional, which is not good.

Instead of marking it optional to satisfy the initial assignment to `None`, we can just skip the initial assignment. This preserves the IDE hinting in API layer and is types-legal.
2023-08-06 14:09:04 +10:00
9aaf67c5b4 wip 2023-08-06 05:05:25 +03:00
b8b589c150 fix(nodes): fix hsl nodes rebase conflict 2023-08-06 09:57:49 +10:00
d93900a8de Added HSL Nodes 2023-08-06 09:57:49 +10:00
7f4c387080 test(model_management): factor out name strings 2023-08-05 15:46:46 -07:00
80876bbbd1 Merge remote-tracking branch 'origin/refactor/model_manager_instantiate' into refactor/model_manager_instantiate 2023-08-05 15:25:05 -07:00
7a4ff4c089 Merge branch 'main' into refactor/model_manager_instantiate 2023-08-05 15:23:38 -07:00
44bf308192 test(model_management): add a couple tests for _get_model_path 2023-08-05 15:22:23 -07:00
12e51c84ae blackified 2023-08-05 14:26:16 -07:00
b2eb83deff add docs 2023-08-05 14:26:16 -07:00
0ccc3b509e add techjedi's import script, with some filecompletion tweaks 2023-08-05 14:26:16 -07:00
4043a4c21c blackified 2023-08-05 12:44:58 -04:00
c8ceb96091 add docs 2023-08-05 12:26:52 -04:00
83f75750a9 add techjedi's import script, with some filecompletion tweaks 2023-08-05 12:19:24 -04:00
dc96a3e79d Fix random number generator
Passing in seed=0 is not equivalent to seed=None. The latter will get a new seed from entropy in the OS, and that's what we should be using.
2023-08-06 00:29:08 +10:00
c076f1397e rebuild frontend 2023-08-05 14:40:42 +10:00
2568aafc0b bump version number so that pip updates work 2023-08-05 14:40:42 +10:00
65ed224bfc Merge branch 'main' into refactor/model_manager_instantiate 2023-08-04 21:34:38 -07:00
b6e369c745 chore: black 2023-08-05 12:28:35 +10:00
ecabfc252b devices.py - Update MPS FP16 check to account for upcoming MacOS Sonoma
float16 doesn't seem to work on MacOS Sonoma due to further changes with Metal. This'll default back to float32 for Sonoma users.
2023-08-05 12:28:35 +10:00
da96a41103 Merge branch 'main' into feat/select-vram-in-config 2023-08-05 12:11:50 +10:00
d162b78767 fix broken civitai example link 2023-08-05 12:10:52 +10:00
eb6c317f04 chore: black 2023-08-05 12:05:24 +10:00
6d7223238f fix: fix typo in message 2023-08-05 12:05:24 +10:00
8607d124c5 improve message about the consequences of the --ignore_missing_core_models flag 2023-08-05 12:05:24 +10:00
23497bf759 add --ignore_missing_core_models CLI flag to bypass checking for missing core models 2023-08-05 12:05:24 +10:00
b10cf20eb1 Merge branch 'main' into refactor/model_manager_instantiate
# Conflicts:
#	invokeai/backend/model_management/model_manager.py
2023-08-04 18:28:18 -07:00
3d93851dba Installer should download fp16 models if user has specified 'auto' in config (#4129)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [X] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [X] Yes
- [ ] No


## Description

At install time, when the user's config specified "auto" precision, the
installer was downloading the fp32 models even when an fp16 model would
be appropriate for the OS.


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Closes #4127
2023-08-05 01:56:25 +03:00
9bacd77a79 Merge branch 'main' into bugfix/fp16-models 2023-08-05 01:42:43 +03:00
1b158f62c4 resolve vae overrides correctly 2023-08-04 18:24:47 -04:00
6ad565d84c folded in changes from 4099 2023-08-04 18:24:47 -04:00
04229082d6 Provide ti name from model manager, not from ti itself 2023-08-04 18:24:47 -04:00
03c27412f7 [WIP] Add sdxl lora support (#4097)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [x] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [x] No


## Description
Add lora loading for sdxl.
NOT TESTED - I run only 2 loras, please check more(including lycoris if
they already exists).

## QA Instructions, Screenshots, Recordings
https://civitai.com/models/118536/voxel-xl

![image](https://github.com/invoke-ai/InvokeAI/assets/7768370/76a6abff-cb0a-43b4-b779-a0b0e5b46e56)


## Added/updated tests?

- [ ] Yes
- [x] No
2023-08-04 16:12:22 -04:00
f0613bb0ef Fix merge conflict resolve - restore full/diff layer support 2023-08-04 19:53:27 +03:00
0e9f92b868 Merge branch 'main' into feat/sdxl_lora 2023-08-04 19:22:13 +03:00
7d0cc6ec3f chore: black 2023-08-05 02:04:22 +10:00
2f8b928486 Add support for diff/full lora layers 2023-08-05 02:04:22 +10:00
0d3c27f46c Fix typo
Co-authored-by: Ryan Dick <ryanjdick3@gmail.com>
2023-08-04 11:44:56 -04:00
cff91f06d3 Add lora apply in sdxl l2l node 2023-08-04 11:44:56 -04:00
1d5d187ba1 model probe detects sdxl lora models 2023-08-04 11:44:56 -04:00
1ac14a1e43 add sdxl lora support 2023-08-04 11:44:56 -04:00
cfc3a20565 autoAddBoardId should always be defined 2023-08-04 22:19:11 +10:00
05ae4e283c Stop checking for unet/model.onnx when a model_index.json is detected (#4132)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [x] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ ] No


## Description


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-08-03 22:10:37 -04:00
f06fee4581 Merge branch 'main' into remove-onnx-model-check-from-pipeline-download 2023-08-03 22:02:05 -04:00
9091e19de8 Add execution stat reporting after each invocation (#4125)
## What type of PR is this? (check all applicable)

- [X] Feature


## Have you discussed this change with the InvokeAI team?
- [X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [X] Yes
- [ ] No

## Description

This PR adds execution time and VRAM usage reporting to each graph
invocation. The log output will look like this:

```
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Graph stats: c7764585-9c68-4d9d-a199-55e8186790f3                                                                                              
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Node                 Calls  Seconds  VRAM Used                                                                                                 
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> main_model_loader        1   0.005s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> clip_skip                1   0.004s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> compel                   2   0.512s     0.26G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> rand_int                 1   0.001s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> range_of_size            1   0.001s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> iterate                  1   0.001s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> metadata_accumulator     1   0.002s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> noise                    1   0.002s     0.01G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> t2l                      1   3.541s     1.93G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> l2i                      1   0.679s     0.58G                                                                                                  
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> TOTAL GRAPH EXECUTION TIME:  4.749s                                                                                                            
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> Current VRAM utilization 0.01G                                                                                                                 
```
On systems without CUDA, the VRAM stats are not printed.

The current implementation keeps track of graph ids separately so will
not be confused when several graphs are executing in parallel. It
handles exceptions, and it is integrated into the app framework by
defining an abstract base class and storing an implementation instance
in `InvocationServices`.
2023-08-03 20:05:21 -04:00
0a0b7141af Merge branch 'main' into feat/execution-stats 2023-08-03 19:49:00 -04:00
1deca89fde Merge branch 'main' into feat/select-vram-in-config 2023-08-03 19:27:58 -04:00
446fb4a438 blackify 2023-08-03 19:24:23 -04:00
ab5d938a1d use variant instead of revision 2023-08-03 19:23:52 -04:00
9942af756a Merge branch 'main' into remove-onnx-model-check-from-pipeline-download 2023-08-03 10:10:51 -04:00
06742faca7 Merge branch 'feat/execution-stats' of github.com:invoke-ai/InvokeAI into feat/execution-stats 2023-08-03 08:48:05 -04:00
d2bddf7f91 tweak formatting to accommodate longer runtimes 2023-08-03 08:47:56 -04:00
91ebf9f76e Merge branch 'main' into refactor/model_manager_instantiate 2023-08-02 19:01:21 -07:00
bf94412d14 feat: add multi-select to gallery
multi-select actions include:
- drag to board to move all to that board
- right click to add all to board or delete all

backend changes:
- add routes for changing board for list of image names, deleting list of images
- change image-specific routes to `images/i/{image_name}` to not clobber other routes (like `images/upload`, `images/delete`)
- subclass pydantic `BaseModel` as `BaseModelExcludeNull`, which excludes null values when calling `dict()` on the model. this fixes inconsistent types related to JSON parsing null values into `null` instead of `undefined`
- remove `board_id` from `remove_image_from_board`

frontend changes:
- multi-selection stuff uses `ImageDTO[]` as payloads, for dnd and other mutations. this gives us access to image `board_id`s when hitting routes, and enables efficient cache updates.
- consolidate change board and delete image modals to handle single and multiples
- board totals are now re-fetched on mutation and not kept in sync manually - was way too tedious to do this
- fixed warning about nested `<p>` elements
- closes #4088 , need to handle case when `autoAddBoardId` is `"none"`
- add option to show gallery image delete button on every gallery image

frontend refactors/organisation:
- make typegen script js instead of ts
- enable `noUncheckedIndexedAccess` to help avoid bugs when indexing into arrays, many small changes needed to satisfy TS after this
- move all image-related endpoints into `endpoints/images.ts`, its a big file now, but this fixes a number of circular dependency issues that were otherwise felt impossible to resolve
2023-08-03 11:46:59 +10:00
e080fd1e08 blackify 2023-08-03 11:25:20 +10:00
eeef1e08f8 restore ability to convert merged inpaint .safetensors files 2023-08-03 11:25:20 +10:00
b3b94b5a8d use correct prop 2023-08-03 11:01:21 +10:00
5c9787c145 add project-id header to requests 2023-08-03 11:01:21 +10:00
cf72eba15c Merge branch 'main' into feat/execution-stats 2023-08-03 10:53:25 +10:00
a6f9396a30 fix(db): retrieve metadata even when no session_id
this was unnecessarily skipped if there was no `session_id`.
2023-08-03 10:43:44 +10:00
118d5b387b deploy: refactor github workflows
Currently we use some workflow trigger conditionals to run either a real test workflow (installing the app and running it) or a fake workflow, disguised as the real one, that just auto-passes.

This change refactors the workflow to use a single workflow that can be skipped, using another github action to determine which things to run depending on the paths changed.
2023-08-03 10:32:50 +10:00
02d2cc758d Merge branch 'main' into refactor/model_manager_instantiate 2023-08-02 17:11:23 -07:00
db545f8801 chore: move PR template to .github/ dir (#4060)
## What type of PR is this? (check all applicable)

- [x] Refactor

## Have you discussed this change with the InvokeAI team?
- [x] No, because it's pretty minor

      
## Have you updated all relevant documentation?
- [x] No


## Description

This PR just moves the PR template to within the `.github/` directory
leading to a overall minimal project structure.

## Added/updated tests?

- [x] No : because this change doesn't affect or need a separate test
2023-08-03 10:08:17 +10:00
b0d72b15b3 Merge branch 'main' into patch-1 2023-08-03 10:04:47 +10:00
4e0949fa55 fix .swap() by reverting improperly merged @classmethod change 2023-08-03 10:00:43 +10:00
f028342f5b Merge branch 'main' into patch-1 2023-08-03 10:00:10 +10:00
7021467048 (ci) do not install all dependencies when running static checks (#4036)
Co-authored-by: psychedelicious <4822129+psychedelicious@users.noreply.github.com>
2023-08-02 23:46:02 +00:00
26ef5249b1 guard board switching in board context menu 2023-08-03 09:18:46 +10:00
87424be95d block auto add board change during generation. Switch condition to isProcessing 2023-08-03 09:18:46 +10:00
366952f810 fix localization 2023-08-03 09:18:46 +10:00
450e95de59 auto change board waiting for isReady 2023-08-03 09:18:46 +10:00
0ba8a0ea6c Board assignment changing on click 2023-08-03 09:18:46 +10:00
f4981f26d5 Merge branch 'main' into bugfix/fp16-models 2023-08-02 19:17:55 -04:00
6bc21984c6 Merge branch 'main' into feat/select-vram-in-config 2023-08-02 19:12:43 -04:00
43d6312587 Merge branch 'main' into feat/execution-stats 2023-08-02 19:12:08 -04:00
0d125bf3e4 chore: delete nonfunctional shell.nix
This was for v2.3 and is very broken. See `flake.nix`, thanks to @zopieux
2023-08-03 09:09:40 +10:00
921ccad04d added stats service to the cli_app startup 2023-08-02 18:41:43 -04:00
05c9207e7b Merge branch 'feat/execution-stats' of github.com:invoke-ai/InvokeAI into feat/execution-stats 2023-08-02 18:31:33 -04:00
3fc789a7ee fix unit tests 2023-08-02 18:31:10 -04:00
008362918e Merge branch 'main' into feat/execution-stats 2023-08-02 18:15:51 -04:00
8fc75a71ee integrate correctly into app API and add features
- Create abstract base class InvocationStatsServiceBase
- Store InvocationStatsService in the InvocationServices object
- Collect and report stats on simultaneous graph execution
  independently for each graph id
- Track VRAM usage for each node
- Handle cancellations and other exceptions gracefully
2023-08-02 18:10:52 -04:00
82d259f43b Merge branch 'main' into remove-onnx-model-check-from-pipeline-download 2023-08-02 16:35:46 -04:00
ec48779080 blackify 2023-08-02 14:28:19 -04:00
bc20fe4cb5 Merge branch 'main' into feat/select-vram-in-config 2023-08-02 14:27:17 -04:00
5de42be4a6 reduce VRAM cache default; take max RAM from system 2023-08-02 14:27:13 -04:00
818c55cd53 Refactor/cleanup root detection (#4102)
## What type of PR is this? (check all applicable)

- [ X] Refactor
- [ ] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [ X] No, because: invisible change

      
## Have you updated all relevant documentation?
- [ X] Yes
- [ ] No


## Description

There was a problem in 3.0.1 with root resolution. If INVOKEAI_ROOT were
set to "." (or any relative path), then the location of root would
change if the code did an os.chdir() after config initialization. I
fixed this in a quick and dirty way for 3.0.1.post3.

This PR cleans up the code with a little refactoring.

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-08-02 10:36:12 -04:00
0db1e97119 Merge branch 'main' into refactor/cleanup-root-detection 2023-08-02 09:46:46 -04:00
29ac252501 blackify 2023-08-02 09:44:06 -04:00
880727436c fix default vram cache size calculation 2023-08-02 09:43:52 -04:00
77c5c18542 add slider for VRAM cache 2023-08-02 09:11:24 -04:00
ed76250dba Stop checking for unet/model.onnx when a model_index.json is detected 2023-08-02 07:21:21 -04:00
4d22cafdad Installer should download fp16 models if user has specified 'auto' in config
- Closes #4127
2023-08-01 22:06:27 -04:00
1f9e984b0d Merge branch 'main' into refactor/model_manager_instantiate 2023-08-01 16:49:39 -07:00
8a4e5f73aa reset stats on exception 2023-08-01 19:39:42 -04:00
4599575e65 fix(ui): use const for wsProtocol, lint 2023-08-02 09:26:20 +10:00
242d860a47 fix https/wss behind reverse proxy 2023-08-02 09:26:20 +10:00
0c1a7e72d4 Fix manual installation documentation (#4107)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ]X Bug Fix
- [ ] Optimization
- [ X] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [ X] No, because: obvious problem

      
## Have you updated all relevant documentation?
- [ X] Yes
- [ ] No


## Description

The manual installation documentation in both README.md and
020_MANUAL_INSTALL give an incomplete `invokeai-configure` command which
leaves out the path to the root directory to create. As a result, the
invokeai root directory gets created in the user’s home directory, even
if they intended it to be placed somewhere else.

This is a fairly important issue.
2023-08-01 18:55:53 -04:00
11a44b944d fix installation documentation 2023-08-01 18:52:17 -04:00
fd7b842419 add execution stat reporting after each invocation 2023-08-01 17:44:09 -04:00
5998509888 Merge branch 'main' into refactor/model_manager_instantiate 2023-08-01 11:09:43 -07:00
403a6e88f2 fix: flake: add opencv with CUDA, new patchmatch dependency. 2023-08-01 23:56:41 +10:00
c9d452b9d4 fix: Model Manager Tab Issues (#4087)
## What type of PR is this? (check all applicable)

- [x] Refactor
- [x] Feature
- [x] Bug Fix
- [?] Optimization


## Have you discussed this change with the InvokeAI team?
- [x] No

     
## Description

- Fixed filter type select using `images` instead of `all` -- Probably
some merge conflict.
- Added loading state for model lists. Handy when the model list takes
longer than a second for any reason to fetch. Better to show this than
an empty screen.
- Refactored the render model list function so we modify the display
component in one area rather than have repeated code.

### Other Issues

- The list can get a bit laggy on initial load when you have a hundreds
of models / loras. This needs to be fixed. Will make another PR for
this.
2023-08-02 01:06:53 +12:00
dcc274a2b9 feat: Make ModelListWrapper instead of rendering conditionally 2023-08-01 22:50:10 +10:00
f404669831 fix: Rename loading vars for consistency 2023-08-01 22:42:05 +10:00
ce687b28ef fix: Model Manager Tab Issues 2023-08-01 22:41:32 +10:00
7292d89108 Merge branch 'main' into refactor/cleanup-root-detection 2023-08-01 22:14:56 +10:00
41d6a38690 Update lint-frontend.yml
The action should run on every PR. We can make this more efficient in the future.
2023-08-01 22:10:56 +10:00
fb8f218901 fix(ui): post-onnx fixes 2023-08-01 07:59:01 -04:00
437f45a97f do not depend on existence of /tmp directory 2023-08-01 00:41:35 -04:00
13ef33ed64 Merge branch 'refactor/cleanup-root-detection' of github.com:invoke-ai/InvokeAI into refactor/cleanup-root-detection 2023-08-01 00:19:55 -04:00
86d8b46fca Merge branch 'main' into refactor/cleanup-root-detection 2023-08-01 00:14:26 -04:00
e86925d424 Add onnxruntime to the main dependencies 2023-08-01 00:03:10 -04:00
df53b62048 get rid of dangling debug statements 2023-07-31 22:39:11 -04:00
55d3f04476 additional refactoring 2023-07-31 22:36:11 -04:00
72ebe2ce68 refactor root directory detection to be cleaner 2023-07-31 22:30:06 -04:00
7cd8b2f207 Refactor root detection code 2023-07-31 21:15:44 -04:00
52437205bb chore(ui): lint 2023-08-01 08:54:03 +10:00
ceebb501a4 try named export 2023-08-01 08:54:03 +10:00
cbe874b964 add chakra as peer dep 2023-08-01 08:54:03 +10:00
e2e5918ee2 export theme nad move chakra to peer dep 2023-08-01 08:54:03 +10:00
1b131e328a add optional projectId - unused so far 2023-08-01 08:54:03 +10:00
81654daed7 ONNX Support (#3562)
Note: this branch based on #3548, not on main

While find out what needs to be done to implement onnx, found that I can
do draft of it pretty quickly, so... here it is)
Supports LoRA and TI.
As example - cat with sadcatmeme lora:

![image](https://github.com/invoke-ai/InvokeAI/assets/7768370/dbd1a5df-0629-4741-94b3-8e09f4b4d5a3)

![image](https://github.com/invoke-ai/InvokeAI/assets/7768370/d918836c-fdc7-43c0-aa81-dde9182f2e0f)
2023-07-31 17:34:27 -04:00
746afcd235 Merge branch 'main' into feat/onnx 2023-07-31 16:56:34 -04:00
ae0f4efcca Add missing Optional on a few nullable fields (#4076)
## What type of PR is this? (check all applicable)

- [x] Refactor

## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [x] No, because: trivial

## Description

Adds a few obviously missing `Optional` on fields that default to
`None`.
2023-07-31 16:56:10 -04:00
23647336ce Merge branch 'main' into fix-optional 2023-07-31 16:55:57 -04:00
4ca54dd5fa Added a getting started guide & updated the user landing page flow (#4028)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [ ] Optimization
- [X] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [X] No, because: Just a documentation update

      
## Have you updated all relevant documentation?
- [X] Yes
- [ ] No


## Description
Updated documentation with a getting started guide & a glossary of terms
needed to get started
Updated the landing page flow for users 

<img width="1430" alt="Screenshot 2023-07-27 at 9 53 25 PM"
src="https://github.com/invoke-ai/InvokeAI/assets/7254508/d0006ba7-2ed4-4044-a1bc-ca9a99df9397">

## Related Tickets & Documents

<!--
For pull requests that relate or
 close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-07-31 16:55:25 -04:00
d3a3067164 Merge branch 'main' into main 2023-07-31 16:54:48 -04:00
aeac557c41 Run python black, point out that onnx is an alpha feature in the installer 2023-07-31 16:47:48 -04:00
af4fd328a6 Merge branch 'main' into feat/onnx 2023-07-31 16:45:12 -04:00
c40c7424b6 Merge branch 'main' into fix-optional 2023-07-31 15:59:12 -04:00
a6b907150b Add python black check to pre-commit (#4094)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [x] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ ] No


## Description


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-07-31 15:58:20 -04:00
bacdf985f1 doc(model_manager): docstrings 2023-07-31 09:16:32 -07:00
e3519052ae Merge remote-tracking branch 'origin/main' into refactor/model_manager_instantiate 2023-07-31 08:46:09 -07:00
b0e84c6497 Add python black check to pre-commit 2023-07-31 11:42:08 -04:00
f784e8412c Some cleanup after the merge 2023-07-31 11:23:43 -04:00
1bafbafdd3 Regen schema and rebuild frontend after merging main 2023-07-31 11:02:15 -04:00
f5ac73b091 Merge branch 'main' into feat/onnx 2023-07-31 10:58:40 -04:00
eb642653cb Add Nix Flake for development, which uses Python virtualenv. 2023-07-31 19:14:30 +10:00
2c07f54b6e Merge branch 'main' into fix-optional 2023-07-31 16:31:01 +10:00
0691e0a12a Few modifications to getting started doc 2023-07-31 15:35:20 +10:00
79afcbd07e Merge branch 'main' of https://github.com/invoke-ai/InvokeAI 2023-07-31 14:19:37 +10:00
f4ead5e07f fix keyerror bug that was causing merge script to crash 2023-07-30 19:25:44 -04:00
6d24ca7f52 3.0.1post3 (#4082)
This is a relatively stable release that corrects the urgent windows
install and model manager problems in 3.0.1. It still has two known
bugs:

1. Many inpainting models are not loading correctly.
2. The merge script is failing to start.
2023-07-30 18:03:35 -04:00
2164da8592 blackify 2023-07-30 16:25:06 -04:00
adfd1e52f4 refactor(model_manager): avoid copy/paste logic 2023-07-30 11:53:12 -07:00
0e48c98330 Merge remote-tracking branch 'origin/main' into refactor/model_manager_instantiate
# Conflicts:
#	invokeai/backend/model_management/model_manager.py
2023-07-30 11:33:13 -07:00
4121c261a0 fix missing models when INVOKEAI_ROOT="." 2023-07-30 13:37:18 -04:00
99823d5039 more fixes to update and install 2023-07-30 11:57:06 -04:00
0abceb0e7b Merge branch 'main' of github.com:invoke-ai/InvokeAI 2023-07-30 11:08:27 -04:00
83d3f2347e fix "unrecognized arguments: --yes" bug on unattended upgrade 2023-07-30 11:07:06 -04:00
73e25d8dbe Update communityNodes.md
- Remove FaceMask and add link FaceTools repository, which includes FaceMask, FaceOff, and FacePlace
- Move Ideal Size up from under the template entry
2023-07-30 10:59:56 -04:00
50e00feceb Add missing Optional on a few nullable fields. 2023-07-30 16:25:12 +02:00
03594c949a blackified 2023-07-30 10:18:39 -04:00
adb85036e6 dependency tweaks to avoid installing/uninstalling pkgs 2023-07-30 10:17:04 -04:00
7d7a9273ed Merge branch 'main' of github.com:invoke-ai/InvokeAI 2023-07-30 09:19:14 -04:00
f17ad227cf fix relative model paths to be against config.models_path, not root (#4061)
## What type of PR is this? (check all applicable)

- [ X] Bug Fix

## Have you discussed this change with the InvokeAI team?
- [X] Yes - bug discovered by jpphoto
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ X] Not needed

## Description

The user can customize the location of the models directory by setting
configuration variable `models_dir`. However, the model manager and the
TUI installer were all treating model relative paths as relative to the
invokeai root rather than the designated models directory. This has been
fixed by changing path resolution calls from using `config.root_path` to
`config.models_path`

Unfortunately there were many instances that needed replacement, so this
needs a bit of functional testing - try adding models, removing models,
renaming them, converting checkpoints, etc.
2023-07-30 08:51:35 -04:00
f91d01eb38 Merge branch 'main' into bugfix/model-manager-rel-paths 2023-07-30 08:25:37 -04:00
adfcb610b6 Installer tweaks (#4070)
## What type of PR is this? (check all applicable)


- [ X] Optimization

## Have you discussed this change with the InvokeAI team?
- [X ] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [X ] Yes
- [ ] No


## Description

This PR does two things:

1. if the environment variable INVOKEAI_ROOT is defined at install time,
the zipfile installer will default to its value when asking the user
where to install the software
2. If the user has more than 72 models of any type installed, then the
list will be truncated in the TUI and the user given a warning. Anything
larger than this number of models causes the vertical space to overflow.
The only effect of truncation is that the user will not be able to see
and delete the models that were truncated. The message advises the user
to go to the Web Model Manager interface in this event.
2023-07-30 08:25:11 -04:00
cafcd16657 Merge branch 'main' into install/tui-tweaks 2023-07-30 08:19:45 -04:00
2537ff0280 Merge branch 'main' into bugfix/model-manager-rel-paths 2023-07-30 08:17:36 -04:00
0f5f08e494 Merge branch 'bugfix/model-manager-rel-paths' of github.com:invoke-ai/InvokeAI into bugfix/model-manager-rel-paths 2023-07-30 08:17:21 -04:00
e20c4dc1e8 blackified 2023-07-30 08:17:10 -04:00
6dc4ddef1b Fix various bugs in ckpt to diffusers conversion script (#4065)
## What type of PR is this? (check all applicable)


- [X ] Bug Fix


## Have you discussed this change with the InvokeAI team?
- [ X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ X] No


## Description

This PR fixes several issues with the 3.0.0 conversion script:

- Handles checkpoint variants that don't put dots between fields in the
long state dict key names
- Handles ema, non-ema, pruned and non-pruned ckpts
- Does not add safety checker to converted checkpoints
- Respects precision of original checkpoint file
2023-07-30 08:16:37 -04:00
26af5ec341 Merge branch 'main' into bugfix/model-manager-rel-paths 2023-07-30 08:08:17 -04:00
10b182f316 Merge branch 'main' into bugfix/convert-script 2023-07-30 08:07:51 -04:00
ac84a9f915 reenable display of autoloaded models 2023-07-30 08:05:05 -04:00
844578ab88 fix lora loading crash 2023-07-30 07:57:10 -04:00
ff1c40747e lint: formatting 2023-07-29 20:02:31 -07:00
dbfd1bcb5e Merge branch 'main' into refactor/model_manager_instantiate 2023-07-29 19:53:21 -07:00
444390617f rebuild front end 2023-07-29 22:00:16 -04:00
6cb40d9d7b bump version for hotfix 3.0.1post1 2023-07-29 21:58:57 -04:00
ca895a9cd0 Unpin pydantic and numpy in pyproject.toml (#4062)
## What type of PR is this? (check all applicable)

- [ X] Bug Fix


## Have you discussed this change with the InvokeAI team?
- [ X] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [X] Not needed

## Description

Windows users have been getting a lot of OSErrors while installing 3.0.1
during the pip dependency installation phase. Generally the errors have
involved just two packages, pydantic and numpy. Looking at the install
logs, I see that both of these packages are first installed under one
version number by a dependency, and then uninstalled and replaced by a
slightly different version specified in invoke's `pyproject.toml`. I
think this is the problem - maybe the earlier package is not completely
closed before it is uninstalled and reinstalled.

This PR relaxes pinning of numpy and pydantic in `pyproject.toml`.
Everything seems to install and run properly. Hopefully it will address
the windows install bug as well.
2023-07-29 21:57:21 -04:00
7d27c7b1a4 Merge branch 'main' into lstein/no-pydantic-in-pyproject 2023-07-29 21:47:16 -04:00
6c82229910 fix recovery recipe 2023-07-29 20:00:06 -04:00
43b1eb8e84 wording changes 2023-07-29 19:49:58 -04:00
b10b07220e blackify code 2023-07-29 19:20:20 -04:00
c2eb50d1cd make installer use initial INVOKEAI_ROOT as default install location 2023-07-29 19:19:42 -04:00
73f3b7f84b remove dangling comment 2023-07-29 17:32:33 -04:00
bb18251fad Merge branch 'bugfix/convert-script' of github.com:invoke-ai/InvokeAI into bugfix/convert-script 2023-07-29 17:31:02 -04:00
348bee8981 blackified 2023-07-29 17:30:54 -04:00
078b33bda2 Merge branch 'main' into bugfix/convert-script 2023-07-29 17:30:40 -04:00
e82eb0b9fc add correct optional annotation to precision arg 2023-07-29 17:30:21 -04:00
ad976e5198 Merge branch 'main' into bugfix/model-manager-rel-paths 2023-07-29 17:27:16 -04:00
0e28961e69 Merge branch 'main' into lstein/no-pydantic-in-pyproject 2023-07-29 17:27:02 -04:00
6ce059f063 blackified again 2023-07-29 17:26:40 -04:00
1de783b1ce fix mistake in indexing flat_ema_key 2023-07-29 17:20:26 -04:00
3f9105be50 make convert script respect setting of use_ema in config file 2023-07-29 17:17:45 -04:00
781322a647 installer respects INVOKEAI_ROOT for default root location 2023-07-29 16:16:44 -04:00
9a1cfadd8b fix: SDXL Metadata not being retrieved (#4057)
## What type of PR is this? (check all applicable)

- [x] Bug Fix

## Have you discussed this change with the InvokeAI team?
- [x] Yes

## Description

- SDXL Metadata was not being retrieved. This PR fixes it.
2023-07-29 15:37:02 -04:00
2a2d988928 convert script handles more ckpt variants 2023-07-29 15:28:39 -04:00
ccceb32a85 lint: formatting 2023-07-29 11:50:04 -07:00
72c519c6ad fix incorrect key construction 2023-07-29 13:51:47 -04:00
af12f67948 Merge branch 'lstein/no-pydantic-in-pyproject' of github.com:invoke-ai/InvokeAI into lstein/no-pydantic-in-pyproject 2023-07-29 13:28:38 -04:00
60f5606c2d downgrade torchmetrics to fix model import problem 2023-07-29 13:28:29 -04:00
24b19166dd further refactoring 2023-07-29 13:13:22 -04:00
0396bce4f9 Merge branch 'main' into lstein/no-pydantic-in-pyproject 2023-07-29 13:06:30 -04:00
71768f5988 restore unpinned versions of pydantic and numpy 2023-07-29 13:04:34 -04:00
0fb7328022 blackify code 2023-07-29 13:00:43 -04:00
99daa97978 more refactoring; fixed place where rel conversion missed 2023-07-29 13:00:07 -04:00
21617e60e1 Merge remote-tracking branch 'origin/main' into refactor/model_manager_instantiate 2023-07-29 08:21:26 -07:00
982a568349 blackify pr 2023-07-29 10:47:55 -04:00
d79d5a4ff7 modest refactoring 2023-07-29 10:45:26 -04:00
9968ff2893 fix relative model paths to be against config.models_path, not root 2023-07-29 10:30:27 -04:00
35dd58e273 chore: move PR template to .github/ dir 2023-07-29 12:59:56 +05:30
6d82a1019a fix: Black linting 2023-07-29 17:34:43 +12:00
6ed1bf7084 Merge branch 'main' into metadata-fix 2023-07-29 17:33:30 +12:00
974175be45 fix: Prompt Node using incorrect output type (#4058)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ ] No


## Description


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-07-29 17:32:10 +12:00
86b8b69e88 internal(ModelManager): add instantiate method 2023-07-28 22:30:25 -07:00
bc9a5038fd refactor(ModelManager): factor out get_model_path 2023-07-28 22:29:36 -07:00
bee678fdd1 fix: Prompt Node using incorrect output type 2023-07-29 17:12:25 +12:00
c5caf1e8fe fix: SDXL Metadata not being retrieved 2023-07-29 17:03:19 +12:00
b163ae6a4d refactor(ModelManager): factor out get_model_config 2023-07-28 21:30:20 -07:00
dca685ac25 refactor(ModelManager): refactor rescan-on-miss to exists() method 2023-07-28 21:11:00 -07:00
72708eb53c Feat/Nodes: Change Input to Textbox (#3853)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [X] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [X] No, because:
not yet, making pr to show
      
## Have you updated relevant documentation?
- [ ] Yes
- [ ] No


## Description
Temp Change Node String input to a textbox, to allow easier input of
prompts and larger strings, it works for me but please tell me if I did
it wrong and if the size is ok

## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-07-29 16:10:32 +12:00
aae1670080 fix: Incorrect Prompt Node output type 2023-07-29 16:04:19 +12:00
e70bedba7d refactor(ModelManager): factor out _get_implementation method 2023-07-28 21:03:27 -07:00
1e776d2523 chore: Regen types 2023-07-29 15:59:52 +12:00
8e06e6abbc feat: Update 'style' string input to also display text area 2023-07-29 15:52:59 +12:00
8a0e1b6cfc feat: Create Prompt Input Node 2023-07-29 15:52:37 +12:00
2d9bc79ca4 Merge branch 'main' into nodepromptsize 2023-07-29 12:43:29 +10:00
6886eb094d Make more Simple 2023-07-29 12:40:17 +10:00
6ca0c38ee3 Merge branch 'main' into feat/onnx 2023-07-28 22:06:28 -04:00
d633eb1612 remove pydantic and numpy from pyproject.toml 2023-07-28 21:56:22 -04:00
1bbf2f269d Update installer 2023-07-28 21:02:48 -04:00
ac22652686 rebuild front end 2023-07-28 18:21:05 -04:00
77cfec5cc8 Release 3.0.1 release candidate 3 (#4025)
Branch used to rebuild front end and make minor doc changes during
release process. Merge before next release.
2023-07-28 18:03:44 -04:00
3e4420c1ae bugfix: Float64 error for mps devices on set_timesteps (#4040)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [ ] Feature
- [x] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [x] No, because: minor fix, let me know your thoughts

      
## Have you updated all relevant documentation?
- [x] Yes
- [ ] No

## Description


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue # https://github.com/invoke-ai/InvokeAI/issues/4017
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [x] No : Requires mps device

## [optional] Are there any post deployment tasks we need to perform?

Please test on an MPS (M1/M2) device. 

Relevant code causing the error in #4017 


01b6ec21fa/src/diffusers/schedulers/scheduling_euler_discrete.py (L263C3-L268C75)

```
        self.sigmas = torch.from_numpy(sigmas).to(device=device)
        if str(device).startswith("mps"):
            # mps does not support float64
            self.timesteps = torch.from_numpy(timesteps).to(device, dtype=torch.float32)
        else:
            self.timesteps = torch.from_numpy(timesteps).to(device=device)
```
2023-07-28 18:02:39 -04:00
f8181ab1b3 fix: Concat Link Styling (#4048)
## What type of PR is this? (check all applicable)

- [x] Bug Fix

## Description

- Fix SDXL Concat Link animation not considering the fact that prompt
boxes can be resized.
- Also fixed a minor issue where the overlaying animation box would
block the prompt input resize slightly. Should be good now.
2023-07-28 18:02:22 -04:00
3dfeead9b8 Update troubleshooting guide with ~ydantic and SDXL unet issue advice (#4054)
## What type of PR is this? (check all applicable)


- [X ] Documentation Update


## Have you discussed this change with the InvokeAI team?
- [X ] Yes

      
## Have you updated all relevant documentation?
- [X ] Yes

## Description

Added solutions for installation issues related to large SDXL files and
Windows dependency glitches.
2023-07-28 18:02:04 -04:00
d3f6c7f983 Remove onnxruntime 2023-07-28 16:58:06 -04:00
390ce9f249 Fix onnx installer 2023-07-28 16:54:03 -04:00
3da0be7eb9 update troubleshooting guide with ~ydantic and SDXL unet issue workarounds 2023-07-28 16:42:57 -04:00
8935ae0ea3 Fix issues caused by merge 2023-07-28 14:00:32 -04:00
31e5f4bb0e Merge branch 'main' into set-timestep-mps-fix 2023-07-28 08:58:12 -07:00
2164674b01 Black format 2023-07-28 07:49:29 -07:00
8f2a646286 fix: Lint errors 2023-07-29 02:37:59 +12:00
5ff4dd26bb fix: Concat Link Styling 2023-07-29 02:30:10 +12:00
e342ca872f fix to work on non-MPS systems 2023-07-28 10:27:49 -04:00
a2aa66f43a Run Python black 2023-07-28 10:00:09 -04:00
da751da3dd Merge branch 'main' into feat/onnx 2023-07-28 09:59:35 -04:00
2b7b3dd4ba Run python black 2023-07-28 09:46:44 -04:00
dc1148106d Just install onnxruntime by default 2023-07-28 09:32:43 -04:00
062a369044 feat: Unify Promp Area Styling (#4033)
## What type of PR is this? (check all applicable)

- [ ] Refactor
- [x] Feature
- [ ] Bug Fix
- [ ] Optimization
- [ ] Documentation Update
- [ ] Community Node Submission


## Have you discussed this change with the InvokeAI team?
- [ ] Yes
- [ ] No, because:

      
## Have you updated all relevant documentation?
- [ ] Yes
- [ ] No


## Description

Making the prompt area styling match across all tabs / models and move
all prompt related components into a parent components for quick add.

Cherry picked stuff from the Styles PR coz we ain't gonna merge that.


## Related Tickets & Documents

<!--
For pull requests that relate or close an issue, please include them
below. 

For example having the text: "closes #1234" would connect the current
pull
request to issue 1234.  And when we merge the pull request, Github will
automatically close the issue.
-->

- Related Issue #
- Closes #

## QA Instructions, Screenshots, Recordings

<!-- 
Please provide steps on how to test changes, any hardware or 
software specifications as well as any other pertinent information. 
-->

## Added/updated tests?

- [ ] Yes
- [ ] No : _please replace this line with details on why tests
      have not been included_

## [optional] Are there any post deployment tasks we need to perform?
2023-07-28 22:10:08 +12:00
e4a2f56ad1 feat(ui): tweak link colors
- make the `SDXLConcatLink` icon match existing colors in light mode
- make the link toggle button accent color when active (its not super obvious but at least there is *some* visual difference for the button)
2023-07-28 19:57:05 +10:00
1df30f7260 feat: Pulse Animate SDXL Concat Link 2023-07-28 20:45:39 +12:00
514722d67a Update definitions to be more accurate 2023-07-28 18:35:05 +10:00
5dbde2116f Merge branch 'invoke-ai:main' into main 2023-07-28 18:34:33 +10:00
14c4650801 fix: Lint Errors (returning possible null component) 2023-07-28 19:03:29 +12:00
f155b03eee feat: New animation for Concat Link 2023-07-28 18:55:59 +12:00
ddaf753f7b Merge branch 'set-timestep-mps-fix' of ssh://github.com/ZachNagengast/InvokeAI into set-timestep-mps-fix 2023-07-27 23:40:44 -07:00
e6d14c708c Fix variable name 2023-07-27 23:40:23 -07:00
7f81a95b20 Merge branch 'main' into set-timestep-mps-fix 2023-07-28 16:12:07 +10:00
6a49eec606 feat: Add Concat Link Animation
Might remove the lines. Just pushing to save changes for now.
2023-07-28 15:01:40 +12:00
fd67b18c9a Merge branch 'main' into unify-prompt 2023-07-28 14:48:13 +12:00
9affdbbaad chore: black 2023-07-28 11:38:52 +10:00
8d300bddd0 feat(ui): alias existing type for UpdateLoRAModelResponse 2023-07-28 11:38:52 +10:00
aa2c94be9e make LoRAs editable 2023-07-28 11:38:52 +10:00
4c79350300 persist LoRA model info in models.yaml 2023-07-28 11:38:52 +10:00
10e1d623c3 Add LoRAs to the model manager. 2023-07-28 11:38:52 +10:00
aa1f827271 Fix unet_info location, can have no device prop 2023-07-27 14:47:09 -07:00
fb113b9077 Merge branch 'main' into release/invokeai-3-0-1 2023-07-27 16:24:29 -04:00
6edeb4e072 Pass device to set_timestep to avoid float64 error 2023-07-27 12:52:18 -07:00
006075483d Merge branch 'main' into release/invokeai-3-0-1 2023-07-27 15:21:08 -04:00
1ea9ba84f5 Release session if applying ti or lora 2023-07-27 15:20:38 -04:00
52bd29d484 Merge branch 'main' into release/invokeai-3-0-1 2023-07-27 15:19:05 -04:00
bfdc8c80f3 Testing caching onnx sessions 2023-07-27 14:13:29 -04:00
3bb81bedbd Merge branch 'main' into unify-prompt 2023-07-28 05:36:04 +12:00
b8b46aec09 Revert "fix: Lint Errors"
This reverts commit f057d5c85b.
2023-07-28 04:34:41 +12:00
4d2b87ea01 fix(ui): fix types for controlnet models
`ControlNetModelConfig` was split into `ControlNetModelCheckpointConfig` and `ControlNetModelDiffusersConfig`, need to update the UI types
2023-07-28 04:34:29 +12:00
59716938bf Remove TensorRT support at the current time until we validate it works, remove time step recorder 2023-07-27 11:18:50 -04:00
611f31c057 fix: Adjust embedding button on PositivePrompt for new changes 2023-07-28 03:07:50 +12:00
b60adc31d0 feat: Unify Prompt Area Design Between SDXL and Regular Models 2023-07-28 03:07:50 +12:00
a98ed3a5ba fix: TextArea Resizer styling when disabled 2023-07-28 03:06:31 +12:00
f057d5c85b fix: Lint Errors 2023-07-28 03:06:31 +12:00
918a0dedc0 Always install onnx 2023-07-27 11:00:40 -04:00
a491e326c5 This is no longer needed 2023-07-27 10:52:36 -04:00
f7bb4c3f05 Remove more files no longer needed in main 2023-07-27 10:49:43 -04:00
57271ad125 Move onnx to optional dependencies 2023-07-27 10:28:26 -04:00
33245b37ad Removed things no longer needed in main 2023-07-27 10:23:55 -04:00
81d8fb8762 Removed things no longer needed in main 2023-07-27 10:14:55 -04:00
989d3d7f3c Remove onnx changes from canvas img2img, inpaint, and linear image2image 2023-07-27 10:08:45 -04:00
d2a46b4308 Fix dist and schema after merge 2023-07-27 09:55:28 -04:00
eb1ba8d74b Merge branch 'main' into feat/onnx 2023-07-27 09:54:30 -04:00
4ebde013ea Allow deleting onnx models in model manager ui 2023-07-27 09:50:20 -04:00
024f92f9a9 Add onnx models to the model manager UI 2023-07-27 09:37:37 -04:00
562c937a14 Updated new user flow 2023-07-27 21:46:39 +10:00
5300e353d8 updated community nodes doc 2023-07-27 18:58:44 +10:00
d78c97f8a8 Updated getting started guide and links 2023-07-27 18:51:48 +10:00
52f61698e9 added getting started with Invoke guide 2023-07-27 18:29:12 +10:00
4d732e06de Remove onnx models from img2img and unified canvas 2023-07-26 16:30:02 -04:00
f26a423e95 Fix merge issue 2023-07-26 15:32:28 -04:00
861c0fe76b Correct issues caused by merging main 2023-07-26 12:25:46 -04:00
c16da75ac7 Merge branch 'main' into feat/onnx 2023-07-26 10:42:31 -04:00
36455f6cac Merge branch 'main' into nodepromptsize 2023-07-26 18:54:54 +10:00
2d0f932737 Lint Code 2023-07-26 18:35:04 +10:00
de73e4f5b9 Merge branch 'main' into nodepromptsize 2023-07-23 18:28:25 +10:00
0689e36390 Merge branch 'main' into nodepromptsize 2023-07-22 07:20:28 +10:00
78750042f5 Pass in dim overrides 2023-07-21 12:16:24 -04:00
13e7614508 add text so string node uses textarea 2023-07-21 19:36:27 +10:00
4e1786d9ae Remove Resize: none 2023-07-21 13:55:40 +10:00
585520d8d2 Only apply Textaera to Prompt 2023-07-21 13:17:27 +10:00
98b2734240 Merge branch 'main' into nodepromptsize 2023-07-21 08:07:55 +10:00
7b428b5240 Make height smaller and allow width to change with node 2023-07-21 08:03:01 +10:00
ce08aa350c Allow controlnet passthrough for now 2023-07-20 14:14:04 -04:00
ba1a934297 Fix Lora typings 2023-07-20 14:02:23 -04:00
4e90376d11 Allow passing in of precision, use available providers if none provided 2023-07-20 13:15:45 -04:00
f73b45bcb5 Feat: Change Input to Textbox 2023-07-20 19:11:18 +10:00
23f4a4ea1a Fix dist 2023-07-19 18:27:51 -04:00
6aab8f16ce Fix issue from merge 2023-07-19 18:27:15 -04:00
8f61413865 Setup dist folder 2023-07-19 17:49:27 -04:00
43b6a077fb io binding seems to be massively resource intensive compared to session.run 2023-07-19 17:42:28 -04:00
e8299d0abb Comment out erroniously removed del statement, comment out opt tests 2023-07-18 23:23:34 -04:00
a28ab654ef Setup dist folder 2023-07-18 23:18:46 -04:00
8699fd7050 Fix invoke UI graphs for onnx 2023-07-18 23:16:51 -04:00
9e65470ada Setup dist 2023-07-18 23:07:31 -04:00
f4e52fafac Fix as part of merging main in 2023-07-18 23:05:33 -04:00
ee7b36cea5 Merge branch 'main' into onnx-testing 2023-07-18 22:56:41 -04:00
487455ef2e Add model_type to the model state object 2023-07-18 22:40:27 -04:00
e201ad2f51 Switch to io_binding for run, testing different session options 2023-07-18 21:54:54 -04:00
869f418b03 Setup onnx on linear text2image 2023-07-18 14:27:54 -04:00
35d5ef9118 Emit step completions 2023-07-18 12:35:07 -04:00
bcce70fca6 Testing different session opts, added timings for testing 2023-07-17 16:27:33 -04:00
932112b640 testing being super wasteful with data 2023-07-16 00:17:33 -04:00
91112167b1 Fix syntax err 2023-07-15 23:56:48 -04:00
bd7b59910d Testing onnx in new ui updates 2023-07-14 14:24:15 -04:00
524888bf3b Merge branch 'main' into feat/onnx 2023-07-13 14:23:57 -04:00
0327eae509 chore: Regen API 2023-06-23 05:21:06 +12:00
bb85608890 Merge branch 'main' into feat/onnx 2023-06-23 05:18:41 +12:00
6c7668aaca Update onnx model structure, change code according 2023-06-22 20:03:17 +03:00
7759b3f75a Small refactor 2023-06-21 04:24:25 +03:00
4d337f6abc ONNX Model/runtime first implementation 2023-06-21 02:12:21 +03:00
92c86fd0b8 Set model type to const value in openapi schema, add model format enums to model schema(as they not not referenced in case of Literal definition) 2023-06-20 03:44:58 +03:00
46dc751139 Update model format field to use enums 2023-06-20 03:30:09 +03:00
4cefe37723 Rename format to model_format(still named format when work with config) 2023-06-20 03:25:08 +03:00
82b73c50a0 Remove default model logic 2023-06-20 03:13:10 +03:00
7df7a95299 Merge branch 'main' into model-manager-ui-30 2023-06-19 23:26:11 +12:00
85b4b359c2 tweal: UI colors 2023-06-19 23:16:14 +12:00
cfe81b5e00 fix: Adjust the Schedular select width
So the long names do not get cut off.
2023-06-19 23:05:32 +12:00
b0c4451324 Merge branch 'main' into model-manager-ui-30 2023-06-19 23:02:59 +12:00
d4931522d4 Merge branch 'main' into model-manager-ui-30 2023-06-19 22:53:13 +12:00
17e2a35228 fix: merge conflicts 2023-06-18 22:25:48 +12:00
91016d8b29 Merge branch 'main' into model-manager-ui-30 2023-06-18 22:23:18 +12:00
9fda21cf40 Revert "feat: Port Schedulers to Mantine"
This reverts commit e0c105f413.
2023-06-18 22:22:56 +12:00
809ec7163e fix: Remove type from Model type name 2023-06-18 19:41:30 +12:00
7c9a939b47 fix: Unserialization key issue 2023-06-18 19:38:15 +12:00
9634c96020 revert: getModels to receivedModels 2023-06-18 19:35:46 +12:00
e0c105f413 feat: Port Schedulers to Mantine 2023-06-18 19:31:53 +12:00
f0bf32c476 Merge branch 'main' into model-manager-ui-30 2023-06-18 17:37:34 +12:00
28373dbb98 cleanup: Updated model slice names to be more descriptive
Basically updated all slices to be more descriptive in their names. Did so in order to make sure theres good naming scheme available for secondary models.
2023-06-18 17:36:23 +12:00
4133d77772 wip: Move Model Selector to own file 2023-06-18 09:19:13 +12:00
61c426f502 feat: Enable 2.x Model Generation in Linear UI 2023-06-18 08:27:13 +12:00
bf0577c882 fix: 2.1 models breaking generation
Co-Authored-By: StAlKeR7779 <7768370+StAlKeR7779@users.noreply.github.com>
2023-06-18 08:26:25 +12:00
24673fd859 chore: Rebuild API - base_model and type added 2023-06-18 07:50:28 +12:00
dc669d1447 Add name, base_mode, type fields to model info 2023-06-17 22:48:44 +03:00
ce4110b9f4 wip: Add 2.x Models to the Model List 2023-06-18 07:01:44 +12:00
0f3b7d2b3d chore: Rebuild API with new Model API names 2023-06-18 03:00:16 +12:00
16dc78f6c6 Generate config names for openapi 2023-06-17 17:15:36 +03:00
7a66856785 wip: Update Linear UI Txt2Img and Img2Img Graphs
Update the text to imaeg and image to image graphs to work with the new model loader. Currently only supports 1.x models. Will update this soon to make it work with all models.
2023-06-18 01:38:01 +12:00
c8dfa49d86 fix: Update missing name types to new names 2023-06-17 22:04:28 +12:00
76dd749b1e chore: Rebuild API 2023-06-17 21:29:32 +12:00
67d05d2066 chore: Update model config type names 2023-06-17 21:28:43 +12:00
312 changed files with 14988 additions and 9017 deletions

View File

@ -2,8 +2,6 @@ name: Lint frontend
on:
pull_request:
paths:
- 'invokeai/frontend/web/**'
types:
- 'ready_for_review'
- 'opened'
@ -11,8 +9,6 @@ on:
push:
branches:
- 'main'
paths:
- 'invokeai/frontend/web/**'
merge_group:
workflow_dispatch:

View File

@ -1,13 +1,14 @@
name: Black # TODO: add isort and flake8 later
name: style checks
# just formatting for now
# TODO: add isort and flake8 later
on:
pull_request: {}
pull_request:
push:
branches: master
tags: "*"
branches: main
jobs:
test:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@ -19,8 +20,7 @@ jobs:
- name: Install dependencies with pip
run: |
pip install --upgrade pip wheel
pip install .[test]
pip install black
# - run: isort --check-only .
- run: black --check .

View File

@ -1,50 +0,0 @@
name: Test invoke.py pip
# This is a dummy stand-in for the actual tests
# we don't need to run python tests on non-Python changes
# But PRs require passing tests to be mergeable
on:
pull_request:
paths:
- '**'
- '!pyproject.toml'
- '!invokeai/**'
- '!tests/**'
- 'invokeai/frontend/web/**'
merge_group:
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
matrix:
if: github.event.pull_request.draft == false
strategy:
matrix:
python-version:
- '3.10'
pytorch:
- linux-cuda-11_7
- linux-rocm-5_2
- linux-cpu
- macos-default
- windows-cpu
include:
- pytorch: linux-cuda-11_7
os: ubuntu-22.04
- pytorch: linux-rocm-5_2
os: ubuntu-22.04
- pytorch: linux-cpu
os: ubuntu-22.04
- pytorch: macos-default
os: macOS-12
- pytorch: windows-cpu
os: windows-2022
name: ${{ matrix.pytorch }} on ${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
steps:
- name: skip
run: echo "no build required"

View File

@ -3,16 +3,7 @@ on:
push:
branches:
- 'main'
paths:
- 'pyproject.toml'
- 'invokeai/**'
- '!invokeai/frontend/web/**'
pull_request:
paths:
- 'pyproject.toml'
- 'invokeai/**'
- 'tests/**'
- '!invokeai/frontend/web/**'
types:
- 'ready_for_review'
- 'opened'
@ -65,10 +56,23 @@ jobs:
id: checkout-sources
uses: actions/checkout@v3
- name: Check for changed python files
id: changed-files
uses: tj-actions/changed-files@v37
with:
files_yaml: |
python:
- 'pyproject.toml'
- 'invokeai/**'
- '!invokeai/frontend/web/**'
- 'tests/**'
- name: set test prompt to main branch validation
if: steps.changed-files.outputs.python_any_changed == 'true'
run: echo "TEST_PROMPTS=tests/validate_pr_prompt.txt" >> ${{ matrix.github-env }}
- name: setup python
if: steps.changed-files.outputs.python_any_changed == 'true'
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
@ -76,6 +80,7 @@ jobs:
cache-dependency-path: pyproject.toml
- name: install invokeai
if: steps.changed-files.outputs.python_any_changed == 'true'
env:
PIP_EXTRA_INDEX_URL: ${{ matrix.extra-index-url }}
run: >
@ -83,6 +88,7 @@ jobs:
--editable=".[test]"
- name: run pytest
if: steps.changed-files.outputs.python_any_changed == 'true'
id: run-pytest
run: pytest

View File

@ -161,7 +161,7 @@ the command `npm install -g yarn` if needed)
_For Windows/Linux with an NVIDIA GPU:_
```terminal
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu118
```
_For Linux with an AMD GPU:_
@ -184,8 +184,9 @@ the command `npm install -g yarn` if needed)
6. Configure InvokeAI and install a starting set of image generation models (you only need to do this once):
```terminal
invokeai-configure
invokeai-configure --root .
```
Don't miss the dot at the end!
7. Launch the web server (do it every time you run InvokeAI):
@ -193,15 +194,9 @@ the command `npm install -g yarn` if needed)
invokeai-web
```
8. Build Node.js assets
8. Point your browser to http://localhost:9090 to bring up the web interface.
```terminal
cd invokeai/frontend/web/
yarn vite build
```
9. Point your browser to http://localhost:9090 to bring up the web interface.
10. Type `banana sushi` in the box on the top left and click `Invoke`.
9. Type `banana sushi` in the box on the top left and click `Invoke`.
Be sure to activate the virtual environment each time before re-launching InvokeAI,
using `source .venv/bin/activate` or `.venv\Scripts\activate`.
@ -311,13 +306,30 @@ InvokeAI. The second will prepare the 2.3 directory for use with 3.0.
You may now launch the WebUI in the usual way, by selecting option [1]
from the launcher script
#### Migration Caveats
#### Migrating Images
The migration script will migrate your invokeai settings and models,
including textual inversion models, LoRAs and merges that you may have
installed previously. However it does **not** migrate the generated
images stored in your 2.3-format outputs directory. You will need to
manually import selected images into the 3.0 gallery via drag-and-drop.
images stored in your 2.3-format outputs directory. To do this, you
need to run an additional step:
1. From a working InvokeAI 3.0 root directory, start the launcher and
enter menu option [8] to open the "developer's console".
2. At the developer's console command line, type the command:
```bash
invokeai-import-images
```
3. This will lead you through the process of confirming the desired
source and destination for the imported images. The images will
appear in the gallery board of your choice, and contain the
original prompt, model name, and other parameters used to generate
the image.
(Many kudos to **techjedi** for contributing this script.)
## Hardware Requirements

Binary file not shown.

Before

Width:  |  Height:  |  Size: 310 KiB

After

Width:  |  Height:  |  Size: 297 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

View File

@ -16,7 +16,7 @@ If you don't feel ready to make a code contribution yet, no problem! You can als
There are two paths to making a development contribution:
1. Choosing an open issue to address. Open issues can be found in the [Issues](https://github.com/invoke-ai/InvokeAI/issues?q=is%3Aissue+is%3Aopen) section of the InvokeAI repository. These are tagged by the issue type (bug, enhancement, etc.) along with the “good first issues” tag denoting if they are suitable for first time contributors.
1. Additional items can be found on our roadmap <******************************link to roadmap>******************************. The roadmap is organized in terms of priority, and contains features of varying size and complexity. If there is an inflight item youd like to help with, reach out to the contributor assigned to the item to see how you can help.
1. Additional items can be found on our [roadmap](https://github.com/orgs/invoke-ai/projects/7). The roadmap is organized in terms of priority, and contains features of varying size and complexity. If there is an inflight item youd like to help with, reach out to the contributor assigned to the item to see how you can help.
2. Opening a new issue or feature to add. **Please make sure you have searched through existing issues before creating new ones.**
*Regardless of what you choose, please post in the [#dev-chat](https://discord.com/channels/1020123559063990373/1049495067846524939) channel of the Discord before you start development in order to confirm that the issue or feature is aligned with the current direction of the project. We value our contributors time and effort and want to ensure that no ones time is being misspent.*

View File

@ -4,35 +4,13 @@ title: Postprocessing
# :material-image-edit: Postprocessing
## Intro
This extension provides the ability to restore faces and upscale images.
This sections details the ability to improve faces and upscale images.
## Face Fixing
The default face restoration module is GFPGAN. The default upscale is
Real-ESRGAN. For an alternative face restoration module, see
[CodeFormer Support](#codeformer-support) below.
As of InvokeAI 3.0, the easiest way to improve faces created during image generation is through the Inpainting functionality of the Unified Canvas. Simply add the image containing the faces that you would like to improve to the canvas, mask the face to be improved and run the invocation. For best results, make sure to use an inpainting specific model; these are usually identified by the "-inpainting" term in the model name.
As of version 1.14, environment.yaml will install the Real-ESRGAN package into
the standard install location for python packages, and will put GFPGAN into a
subdirectory of "src" in the InvokeAI directory. Upscaling with Real-ESRGAN
should "just work" without further intervention. Simply indicate the desired scale on
the popup in the Web GUI.
**GFPGAN** requires a series of downloadable model files to work. These are
loaded when you run `invokeai-configure`. If GFPAN is failing with an
error, please run the following from the InvokeAI directory:
```bash
invokeai-configure
```
If you do not run this script in advance, the GFPGAN module will attempt to
download the models files the first time you try to perform facial
reconstruction.
### Upscaling
## Upscaling
Open the upscaling dialog by clicking on the "expand" icon located
above the image display area in the Web UI:
@ -41,82 +19,23 @@ above the image display area in the Web UI:
![upscale1](../assets/features/upscale-dialog.png)
</figure>
There are three different upscaling parameters that you can
adjust. The first is the scale itself, either 2x or 4x.
The default upscaling option is Real-ESRGAN x2 Plus, which will scale your image by a factor of two. This means upscaling a 512x512 image will result in a new 1024x1024 image.
The second is the "Denoising Strength." Higher values will smooth out
the image and remove digital chatter, but may lose fine detail at
higher values.
Other options are the x4 upscalers, which will scale your image by a factor of 4.
Third, "Upscale Strength" allows you to adjust how the You can set the
scaling stength between `0` and `1.0` to control the intensity of the
scaling. AI upscalers generally tend to smooth out texture details. If
you wish to retain some of those for natural looking results, we
recommend using values between `0.5 to 0.8`.
[This figure](../assets/features/upscaling-montage.png) illustrates
the effects of denoising and strength. The original image was 512x512,
4x scaled to 2048x2048. The "original" version on the upper left was
scaled using simple pixel averaging. The remainder use the ESRGAN
upscaling algorithm at different levels of denoising and strength.
<figure markdown>
![upscaling](../assets/features/upscaling-montage.png){ width=720 }
</figure>
Both denoising and strength default to 0.75.
### Face Restoration
InvokeAI offers alternative two face restoration algorithms,
[GFPGAN](https://github.com/TencentARC/GFPGAN) and
[CodeFormer](https://huggingface.co/spaces/sczhou/CodeFormer). These
algorithms improve the appearance of faces, particularly eyes and
mouths. Issues with faces are less common with the latest set of
Stable Diffusion models than with the original 1.4 release, but the
restoration algorithms can still make a noticeable improvement in
certain cases. You can also apply restoration to old photographs you
upload.
To access face restoration, click the "smiley face" icon in the
toolbar above the InvokeAI image panel. You will be presented with a
dialog that offers a choice between the two algorithm and sliders that
allow you to adjust their parameters. Alternatively, you may open the
left-hand accordion panel labeled "Face Restoration" and have the
restoration algorithm of your choice applied to generated images
automatically.
Like upscaling, there are a number of parameters that adjust the face
restoration output. GFPGAN has a single parameter, `strength`, which
controls how much the algorithm is allowed to adjust the
image. CodeFormer has two parameters, `strength`, and `fidelity`,
which together control the quality of the output image as described in
the [CodeFormer project
page](https://shangchenzhou.com/projects/CodeFormer/). Default values
are 0.75 for both parameters, which achieves a reasonable balance
between changing the image too much and not enough.
[This figure](../assets/features/restoration-montage.png) illustrates
the effects of adjusting GFPGAN and CodeFormer parameters.
<figure markdown>
![upscaling](../assets/features/restoration-montage.png){ width=720 }
</figure>
!!! note
GFPGAN and Real-ESRGAN are both memory intensive. In order to avoid crashes and memory overloads
Real-ESRGAN is memory intensive. In order to avoid crashes and memory overloads
during the Stable Diffusion process, these effects are applied after Stable Diffusion has completed
its work.
In single image generations, you will see the output right away but when you are using multiple
iterations, the images will first be generated and then upscaled and face restored after that
iterations, the images will first be generated and then upscaled after that
process is complete. While the image generation is taking place, you will still be able to preview
the base images.
## How to disable
If, for some reason, you do not wish to load the GFPGAN and/or ESRGAN libraries,
you can disable them on the invoke.py command line with the `--no_restore` and
`--no_esrgan` options, respectively.
If, for some reason, you do not wish to load the ESRGAN libraries,
you can disable them on the invoke.py command line with the `--no_esrgan` options.

View File

@ -4,6 +4,9 @@ title: Overview
Here you can find the documentation for InvokeAI's various features.
## The [Getting Started Guide](../help/gettingStartedWithAI)
A getting started guide for those new to AI image generation.
## The Basics
### * The [Web User Interface](WEB.md)
Guide to the Web interface. Also see the [WebUI Hotkeys Reference Guide](WEBUIHOTKEYS.md)
@ -46,7 +49,7 @@ Personalize models by adding your own style or subjects.
## Other Features
### * [The NSFW Checker](NSFW.md)
### * [The NSFW Checker](WATERMARK+NSFW.md)
Prevent InvokeAI from displaying unwanted racy images.
### * [Controlling Logging](LOGGING.md)

View File

@ -0,0 +1,95 @@
# Getting Started with AI Image Generation
New to image generation with AI? Youre in the right place!
This is a high level walkthrough of some of the concepts and terms youll see as you start using InvokeAI. Please note, this is not an exhaustive guide and may be out of date due to the rapidly changing nature of the space.
## Using InvokeAI
### **Prompt Crafting**
- Prompts are the basis of using InvokeAI, providing the models directions on what to generate. As a general rule of thumb, the more detailed your prompt is, the better your result will be.
*To get started, heres an easy template to use for structuring your prompts:*
- Subject, Style, Quality, Aesthetic
- **Subject:** What your image will be about. E.g. “a futuristic city with trains”, “penguins floating on icebergs”, “friends sharing beers”
- **Style:** The style or medium in which your image will be in. E.g. “photograph”, “pencil sketch”, “oil paints”, or “pop art”, “cubism”, “abstract”
- **Quality:** A particular aspect or trait that you would like to see emphasized in your image. E.g. "award-winning", "featured in {relevant set of high quality works}", "professionally acclaimed". Many people often use "masterpiece".
- **Aesthetics:** The visual impact and design of the artwork. This can be colors, mood, lighting, setting, etc.
- There are two prompt boxes: *Positive Prompt* & *Negative Prompt*.
- A **Positive** Prompt includes words you want the model to reference when creating an image.
- Negative Prompt is for anything you want the model to eliminate when creating an image. It doesnt always interpret things exactly the way you would, but helps control the generation process. Always try to include a few terms - you can typically use lower quality image terms like “blurry” or “distorted” with good success.
- Some examples prompts you can try on your own:
- A detailed oil painting of a tranquil forest at sunset with vibrant+ colors and soft, golden light filtering through the trees
- friends sharing beers in a busy city, realistic colored pencil sketch, twilight, masterpiece, bright, lively
### Generation Workflows
- Invoke offers a number of different workflows for interacting with models to produce images. Each is extremely powerful on its own, but together provide you an unparalleled way of producing high quality creative outputs that align with your vision.
- **Text to Image:** The text to image tab focuses on the key workflow of using a prompt to generate a new image. It includes other features that help control the generation process as well.
- **Image to Image:** With image to image, you provide an image as a reference (called the “initial image”), which provides more guidance around color and structure to the AI as it generates a new image. This is provided alongside the same features as Text to Image.
- **Unified Canvas:** The Unified Canvas is an advanced AI-first image editing tool that is easy to use, but hard to master. Drag an image onto the canvas from your gallery in order to regenerate certain elements, edit content or colors (known as inpainting), or extend the image with an exceptional degree of consistency and clarity (called outpainting).
### Improving Image Quality
- Fine tuning your prompt - the more specific you are, the closer the image will turn out to what is in your head! Adding more details in the Positive Prompt or Negative Prompt can help add / remove pieces of your image to improve it - You can also use advanced techniques like upweighting and downweighting to control the influence of certain words. [Learn more here](https://invoke-ai.github.io/InvokeAI/features/PROMPTS/#prompt-syntax-features).
- **Tip: If youre seeing poor results, try adding the things you dont like about the image to your negative prompt may help. E.g. distorted, low quality, unrealistic, etc.**
- Explore different models - Other models can produce different results due to the data theyve been trained on. Each model has specific language and settings it works best with; a models documentation is your friend here. Play around with some and see what works best for you!
- Increasing Steps - The number of steps used controls how much time the model is given to produce an image, and depends on the “Scheduler” used. The schedule controls how each step is processed by the model. More steps tends to mean better results, but will take longer - We recommend at least 30 steps for most
- Tweak and Iterate - Remember, its best to change one thing at a time so you know what is working and what isn't. Sometimes you just need to try a new image, and other times using a new prompt might be the ticket. For testing, consider turning off the “random” Seed - Using the same seed with the same settings will produce the same image, which makes it the perfect way to learn exactly what your changes are doing.
- Explore Advanced Settings - InvokeAI has a full suite of tools available to allow you complete control over your image creation process - Check out our [docs if you want to learn more](https://invoke-ai.github.io/InvokeAI/features/).
## Terms & Concepts
If you're interested in learning more, check out [this presentation](https://docs.google.com/presentation/d/1IO78i8oEXFTZ5peuHHYkVF-Y3e2M6iM5tCnc-YBfcCM/edit?usp=sharing) from one of our maintainers (@lstein).
### Stable Diffusion
Stable Diffusion is deep learning, text-to-image model that is the foundation of the capabilities found in InvokeAI. Since the release of Stable Diffusion, there have been many subsequent models created based on Stable Diffusion that are designed to generate specific types of images.
### Prompts
Prompts provide the models directions on what to generate. As a general rule of thumb, the more detailed your prompt is, the better your result will be.
### Models
Models are the magic that power InvokeAI. These files represent the output of training a machine on understanding massive amounts of images - providing them with the capability to generate new images using just a text description of what youd like to see. (Like Stable Diffusion!)
Invoke offers a simple way to download several different models upon installation, but many more can be discovered online, including at ****. Each model can produce a unique style of output, based on the images it was trained on - Try out different models to see which best fits your creative vision!
- *Models that contain “inpainting” in the name are designed for use with the inpainting feature of the Unified Canvas*
### Scheduler
Schedulers guide the process of removing noise (de-noising) from data. They determine:
1. The number of steps to take to remove the noise.
2. Whether the steps are random (stochastic) or predictable (deterministic).
3. The specific method (algorithm) used for de-noising.
Experimenting with different schedulers is recommended as each will produce different outputs!
### Steps
The number of de-noising steps each generation through.
Schedulers can be intricate and there's often a balance to strike between how quickly they can de-noise data and how well they can do it. It's typically advised to experiment with different schedulers to see which one gives the best results. There has been a lot written on the internet about different schedulers, as well as exploring what the right level of "steps" are for each. You can save generation time by reducing the number of steps used, but you'll want to make sure that you are satisfied with the quality of images produced!
### Low-Rank Adaptations / LoRAs
Low-Rank Adaptations (LoRAs) are like a smaller, more focused version of models, intended to focus on training a better understanding of how a specific character, style, or concept looks.
### Textual Inversion Embeddings
Textual Inversion Embeddings, like LoRAs, assist with more easily prompting for certain characters, styles, or concepts. However, embeddings are trained to update the relationship between a specific word (known as the “trigger”) and the intended output.
### ControlNet
ControlNets are neural network models that are able to extract key features from an existing image and use these features to guide the output of the image generation model.
### VAE
Variational auto-encoder (VAE) is a encode/decode model that translates the "latents" image produced during the image generation procees to the large pixel images that we see.

View File

@ -11,6 +11,33 @@ title: Home
```
-->
<!-- CSS styling -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.2.1/css/fontawesome.min.css">
<style>
.button {
width: 300px;
height: 50px;
background-color: #448AFF;
color: #fff;
font-size: 16px;
border: none;
cursor: pointer;
border-radius: 0.2rem;
}
.button-container {
display: grid;
grid-template-columns: repeat(3, 300px);
gap: 20px;
}
.button:hover {
background-color: #526CFE;
}
</style>
<div align="center" markdown>
@ -70,61 +97,23 @@ image-to-image generator. It provides a streamlined process with various new
features and options to aid the image generation process. It runs on Windows,
Mac and Linux machines, and runs on GPU cards with as little as 4 GB of RAM.
**Quick links**: [<a href="https://discord.gg/ZmtBAhwWhy">Discord Server</a>]
[<a href="https://github.com/invoke-ai/InvokeAI/">Code and Downloads</a>] [<a
href="https://github.com/invoke-ai/InvokeAI/issues">Bug Reports</a>] [<a
href="https://github.com/invoke-ai/InvokeAI/discussions">Discussion, Ideas &
Q&A</a>]
<div align="center"><img src="assets/invoke-web-server-1.png" width=640></div>
!!! note
!!! Note
This fork is rapidly evolving. Please use the [Issues tab](https://github.com/invoke-ai/InvokeAI/issues) to report bugs and make feature requests. Be sure to use the provided templates. They will help aid diagnose issues faster.
This project is rapidly evolving. Please use the [Issues tab](https://github.com/invoke-ai/InvokeAI/issues) to report bugs and make feature requests. Be sure to use the provided templates as it will help aid response time.
## :octicons-package-dependencies-24: Installation
## :octicons-link-24: Quick Links
This fork is supported across Linux, Windows and Macintosh. Linux users can use
either an Nvidia-based card (with CUDA support) or an AMD card (using the ROCm
driver).
### [Installation Getting Started Guide](installation)
#### **[Automated Installer](installation/010_INSTALL_AUTOMATED.md)**
✅ This is the recommended installation method for first-time users.
#### [Manual Installation](installation/020_INSTALL_MANUAL.md)
This method is recommended for experienced users and developers
#### [Docker Installation](installation/040_INSTALL_DOCKER.md)
This method is recommended for those familiar with running Docker containers
### Other Installation Guides
- [PyPatchMatch](installation/060_INSTALL_PATCHMATCH.md)
- [XFormers](installation/070_INSTALL_XFORMERS.md)
- [CUDA and ROCm Drivers](installation/030_INSTALL_CUDA_AND_ROCM.md)
- [Installing New Models](installation/050_INSTALLING_MODELS.md)
## :fontawesome-solid-computer: Hardware Requirements
### :octicons-cpu-24: System
You wil need one of the following:
- :simple-nvidia: An NVIDIA-based graphics card with 4 GB or more VRAM memory.
- :simple-amd: An AMD-based graphics card with 4 GB or more VRAM memory (Linux
only)
- :fontawesome-brands-apple: An Apple computer with an M1 chip.
We do **not recommend** the following video cards due to issues with their
running in half-precision mode and having insufficient VRAM to render 512x512
images in full-precision mode:
- NVIDIA 10xx series cards such as the 1080ti
- GTX 1650 series cards
- GTX 1660 series cards
### :fontawesome-solid-memory: Memory and Disk
- At least 12 GB Main Memory RAM.
- At least 18 GB of free disk space for the machine learning model, Python, and
all its dependencies.
<div class="button-container">
<a href="installation/INSTALLATION"> <button class="button">Installation</button> </a>
<a href="features/"> <button class="button">Features</button> </a>
<a href="help/gettingStartedWithAI/"> <button class="button">Getting Started</button> </a>
<a href="contributing/CONTRIBUTING/"> <button class="button">Contributing</button> </a>
<a href="https://github.com/invoke-ai/InvokeAI/"> <button class="button">Code and Downloads</button> </a>
<a href="https://github.com/invoke-ai/InvokeAI/issues"> <button class="button">Bug Reports </button> </a>
<a href="https://discord.gg/ZmtBAhwWhy"> <button class="button"> Join the Discord Server!</button> </a>
</div>
## :octicons-gift-24: InvokeAI Features
@ -230,7 +219,7 @@ encouraged to do so.
## :octicons-person-24: Contributors
This fork is a combined effort of various people from across the world.
This software is a combined effort of various people from across the world.
[Check out the list of all these amazing people](other/CONTRIBUTORS.md). We
thank them for their time, hard work and effort.

View File

@ -264,7 +264,7 @@ experimental versions later.
you can create several levels of subfolders and drop your models into
whichever ones you want.
- ***Autoimport FolderLICENSE***
- ***LICENSE***
At the bottom of the screen you will see a checkbox for accepting
the CreativeML Responsible AI Licenses. You need to accept the license
@ -372,8 +372,71 @@ experimental versions later.
Once InvokeAI is installed, do not move or remove this directory."
<a name="troubleshooting"></a>
## Troubleshooting
### _OSErrors on Windows while installing dependencies_
During a zip file installation or an online update, installation stops
with an error like this:
![broken-dependency-screenshot](../assets/troubleshooting/broken-dependency.png){:width="800px"}
This seems to happen particularly often with the `pydantic` and
`numpy` packages. The most reliable solution requires several manual
steps to complete installation.
Open up a Powershell window and navigate to the `invokeai` directory
created by the installer. Then give the following series of commands:
```cmd
rm .\.venv -r -force
python -mvenv .venv
.\.venv\Scripts\activate
pip install invokeai
invokeai-configure --yes --root .
```
If you see anything marked as an error during this process please stop
and seek help on the Discord [installation support
channel](https://discord.com/channels/1020123559063990373/1041391462190956654). A
few warning messages are OK.
If you are updating from a previous version, this should restore your
system to a working state. If you are installing from scratch, there
is one additional command to give:
```cmd
wget -O invoke.bat https://raw.githubusercontent.com/invoke-ai/InvokeAI/main/installer/templates/invoke.bat.in
```
This will create the `invoke.bat` script needed to launch InvokeAI and
its related programs.
### _Stable Diffusion XL Generation Fails after Trying to Load unet_
InvokeAI is working in other respects, but when trying to generate
images with Stable Diffusion XL you get a "Server Error". The text log
in the launch window contains this log line above several more lines of
error messages:
```INFO --> Loading model:D:\LONG\PATH\TO\MODEL, type sdxl:main:unet```
This failure mode occurs when there is a network glitch during
downloading the very large SDXL model.
To address this, first go to the Web Model Manager and delete the
Stable-Diffusion-XL-base-1.X model. Then navigate to HuggingFace and
manually download the .safetensors version of the model. The 1.0
version is located at
https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/tree/main
and the file is named `sd_xl_base_1.0.safetensors`.
Save this file to disk and then reenter the Model Manager. Navigate to
Import Models->Add Model, then type (or drag-and-drop) the path to the
.safetensors file. Press "Add Model".
### _Package dependency conflicts_
If you have previously installed InvokeAI or another Stable Diffusion
@ -408,7 +471,7 @@ Then type the following commands:
=== "NVIDIA System"
```bash
pip install torch torchvision --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu117
pip install torch torchvision --force-reinstall --extra-index-url https://download.pytorch.org/whl/cu118
pip install xformers
```

View File

@ -148,7 +148,7 @@ manager, please follow these steps:
=== "CUDA (NVidia)"
```bash
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
pip install "InvokeAI[xformers]" --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu118
```
=== "ROCm (AMD)"
@ -192,8 +192,10 @@ manager, please follow these steps:
your outputs.
```terminal
invokeai-configure
invokeai-configure --root .
```
Don't miss the dot at the end of the command!
The script `invokeai-configure` will interactively guide you through the
process of downloading and installing the weights files needed for InvokeAI.
@ -225,12 +227,6 @@ manager, please follow these steps:
!!! warning "Make sure that the virtual environment is activated, which should create `(.venv)` in front of your prompt!"
=== "CLI"
```bash
invokeai
```
=== "local Webserver"
```bash
@ -243,6 +239,12 @@ manager, please follow these steps:
invokeai --web --host 0.0.0.0
```
=== "CLI"
```bash
invokeai
```
If you choose the run the web interface, point your browser at
http://localhost:9090 in order to load the GUI.
@ -310,7 +312,7 @@ installation protocol (important!)
=== "CUDA (NVidia)"
```bash
pip install -e .[xformers] --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
pip install -e .[xformers] --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu118
```
=== "ROCm (AMD)"
@ -354,7 +356,7 @@ you can do so using this unsupported recipe:
mkdir ~/invokeai
conda create -n invokeai python=3.10
conda activate invokeai
pip install InvokeAI[xformers] --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu117
pip install InvokeAI[xformers] --use-pep517 --extra-index-url https://download.pytorch.org/whl/cu118
invokeai-configure --root ~/invokeai
invokeai --root ~/invokeai --web
```

View File

@ -34,11 +34,11 @@ directly from NVIDIA. **Do not try to install Ubuntu's
nvidia-cuda-toolkit package. It is out of date and will cause
conflicts among the NVIDIA driver and binaries.**
Go to [CUDA Toolkit 11.7
Downloads](https://developer.nvidia.com/cuda-11-7-0-download-archive),
and use the target selection wizard to choose your operating system,
hardware platform, and preferred installation method (e.g. "local"
versus "network").
Go to [CUDA Toolkit
Downloads](https://developer.nvidia.com/cuda-downloads), and use the
target selection wizard to choose your operating system, hardware
platform, and preferred installation method (e.g. "local" versus
"network").
This will provide you with a downloadable install file or, depending
on your choices, a recipe for downloading and running a install shell
@ -61,7 +61,7 @@ Runtime Site](https://developer.nvidia.com/nvidia-container-runtime)
When installing torch and torchvision manually with `pip`, remember to provide
the argument `--extra-index-url
https://download.pytorch.org/whl/cu117` as described in the [Manual
https://download.pytorch.org/whl/cu118` as described in the [Manual
Installation Guide](020_INSTALL_MANUAL.md).
## :simple-amd: ROCm

View File

@ -124,7 +124,7 @@ installation. Examples:
invokeai-model-install --list controlnet
# (install the model at the indicated URL)
invokeai-model-install --add http://civitai.com/2860
invokeai-model-install --add https://civitai.com/api/download/models/128713
# (delete the named model)
invokeai-model-install --delete sd-1/main/analog-diffusion
@ -170,4 +170,4 @@ elsewhere on disk and they will be autoimported. You can also create
subfolders and organize them as you wish.
The location of the autoimport directories are controlled by settings
in `invokeai.yaml`. See [Configuration](../features/CONFIGURATION.md).
in `invokeai.yaml`. See [Configuration](../features/CONFIGURATION.md).

View File

@ -28,18 +28,21 @@ command line, then just be sure to activate it's virtual environment.
Then run the following three commands:
```sh
pip install xformers==0.0.16rc425
pip install triton
pip install xformers~=0.0.19
pip install triton # WON'T WORK ON WINDOWS
python -m xformers.info output
```
The first command installs `xformers`, the second installs the
`triton` training accelerator, and the third prints out the `xformers`
installation status. If all goes well, you'll see a report like the
installation status. On Windows, please omit the `triton` package,
which is not available on that platform.
If all goes well, you'll see a report like the
following:
```sh
xFormers 0.0.16rc425
xFormers 0.0.20
memory_efficient_attention.cutlassF: available
memory_efficient_attention.cutlassB: available
memory_efficient_attention.flshattF: available
@ -48,22 +51,28 @@ memory_efficient_attention.smallkF: available
memory_efficient_attention.smallkB: available
memory_efficient_attention.tritonflashattF: available
memory_efficient_attention.tritonflashattB: available
indexing.scaled_index_addF: available
indexing.scaled_index_addB: available
indexing.index_select: available
swiglu.dual_gemm_silu: available
swiglu.gemm_fused_operand_sum: available
swiglu.fused.p.cpp: available
is_triton_available: True
is_functorch_available: False
pytorch.version: 1.13.1+cu117
pytorch.version: 2.0.1+cu118
pytorch.cuda: available
gpu.compute_capability: 8.6
gpu.name: NVIDIA RTX A2000 12GB
gpu.compute_capability: 8.9
gpu.name: NVIDIA GeForce RTX 4070
build.info: available
build.cuda_version: 1107
build.python_version: 3.10.9
build.torch_version: 1.13.1+cu117
build.cuda_version: 1108
build.python_version: 3.10.11
build.torch_version: 2.0.1+cu118
build.env.TORCH_CUDA_ARCH_LIST: 5.0+PTX 6.0 6.1 7.0 7.5 8.0 8.6
build.env.XFORMERS_BUILD_TYPE: Release
build.env.XFORMERS_ENABLE_DEBUG_ASSERTIONS: None
build.env.NVCC_FLAGS: None
build.env.XFORMERS_PACKAGE_FROM: wheel-v0.0.16rc425
build.env.XFORMERS_PACKAGE_FROM: wheel-v0.0.20
build.nvcc_version: 11.8.89
source.privacy: open source
```
@ -83,14 +92,14 @@ installed from source. These instructions were written for a system
running Ubuntu 22.04, but other Linux distributions should be able to
adapt this recipe.
#### 1. Install CUDA Toolkit 11.7
#### 1. Install CUDA Toolkit 11.8
You will need the CUDA developer's toolkit in order to compile and
install xFormers. **Do not try to install Ubuntu's nvidia-cuda-toolkit
package.** It is out of date and will cause conflicts among the NVIDIA
driver and binaries. Instead install the CUDA Toolkit package provided
by NVIDIA itself. Go to [CUDA Toolkit 11.7
Downloads](https://developer.nvidia.com/cuda-11-7-0-download-archive)
by NVIDIA itself. Go to [CUDA Toolkit 11.8
Downloads](https://developer.nvidia.com/cuda-11-8-0-download-archive)
and use the target selection wizard to choose your platform and Linux
distribution. Select an installer type of "runfile (local)" at the
last step.
@ -101,17 +110,17 @@ example, the install script recipe for Ubuntu 22.04 running on a
x86_64 system is:
```
wget https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
sudo sh cuda_11.7.0_515.43.04_linux.run
wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
sudo sh cuda_11.8.0_520.61.05_linux.run
```
Rather than cut-and-paste this example, We recommend that you walk
through the toolkit wizard in order to get the most up to date
installer for your system.
#### 2. Confirm/Install pyTorch 1.13 with CUDA 11.7 support
#### 2. Confirm/Install pyTorch 2.01 with CUDA 11.8 support
If you are using InvokeAI 2.3 or higher, these will already be
If you are using InvokeAI 3.0.2 or higher, these will already be
installed. If not, you can check whether you have the needed libraries
using a quick command. Activate the invokeai virtual environment,
either by entering the "developer's console", or manually with a
@ -124,7 +133,7 @@ Then run the command:
python -c 'exec("import torch\nprint(torch.__version__)")'
```
If it prints __1.13.1+cu117__ you're good. If not, you can install the
If it prints __1.13.1+cu118__ you're good. If not, you can install the
most up to date libraries with this command:
```sh

View File

@ -1,6 +1,4 @@
---
title: Overview
---
# Overview
We offer several ways to install InvokeAI, each one suited to your
experience and preferences. We suggest that everyone start by
@ -15,6 +13,56 @@ See the [troubleshooting
section](010_INSTALL_AUTOMATED.md#troubleshooting) of the automated
install guide for frequently-encountered installation issues.
This fork is supported across Linux, Windows and Macintosh. Linux users can use
either an Nvidia-based card (with CUDA support) or an AMD card (using the ROCm
driver).
### [Installation Getting Started Guide](installation)
#### **[Automated Installer](010_INSTALL_AUTOMATED.md)**
✅ This is the recommended installation method for first-time users.
#### [Manual Installation](020_INSTALL_MANUAL.md)
This method is recommended for experienced users and developers
#### [Docker Installation](040_INSTALL_DOCKER.md)
This method is recommended for those familiar with running Docker containers
### Other Installation Guides
- [PyPatchMatch](installation/060_INSTALL_PATCHMATCH.md)
- [XFormers](installation/070_INSTALL_XFORMERS.md)
- [CUDA and ROCm Drivers](installation/030_INSTALL_CUDA_AND_ROCM.md)
- [Installing New Models](installation/050_INSTALLING_MODELS.md)
## :fontawesome-solid-computer: Hardware Requirements
### :octicons-cpu-24: System
You wil need one of the following:
- :simple-nvidia: An NVIDIA-based graphics card with 4 GB or more VRAM memory.
- :simple-amd: An AMD-based graphics card with 4 GB or more VRAM memory (Linux
only)
- :fontawesome-brands-apple: An Apple computer with an M1 chip.
** SDXL 1.0 Requirements*
To use SDXL, user must have one of the following:
- :simple-nvidia: An NVIDIA-based graphics card with 8 GB or more VRAM memory.
- :simple-amd: An AMD-based graphics card with 16 GB or more VRAM memory (Linux
only)
- :fontawesome-brands-apple: An Apple computer with an M1 chip.
### :fontawesome-solid-memory: Memory and Disk
- At least 12 GB Main Memory RAM.
- At least 18 GB of free disk space for the machine learning model, Python, and
all its dependencies.
We do **not recommend** the following video cards due to issues with their
running in half-precision mode and having insufficient VRAM to render 512x512
images in full-precision mode:
- NVIDIA 10xx series cards such as the 1080ti
- GTX 1650 series cards
- GTX 1660 series cards
## Installation options
1. [Automated Installer](010_INSTALL_AUTOMATED.md)

View File

@ -14,23 +14,28 @@ The nodes linked below have been developed and contributed by members of the Inv
## List of Nodes
### Face Mask
### FaceTools
**Description:** This node autodetects a face in the image using MediaPipe and masks it by making it transparent. Via outpainting you can swap faces with other faces, or invert the mask and swap things around the face with other things. Additionally, you can supply X and Y offset values to scale/change the shape of the mask for finer control. The node also outputs an all-white mask in the same dimensions as the input image. This is needed by the inpaint node (and unified canvas) for outpainting.
**Description:** FaceTools is a collection of nodes created to manipulate faces as you would in Unified Canvas. It includes FaceMask, FaceOff, and FacePlace. FaceMask autodetects a face in the image using MediaPipe and creates a mask from it. FaceOff similarly detects a face, then takes the face off of the image by adding a square bounding box around it and cropping/scaling it. FacePlace puts the bounded face image from FaceOff back onto the original image. Using these nodes with other inpainting node(s), you can put new faces on existing things, put new things around existing faces, and work closer with a face as a bounded image. Additionally, you can supply X and Y offset values to scale/change the shape of the mask for finer control on FaceMask and FaceOff. See GitHub repository below for usage examples.
**Node Link:** https://github.com/ymgenesis/InvokeAI/blob/facemaskmediapipe/invokeai/app/invocations/facemask.py
**Node Link:** https://github.com/ymgenesis/FaceTools/
**Example Node Graph:** https://www.mediafire.com/file/gohn5sb1bfp8use/21-July_2023-FaceMask.json/file
**FaceMask Output Examples**
**Output Examples**
![5cc8abce-53b0-487a-b891-3bf94dcc8960](https://github.com/invoke-ai/InvokeAI/assets/25252829/43f36d24-1429-4ab1-bd06-a4bedfe0955e)
![b920b710-1882-49a0-8d02-82dff2cca907](https://github.com/invoke-ai/InvokeAI/assets/25252829/7660c1ed-bf7d-4d0a-947f-1fc1679557ba)
![71a91805-fda5-481c-b380-264665703133](https://github.com/invoke-ai/InvokeAI/assets/25252829/f8f6a2ee-2b68-4482-87da-b90221d5c3e2)
![2e3168cb-af6a-475d-bfac-c7b7fd67b4c2](https://github.com/ymgenesis/InvokeAI/assets/25252829/a5ad7d44-2ada-4b3c-a56e-a21f8244a1ac)
![2_annotated](https://github.com/ymgenesis/InvokeAI/assets/25252829/53416c8a-a23b-4d76-bb6d-3cfd776e0096)
![2fe2150c-fd08-4e26-8c36-f0610bf441bb](https://github.com/ymgenesis/InvokeAI/assets/25252829/b0f7ecfe-f093-4147-a904-b9f131b41dc9)
![831b6b98-4f0f-4360-93c8-69a9c1338cbe](https://github.com/ymgenesis/InvokeAI/assets/25252829/fc7b0622-e361-4155-8a76-082894d084f0)
<hr>
### Ideal Size
**Description:** This node calculates an ideal image size for a first pass of a multi-pass upscaling. The aim is to avoid duplication that results from choosing a size larger than the model is capable of.
**Node Link:** https://github.com/JPPhoto/ideal-size-node
--------------------------------
### Super Cool Node Template
### Example Node Template
**Description:** This node allows you to do super cool things with InvokeAI.
@ -40,13 +45,9 @@ The nodes linked below have been developed and contributed by members of the Inv
**Output Examples**
![Invoke AI](https://invoke-ai.github.io/InvokeAI/assets/invoke_ai_banner.png)
### Ideal Size
**Description:** This node calculates an ideal image size for a first pass of a multi-pass upscaling. The aim is to avoid duplication that results from choosing a size larger than the model is capable of.
**Node Link:** https://github.com/JPPhoto/ideal-size-node
![Example Image](https://invoke-ai.github.io/InvokeAI/assets/invoke_ai_banner.png){: style="height:115px;width:240px"}
## Help
If you run into any issues with a node, please post in the [InvokeAI Discord](https://discord.gg/ZmtBAhwWhy).

25
flake.lock generated Normal file
View File

@ -0,0 +1,25 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1690630721,
"narHash": "sha256-Y04onHyBQT4Erfr2fc82dbJTfXGYrf4V0ysLUYnPOP8=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d2b52322f35597c62abf56de91b0236746b2a03d",
"type": "github"
},
"original": {
"id": "nixpkgs",
"type": "indirect"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

91
flake.nix Normal file
View File

@ -0,0 +1,91 @@
# Important note: this flake does not attempt to create a fully isolated, 'pure'
# Python environment for InvokeAI. Instead, it depends on local invocations of
# virtualenv/pip to install the required (binary) packages, most importantly the
# prebuilt binary pytorch packages with CUDA support.
# ML Python packages with CUDA support, like pytorch, are notoriously expensive
# to compile so it's purposefuly not what this flake does.
{
description = "An (impure) flake to develop on InvokeAI.";
outputs = { self, nixpkgs }:
let
system = "x86_64-linux";
pkgs = import nixpkgs {
inherit system;
config.allowUnfree = true;
};
python = pkgs.python310;
mkShell = { dir, install }:
let
setupScript = pkgs.writeScript "setup-invokai" ''
# This must be sourced using 'source', not executed.
${python}/bin/python -m venv ${dir}
${dir}/bin/python -m pip install ${install}
# ${dir}/bin/python -c 'import torch; assert(torch.cuda.is_available())'
source ${dir}/bin/activate
'';
in
pkgs.mkShell rec {
buildInputs = with pkgs; [
# Backend: graphics, CUDA.
cudaPackages.cudnn
cudaPackages.cuda_nvrtc
cudatoolkit
pkgconfig
libconfig
cmake
blas
freeglut
glib
gperf
procps
libGL
libGLU
linuxPackages.nvidia_x11
python
(opencv4.override {
enableGtk3 = true;
enableFfmpeg = true;
enableCuda = true;
enableUnfree = true;
})
stdenv.cc
stdenv.cc.cc.lib
xorg.libX11
xorg.libXext
xorg.libXi
xorg.libXmu
xorg.libXrandr
xorg.libXv
zlib
# Pre-commit hooks.
black
# Frontend.
yarn
nodejs
];
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs;
CUDA_PATH = pkgs.cudatoolkit;
EXTRA_LDFLAGS = "-L${pkgs.linuxPackages.nvidia_x11}/lib";
shellHook = ''
if [[ -f "${dir}/bin/activate" ]]; then
source "${dir}/bin/activate"
echo "Using Python: $(which python)"
else
echo "Use 'source ${setupScript}' to set up the environment."
fi
'';
};
in
{
devShells.${system} = rec {
develop = mkShell { dir = "venv"; install = "-e '.[xformers]' --extra-index-url https://download.pytorch.org/whl/cu118"; };
default = develop;
};
};
}

View File

@ -13,7 +13,7 @@ from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Union
SUPPORTED_PYTHON = ">=3.9.0,<3.11"
SUPPORTED_PYTHON = ">=3.9.0,<=3.11.100"
INSTALLER_REQS = ["rich", "semver", "requests", "plumbum", "prompt-toolkit"]
BOOTSTRAP_VENV_PREFIX = "invokeai-installer-tmp"
@ -149,7 +149,7 @@ class Installer:
return venv_dir
def install(
self, root: str = "~/invokeai-3", version: str = "latest", yes_to_all=False, find_links: Path = None
self, root: str = "~/invokeai", version: str = "latest", yes_to_all=False, find_links: Path = None
) -> None:
"""
Install the InvokeAI application into the given runtime path
@ -168,7 +168,8 @@ class Installer:
messages.welcome()
self.dest = Path(root).expanduser().resolve() if yes_to_all else messages.dest_path(root)
default_path = os.environ.get("INVOKEAI_ROOT") or Path(root).expanduser().resolve()
self.dest = default_path if yes_to_all else messages.dest_path(root)
# create the venv for the app
self.venv = self.app_venv()
@ -248,6 +249,9 @@ class InvokeAiInstance:
pip[
"install",
"--require-virtualenv",
"numpy~=1.24.0", # choose versions that won't be uninstalled during phase 2
"urllib3~=1.26.0",
"requests~=2.28.0",
"torch~=2.0.0",
"torchmetrics==0.11.4",
"torchvision>=0.14.1",
@ -344,7 +348,7 @@ class InvokeAiInstance:
introduction()
from invokeai.frontend.install import invokeai_configure
from invokeai.frontend.install.invokeai_configure import invokeai_configure
# NOTE: currently the config script does its own arg parsing! this means the command-line switches
# from the installer will also automatically propagate down to the config script.
@ -451,7 +455,7 @@ def get_torch_source() -> (Union[str, None], str):
device = graphical_accelerator()
url = None
optional_modules = None
optional_modules = "[onnx]"
if OS == "Linux":
if device == "rocm":
url = "https://download.pytorch.org/whl/rocm5.4.2"
@ -459,8 +463,11 @@ def get_torch_source() -> (Union[str, None], str):
url = "https://download.pytorch.org/whl/cpu"
if device == "cuda":
url = "https://download.pytorch.org/whl/cu117"
optional_modules = "[xformers]"
url = "https://download.pytorch.org/whl/cu118"
optional_modules = "[xformers,onnx-cuda]"
if device == "cuda_and_dml":
url = "https://download.pytorch.org/whl/cu118"
optional_modules = "[xformers,onnx-directml]"
# in all other cases, Torch wheels should be coming from PyPi as of Torch 1.13

View File

@ -3,6 +3,7 @@ InvokeAI Installer
"""
import argparse
import os
from pathlib import Path
from installer import Installer
@ -15,7 +16,7 @@ if __name__ == "__main__":
dest="root",
type=str,
help="Destination path for installation",
default="~/invokeai",
default=os.environ.get("INVOKEAI_ROOT") or "~/invokeai",
)
parser.add_argument(
"-y",

View File

@ -167,6 +167,10 @@ def graphical_accelerator():
"an [gold1 b]NVIDIA[/] GPU (using CUDA™)",
"cuda",
)
nvidia_with_dml = (
"an [gold1 b]NVIDIA[/] GPU (using CUDA™, and DirectML™ for ONNX) -- ALPHA",
"cuda_and_dml",
)
amd = (
"an [gold1 b]AMD[/] GPU (using ROCm™)",
"rocm",
@ -181,7 +185,7 @@ def graphical_accelerator():
)
if OS == "Windows":
options = [nvidia, cpu]
options = [nvidia, nvidia_with_dml, cpu]
if OS == "Linux":
options = [nvidia, amd, cpu]
elif OS == "Darwin":

View File

@ -8,16 +8,13 @@ Preparations:
to work. Instructions are given here:
https://invoke-ai.github.io/InvokeAI/installation/INSTALL_AUTOMATED/
NOTE: At this time we do not recommend Python 3.11. We recommend
Version 3.10.9, which has been extensively tested with InvokeAI.
Before you start the installer, please open up your system's command
line window (Terminal or Command) and type the commands:
python --version
If all is well, it will print "Python 3.X.X", where the version number
is at least 3.9.1, and less than 3.11.
is at least 3.9.*, and not higher than 3.11.*.
If this works, check the version of the Python package manager, pip:

View File

@ -41,7 +41,7 @@ IF /I "%choice%" == "1" (
python .venv\Scripts\invokeai-configure.exe --skip-sd-weight --skip-support-models
) ELSE IF /I "%choice%" == "7" (
echo Running invokeai-configure...
python .venv\Scripts\invokeai-configure.exe --yes --default_only
python .venv\Scripts\invokeai-configure.exe --yes --skip-sd-weight
) ELSE IF /I "%choice%" == "8" (
echo Developer Console
echo Python command is:

View File

@ -82,7 +82,7 @@ do_choice() {
7)
clear
printf "Re-run the configure script to fix a broken install or to complete a major upgrade\n"
invokeai-configure --root ${INVOKEAI_ROOT} --yes --default_only
invokeai-configure --root ${INVOKEAI_ROOT} --yes --default_only --skip-sd-weights
;;
8)
clear

View File

@ -1,7 +1,7 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)
from typing import Optional
from logging import Logger
import os
from invokeai.app.services.board_image_record_storage import (
SqliteBoardImageRecordStorage,
)
@ -29,6 +29,7 @@ from ..services.invoker import Invoker
from ..services.processor import DefaultInvocationProcessor
from ..services.sqlite import SqliteItemStorage
from ..services.model_manager_service import ModelManagerService
from ..services.invocation_stats import InvocationStatsService
from .events import FastAPIEventService
@ -54,7 +55,7 @@ logger = InvokeAILogger.getLogger()
class ApiDependencies:
"""Contains and initializes all dependencies for the API"""
invoker: Invoker = None
invoker: Invoker
@staticmethod
def initialize(config: InvokeAIAppConfig, event_handler_id: int, logger: Logger = logger):
@ -67,8 +68,9 @@ class ApiDependencies:
output_folder = config.output_path
# TODO: build a file/path manager?
db_location = config.db_path
db_location.parent.mkdir(parents=True, exist_ok=True)
db_path = config.db_path
db_path.parent.mkdir(parents=True, exist_ok=True)
db_location = str(db_path)
graph_execution_manager = SqliteItemStorage[GraphExecutionState](
filename=db_location, table_name="graph_executions"
@ -127,6 +129,7 @@ class ApiDependencies:
graph_execution_manager=graph_execution_manager,
processor=DefaultInvocationProcessor(),
configuration=config,
performance_statistics=InvocationStatsService(graph_execution_manager),
logger=logger,
)

View File

@ -1,24 +1,30 @@
from fastapi import Body, HTTPException, Path, Query
from fastapi import Body, HTTPException
from fastapi.routing import APIRouter
from invokeai.app.services.board_record_storage import BoardRecord, BoardChanges
from invokeai.app.services.image_record_storage import OffsetPaginatedResults
from invokeai.app.services.models.board_record import BoardDTO
from invokeai.app.services.models.image_record import ImageDTO
from pydantic import BaseModel, Field
from ..dependencies import ApiDependencies
board_images_router = APIRouter(prefix="/v1/board_images", tags=["boards"])
class AddImagesToBoardResult(BaseModel):
board_id: str = Field(description="The id of the board the images were added to")
added_image_names: list[str] = Field(description="The image names that were added to the board")
class RemoveImagesFromBoardResult(BaseModel):
removed_image_names: list[str] = Field(description="The image names that were removed from their board")
@board_images_router.post(
"/",
operation_id="create_board_image",
operation_id="add_image_to_board",
responses={
201: {"description": "The image was added to a board successfully"},
},
status_code=201,
)
async def create_board_image(
async def add_image_to_board(
board_id: str = Body(description="The id of the board to add to"),
image_name: str = Body(description="The name of the image to add"),
):
@ -29,26 +35,78 @@ async def create_board_image(
)
return result
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to add to board")
raise HTTPException(status_code=500, detail="Failed to add image to board")
@board_images_router.delete(
"/",
operation_id="remove_board_image",
operation_id="remove_image_from_board",
responses={
201: {"description": "The image was removed from the board successfully"},
},
status_code=201,
)
async def remove_board_image(
board_id: str = Body(description="The id of the board"),
image_name: str = Body(description="The name of the image to remove"),
async def remove_image_from_board(
image_name: str = Body(description="The name of the image to remove", embed=True),
):
"""Deletes a board_image"""
"""Removes an image from its board, if it had one"""
try:
result = ApiDependencies.invoker.services.board_images.remove_image_from_board(
board_id=board_id, image_name=image_name
)
result = ApiDependencies.invoker.services.board_images.remove_image_from_board(image_name=image_name)
return result
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to update board")
raise HTTPException(status_code=500, detail="Failed to remove image from board")
@board_images_router.post(
"/batch",
operation_id="add_images_to_board",
responses={
201: {"description": "Images were added to board successfully"},
},
status_code=201,
response_model=AddImagesToBoardResult,
)
async def add_images_to_board(
board_id: str = Body(description="The id of the board to add to"),
image_names: list[str] = Body(description="The names of the images to add", embed=True),
) -> AddImagesToBoardResult:
"""Adds a list of images to a board"""
try:
added_image_names: list[str] = []
for image_name in image_names:
try:
ApiDependencies.invoker.services.board_images.add_image_to_board(
board_id=board_id, image_name=image_name
)
added_image_names.append(image_name)
except:
pass
return AddImagesToBoardResult(board_id=board_id, added_image_names=added_image_names)
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to add images to board")
@board_images_router.post(
"/batch/delete",
operation_id="remove_images_from_board",
responses={
201: {"description": "Images were removed from board successfully"},
},
status_code=201,
response_model=RemoveImagesFromBoardResult,
)
async def remove_images_from_board(
image_names: list[str] = Body(description="The names of the images to remove", embed=True),
) -> RemoveImagesFromBoardResult:
"""Removes a list of images from their board, if they had one"""
try:
removed_image_names: list[str] = []
for image_name in image_names:
try:
ApiDependencies.invoker.services.board_images.remove_image_from_board(image_name=image_name)
removed_image_names.append(image_name)
except:
pass
return RemoveImagesFromBoardResult(removed_image_names=removed_image_names)
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to remove images from board")

View File

@ -1,21 +1,20 @@
import io
from typing import Optional
from PIL import Image
from fastapi import Body, HTTPException, Path, Query, Request, Response, UploadFile
from fastapi.responses import FileResponse
from fastapi.routing import APIRouter
from PIL import Image
from pydantic import BaseModel
from invokeai.app.invocations.metadata import ImageMetadata
from invokeai.app.models.image import ImageCategory, ResourceOrigin
from invokeai.app.services.image_record_storage import OffsetPaginatedResults
from invokeai.app.services.item_storage import PaginatedResults
from invokeai.app.services.models.image_record import (
ImageDTO,
ImageRecordChanges,
ImageUrlsDTO,
)
from ..dependencies import ApiDependencies
images_router = APIRouter(prefix="/v1/images", tags=["images"])
@ -25,7 +24,7 @@ IMAGE_MAX_AGE = 31536000
@images_router.post(
"/",
"/upload",
operation_id="upload_image",
responses={
201: {"description": "The image was uploaded successfully"},
@ -77,7 +76,7 @@ async def upload_image(
raise HTTPException(status_code=500, detail="Failed to create image")
@images_router.delete("/{image_name}", operation_id="delete_image")
@images_router.delete("/i/{image_name}", operation_id="delete_image")
async def delete_image(
image_name: str = Path(description="The name of the image to delete"),
) -> None:
@ -103,7 +102,7 @@ async def clear_intermediates() -> int:
@images_router.patch(
"/{image_name}",
"/i/{image_name}",
operation_id="update_image",
response_model=ImageDTO,
)
@ -120,7 +119,7 @@ async def update_image(
@images_router.get(
"/{image_name}",
"/i/{image_name}",
operation_id="get_image_dto",
response_model=ImageDTO,
)
@ -136,7 +135,7 @@ async def get_image_dto(
@images_router.get(
"/{image_name}/metadata",
"/i/{image_name}/metadata",
operation_id="get_image_metadata",
response_model=ImageMetadata,
)
@ -151,8 +150,9 @@ async def get_image_metadata(
raise HTTPException(status_code=404)
@images_router.get(
"/{image_name}/full",
@images_router.api_route(
"/i/{image_name}/full",
methods=["GET", "HEAD"],
operation_id="get_image_full",
response_class=Response,
responses={
@ -187,7 +187,7 @@ async def get_image_full(
@images_router.get(
"/{image_name}/thumbnail",
"/i/{image_name}/thumbnail",
operation_id="get_image_thumbnail",
response_class=Response,
responses={
@ -216,7 +216,7 @@ async def get_image_thumbnail(
@images_router.get(
"/{image_name}/urls",
"/i/{image_name}/urls",
operation_id="get_image_urls",
response_model=ImageUrlsDTO,
)
@ -265,3 +265,24 @@ async def list_image_dtos(
)
return image_dtos
class DeleteImagesFromListResult(BaseModel):
deleted_images: list[str]
@images_router.post("/delete", operation_id="delete_images_from_list", response_model=DeleteImagesFromListResult)
async def delete_images_from_list(
image_names: list[str] = Body(description="The list of names of images to delete", embed=True),
) -> DeleteImagesFromListResult:
try:
deleted_images: list[str] = []
for image_name in image_names:
try:
ApiDependencies.invoker.services.images.delete(image_name)
deleted_images.append(image_name)
except:
pass
return DeleteImagesFromListResult(deleted_images=deleted_images)
except Exception as e:
raise HTTPException(status_code=500, detail="Failed to delete images")

View File

@ -104,8 +104,12 @@ async def update_model(
): # model manager moved model path during rename - don't overwrite it
info.path = new_info.get("path")
# replace empty string values with None/null to avoid phenomenon of vae: ''
info_dict = info.dict()
info_dict = {x: info_dict[x] if info_dict[x] else None for x in info_dict.keys()}
ApiDependencies.invoker.services.model_manager.update_model(
model_name=model_name, base_model=base_model, model_type=model_type, model_attributes=info.dict()
model_name=model_name, base_model=base_model, model_type=model_type, model_attributes=info_dict
)
model_raw = ApiDependencies.invoker.services.model_manager.list_model(

View File

@ -211,7 +211,7 @@ def invoke_api():
port = find_port(app_config.port)
if port != app_config.port:
logger.warn(f"Port {app_config.port} in use, using port {port}")
# Start our own event loop for eventing usage
loop = asyncio.new_event_loop()
config = uvicorn.Config(
@ -229,7 +229,7 @@ def invoke_api():
l.handlers.clear()
for ch in logger.handlers:
l.addHandler(ch)
loop.run_until_complete(server.serve())

View File

@ -37,6 +37,7 @@ from invokeai.app.services.image_record_storage import SqliteImageRecordStorage
from invokeai.app.services.images import ImageService, ImageServiceDependencies
from invokeai.app.services.resource_name import SimpleNameService
from invokeai.app.services.urls import LocalUrlService
from invokeai.app.services.invocation_stats import InvocationStatsService
from .services.default_graphs import default_text_to_image_graph_id, create_system_graphs
from .services.latent_storage import DiskLatentsStorage, ForwardCacheLatentsStorage
@ -311,6 +312,7 @@ def invoke_cli():
graph_library=SqliteItemStorage[LibraryGraph](filename=db_location, table_name="graphs"),
graph_execution_manager=graph_execution_manager,
processor=DefaultInvocationProcessor(),
performance_statistics=InvocationStatsService(graph_execution_manager),
logger=logger,
configuration=config,
)

View File

@ -1,6 +1,14 @@
from typing import Literal, Optional, Union, List, Annotated
from pydantic import BaseModel, Field
import re
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationContext, InvocationConfig
from .model import ClipField
from ...backend.util.devices import torch_dtype
from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent
from ...backend.model_management import BaseModelType, ModelType, SubModelType, ModelPatcher
import torch
from compel import Compel, ReturnedEmbeddingsType
from compel.prompt_parser import Blend, Conjunction, CrossAttentionControlSubstitute, FlattenedPrompt, Fragment
@ -8,7 +16,7 @@ from ...backend.util.devices import torch_dtype
from ...backend.model_management import ModelType
from ...backend.model_management.models import ModelNotFoundException
from ...backend.model_management.lora import ModelPatcher
from ...backend.stable_diffusion.diffusion import InvokeAIDiffuserComponent
from ...backend.stable_diffusion import InvokeAIDiffuserComponent, BasicConditioningInfo, SDXLConditioningInfo
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from .model import ClipField
from dataclasses import dataclass
@ -21,28 +29,9 @@ class ConditioningField(BaseModel):
schema_extra = {"required": ["conditioning_name"]}
@dataclass
class BasicConditioningInfo:
# type: Literal["basic_conditioning"] = "basic_conditioning"
embeds: torch.Tensor
extra_conditioning: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo]
# weight: float
# mode: ConditioningAlgo
@dataclass
class SDXLConditioningInfo(BasicConditioningInfo):
# type: Literal["sdxl_conditioning"] = "sdxl_conditioning"
pooled_embeds: torch.Tensor
add_time_ids: torch.Tensor
ConditioningInfoType = Annotated[Union[BasicConditioningInfo, SDXLConditioningInfo], Field(discriminator="type")]
@dataclass
class ConditioningFieldData:
conditionings: List[Union[BasicConditioningInfo, SDXLConditioningInfo]]
conditionings: List[BasicConditioningInfo]
# unconditioned: Optional[torch.Tensor]
@ -101,12 +90,15 @@ class CompelInvocation(BaseInvocation):
name = trigger[1:-1]
try:
ti_list.append(
context.services.model_manager.get_model(
model_name=name,
base_model=self.clip.text_encoder.base_model,
model_type=ModelType.TextualInversion,
context=context,
).context.model
(
name,
context.services.model_manager.get_model(
model_name=name,
base_model=self.clip.text_encoder.base_model,
model_type=ModelType.TextualInversion,
context=context,
).context.model,
)
)
except ModelNotFoundException:
# print(e)
@ -165,7 +157,15 @@ class CompelInvocation(BaseInvocation):
class SDXLPromptInvocationBase:
def run_clip_raw(self, context, clip_field, prompt, get_pooled):
def run_clip_compel(
self,
context: InvocationContext,
clip_field: ClipField,
prompt: str,
get_pooled: bool,
lora_prefix: str,
zero_on_empty: bool,
):
tokenizer_info = context.services.model_manager.get_model(
**clip_field.tokenizer.dict(),
context=context,
@ -175,79 +175,21 @@ class SDXLPromptInvocationBase:
context=context,
)
def _lora_loader():
for lora in clip_field.loras:
lora_info = context.services.model_manager.get_model(**lora.dict(exclude={"weight"}), context=context)
yield (lora_info.context.model, lora.weight)
del lora_info
return
# loras = [(context.services.model_manager.get_model(**lora.dict(exclude={"weight"})).context.model, lora.weight) for lora in self.clip.loras]
ti_list = []
for trigger in re.findall(r"<[a-zA-Z0-9., _-]+>", prompt):
name = trigger[1:-1]
try:
ti_list.append(
context.services.model_manager.get_model(
model_name=name,
base_model=clip_field.text_encoder.base_model,
model_type=ModelType.TextualInversion,
context=context,
).context.model
)
except ModelNotFoundException:
# print(e)
# import traceback
# print(traceback.format_exc())
print(f'Warn: trigger: "{trigger}" not found')
with ModelPatcher.apply_lora_text_encoder(
text_encoder_info.context.model, _lora_loader()
), ModelPatcher.apply_ti(tokenizer_info.context.model, text_encoder_info.context.model, ti_list) as (
tokenizer,
ti_manager,
), ModelPatcher.apply_clip_skip(
text_encoder_info.context.model, clip_field.skipped_layers
), text_encoder_info as text_encoder:
text_inputs = tokenizer(
prompt,
padding="max_length",
max_length=tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
prompt_embeds = text_encoder(
text_input_ids.to(text_encoder.device),
output_hidden_states=True,
# return zero on empty
if prompt == "" and zero_on_empty:
cpu_text_encoder = text_encoder_info.context.model
c = torch.zeros(
(1, cpu_text_encoder.config.max_position_embeddings, cpu_text_encoder.config.hidden_size),
dtype=text_encoder_info.context.cache.precision,
)
if get_pooled:
c_pooled = prompt_embeds[0]
c_pooled = torch.zeros(
(1, cpu_text_encoder.config.hidden_size),
dtype=c.dtype,
)
else:
c_pooled = None
c = prompt_embeds.hidden_states[-2]
del tokenizer
del text_encoder
del tokenizer_info
del text_encoder_info
c = c.detach().to("cpu")
if c_pooled is not None:
c_pooled = c_pooled.detach().to("cpu")
return c, c_pooled, None
def run_clip_compel(self, context, clip_field, prompt, get_pooled):
tokenizer_info = context.services.model_manager.get_model(
**clip_field.tokenizer.dict(),
context=context,
)
text_encoder_info = context.services.model_manager.get_model(
**clip_field.text_encoder.dict(),
context=context,
)
return c, c_pooled, None
def _lora_loader():
for lora in clip_field.loras:
@ -263,12 +205,15 @@ class SDXLPromptInvocationBase:
name = trigger[1:-1]
try:
ti_list.append(
context.services.model_manager.get_model(
model_name=name,
base_model=clip_field.text_encoder.base_model,
model_type=ModelType.TextualInversion,
context=context,
).context.model
(
name,
context.services.model_manager.get_model(
model_name=name,
base_model=clip_field.text_encoder.base_model,
model_type=ModelType.TextualInversion,
context=context,
).context.model,
)
)
except ModelNotFoundException:
# print(e)
@ -276,8 +221,8 @@ class SDXLPromptInvocationBase:
# print(traceback.format_exc())
print(f'Warn: trigger: "{trigger}" not found')
with ModelPatcher.apply_lora_text_encoder(
text_encoder_info.context.model, _lora_loader()
with ModelPatcher.apply_lora(
text_encoder_info.context.model, _lora_loader(), lora_prefix
), ModelPatcher.apply_ti(tokenizer_info.context.model, text_encoder_info.context.model, ti_list) as (
tokenizer,
ti_manager,
@ -349,11 +294,17 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c1, c1_pooled, ec1 = self.run_clip_compel(context, self.clip, self.prompt, False)
c1, c1_pooled, ec1 = self.run_clip_compel(
context, self.clip, self.prompt, False, "lora_te1_", zero_on_empty=True
)
if self.style.strip() == "":
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.prompt, True)
c2, c2_pooled, ec2 = self.run_clip_compel(
context, self.clip2, self.prompt, True, "lora_te2_", zero_on_empty=True
)
else:
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True)
c2, c2_pooled, ec2 = self.run_clip_compel(
context, self.clip2, self.style, True, "lora_te2_", zero_on_empty=True
)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)
@ -407,117 +358,8 @@ class SDXLRefinerCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)
add_time_ids = torch.tensor([original_size + crop_coords + (self.aesthetic_score,)])
conditioning_data = ConditioningFieldData(
conditionings=[
SDXLConditioningInfo(
embeds=c2,
pooled_embeds=c2_pooled,
add_time_ids=add_time_ids,
extra_conditioning=ec2, # or None
)
]
)
conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"
context.services.latents.save(conditioning_name, conditioning_data)
return CompelOutput(
conditioning=ConditioningField(
conditioning_name=conditioning_name,
),
)
class SDXLRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
"""Pass unmodified prompt to conditioning without compel processing."""
type: Literal["sdxl_raw_prompt"] = "sdxl_raw_prompt"
prompt: str = Field(default="", description="Prompt")
style: str = Field(default="", description="Style prompt")
original_width: int = Field(1024, description="")
original_height: int = Field(1024, description="")
crop_top: int = Field(0, description="")
crop_left: int = Field(0, description="")
target_width: int = Field(1024, description="")
target_height: int = Field(1024, description="")
clip: ClipField = Field(None, description="Clip to use")
clip2: ClipField = Field(None, description="Clip2 to use")
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {"title": "SDXL Prompt (Raw)", "tags": ["prompt", "compel"], "type_hints": {"model": "model"}},
}
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c1, c1_pooled, ec1 = self.run_clip_raw(context, self.clip, self.prompt, False)
if self.style.strip() == "":
c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.prompt, True)
else:
c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.style, True)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)
target_size = (self.target_height, self.target_width)
add_time_ids = torch.tensor([original_size + crop_coords + target_size])
conditioning_data = ConditioningFieldData(
conditionings=[
SDXLConditioningInfo(
embeds=torch.cat([c1, c2], dim=-1),
pooled_embeds=c2_pooled,
add_time_ids=add_time_ids,
extra_conditioning=ec1,
)
]
)
conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"
context.services.latents.save(conditioning_name, conditioning_data)
return CompelOutput(
conditioning=ConditioningField(
conditioning_name=conditioning_name,
),
)
class SDXLRefinerRawPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
"""Parse prompt using compel package to conditioning."""
type: Literal["sdxl_refiner_raw_prompt"] = "sdxl_refiner_raw_prompt"
style: str = Field(default="", description="Style prompt") # TODO: ?
original_width: int = Field(1024, description="")
original_height: int = Field(1024, description="")
crop_top: int = Field(0, description="")
crop_left: int = Field(0, description="")
aesthetic_score: float = Field(6.0, description="")
clip2: ClipField = Field(None, description="Clip to use")
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Refiner Prompt (Raw)",
"tags": ["prompt", "compel"],
"type_hints": {"model": "model"},
},
}
@torch.no_grad()
def invoke(self, context: InvocationContext) -> CompelOutput:
c2, c2_pooled, ec2 = self.run_clip_raw(context, self.clip2, self.style, True)
# TODO: if there will appear lora for refiner - write proper prefix
c2, c2_pooled, ec2 = self.run_clip_compel(context, self.clip2, self.style, True, "<NONE>", zero_on_empty=False)
original_size = (self.original_height, self.original_width)
crop_coords = (self.crop_top, self.crop_left)

View File

@ -1,251 +0,0 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)
from functools import partial
from typing import Literal, Optional, get_args
import torch
from pydantic import Field
from invokeai.app.models.image import ColorField, ImageCategory, ImageField, ResourceOrigin
from invokeai.app.util.misc import SEED_MAX, get_random_seed
from invokeai.backend.generator.inpaint import infill_methods
from ...backend.generator import Inpaint, InvokeAIGenerator
from ...backend.stable_diffusion import PipelineIntermediateState
from ..util.step_callback import stable_diffusion_step_callback
from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext
from .image import ImageOutput
from ...backend.model_management.lora import ModelPatcher
from ...backend.stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline
from .model import UNetField, VaeField
from .compel import ConditioningField
from contextlib import contextmanager, ExitStack, ContextDecorator
SAMPLER_NAME_VALUES = Literal[tuple(InvokeAIGenerator.schedulers())]
INFILL_METHODS = Literal[tuple(infill_methods())]
DEFAULT_INFILL_METHOD = "patchmatch" if "patchmatch" in get_args(INFILL_METHODS) else "tile"
from .latent import get_scheduler
class OldModelContext(ContextDecorator):
model: StableDiffusionGeneratorPipeline
def __init__(self, model):
self.model = model
def __enter__(self):
return self.model
def __exit__(self, *exc):
return False
class OldModelInfo:
name: str
hash: str
context: OldModelContext
def __init__(self, name: str, hash: str, model: StableDiffusionGeneratorPipeline):
self.name = name
self.hash = hash
self.context = OldModelContext(
model=model,
)
class InpaintInvocation(BaseInvocation):
"""Generates an image using inpaint."""
type: Literal["inpaint"] = "inpaint"
positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation")
negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation")
seed: int = Field(
ge=0, le=SEED_MAX, description="The seed to use (omit for random)", default_factory=get_random_seed
)
steps: int = Field(default=30, gt=0, description="The number of steps to use to generate the image")
width: int = Field(
default=512,
multiple_of=8,
gt=0,
description="The width of the resulting image",
)
height: int = Field(
default=512,
multiple_of=8,
gt=0,
description="The height of the resulting image",
)
cfg_scale: float = Field(
default=7.5,
ge=1,
description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt",
)
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use")
unet: UNetField = Field(default=None, description="UNet model")
vae: VaeField = Field(default=None, description="Vae model")
# Inputs
image: Optional[ImageField] = Field(description="The input image")
strength: float = Field(default=0.75, gt=0, le=1, description="The strength of the original image")
fit: bool = Field(
default=True,
description="Whether or not the result should be fit to the aspect ratio of the input image",
)
# Inputs
mask: Optional[ImageField] = Field(description="The mask")
seam_size: int = Field(default=96, ge=1, description="The seam inpaint size (px)")
seam_blur: int = Field(default=16, ge=0, description="The seam inpaint blur radius (px)")
seam_strength: float = Field(default=0.75, gt=0, le=1, description="The seam inpaint strength")
seam_steps: int = Field(default=30, ge=1, description="The number of steps to use for seam inpaint")
tile_size: int = Field(default=32, ge=1, description="The tile infill method size (px)")
infill_method: INFILL_METHODS = Field(
default=DEFAULT_INFILL_METHOD,
description="The method used to infill empty regions (px)",
)
inpaint_width: Optional[int] = Field(
default=None,
multiple_of=8,
gt=0,
description="The width of the inpaint region (px)",
)
inpaint_height: Optional[int] = Field(
default=None,
multiple_of=8,
gt=0,
description="The height of the inpaint region (px)",
)
inpaint_fill: Optional[ColorField] = Field(
default=ColorField(r=127, g=127, b=127, a=255),
description="The solid infill method color",
)
inpaint_replace: float = Field(
default=0.0,
ge=0.0,
le=1.0,
description="The amount by which to replace masked areas with latent noise",
)
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {"tags": ["stable-diffusion", "image"], "title": "Inpaint"},
}
def dispatch_progress(
self,
context: InvocationContext,
source_node_id: str,
intermediate_state: PipelineIntermediateState,
) -> None:
stable_diffusion_step_callback(
context=context,
intermediate_state=intermediate_state,
node=self.dict(),
source_node_id=source_node_id,
)
def get_conditioning(self, context, unet):
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
return (uc, c, extra_conditioning_info)
@contextmanager
def load_model_old_way(self, context, scheduler):
def _lora_loader():
for lora in self.unet.loras:
lora_info = context.services.model_manager.get_model(
**lora.dict(exclude={"weight"}),
context=context,
)
yield (lora_info.context.model, lora.weight)
del lora_info
return
unet_info = context.services.model_manager.get_model(
**self.unet.unet.dict(),
context=context,
)
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
context=context,
)
with vae_info as vae, ModelPatcher.apply_lora_unet(unet_info.context.model, _lora_loader()), unet_info as unet:
device = context.services.model_manager.mgr.cache.execution_device
dtype = context.services.model_manager.mgr.cache.precision
pipeline = StableDiffusionGeneratorPipeline(
vae=vae,
text_encoder=None,
tokenizer=None,
unet=unet,
scheduler=scheduler,
safety_checker=None,
feature_extractor=None,
requires_safety_checker=False,
precision="float16" if dtype == torch.float16 else "float32",
execution_device=device,
)
yield OldModelInfo(
name=self.unet.unet.model_name,
hash="<NO-HASH>",
model=pipeline,
)
def invoke(self, context: InvocationContext) -> ImageOutput:
image = None if self.image is None else context.services.images.get_pil_image(self.image.image_name)
mask = None if self.mask is None else context.services.images.get_pil_image(self.mask.image_name)
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
)
with self.load_model_old_way(context, scheduler) as model:
conditioning = self.get_conditioning(context, model.context.model.unet)
outputs = Inpaint(model).generate(
conditioning=conditioning,
scheduler=scheduler,
init_image=image,
mask_image=mask,
step_callback=partial(self.dispatch_progress, context, source_node_id),
**self.dict(
exclude={"positive_conditioning", "negative_conditioning", "scheduler", "image", "mask"}
), # Shorthand for passing all of the parameters above manually
)
# Outputs is an infinite iterator that will return a new InvokeAIGeneratorOutput object
# each time it is called. We only need the first one.
generator_output = next(outputs)
image_dto = context.services.images.create(
image=generator_output.image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
session_id=context.graph_execution_state_id,
node_id=self.id,
is_intermediate=self.is_intermediate,
)
return ImageOutput(
image=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)

View File

@ -1,28 +1,19 @@
# Copyright (c) 2022 Kyle Schouviller (https://github.com/kyle0654)
from typing import Literal, Optional
import numpy
from PIL import Image, ImageFilter, ImageOps, ImageChops
from pydantic import Field
from pathlib import Path
from typing import Union
from typing import Literal, Optional, Union
import cv2
import numpy
from PIL import Image, ImageChops, ImageFilter, ImageOps
from pydantic import Field
from invokeai.app.invocations.metadata import CoreMetadata
from ..models.image import (
ImageCategory,
ImageField,
ResourceOrigin,
PILInvocationConfig,
ImageOutput,
MaskOutput,
)
from .baseinvocation import (
BaseInvocation,
InvocationContext,
InvocationConfig,
)
from invokeai.backend.image_util.safety_checker import SafetyChecker
from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark
from invokeai.backend.image_util.safety_checker import SafetyChecker
from ..models.image import ImageCategory, ImageField, ImageOutput, MaskOutput, PILInvocationConfig, ResourceOrigin
from .baseinvocation import BaseInvocation, InvocationConfig, InvocationContext
class LoadImageInvocation(BaseInvocation):
@ -142,9 +133,10 @@ class ImagePasteInvocation(BaseInvocation, PILInvocationConfig):
def invoke(self, context: InvocationContext) -> ImageOutput:
base_image = context.services.images.get_pil_image(self.base_image.image_name)
image = context.services.images.get_pil_image(self.image.image_name)
mask = (
None if self.mask is None else ImageOps.invert(context.services.images.get_pil_image(self.mask.image_name))
)
mask = None
if self.mask is not None:
mask = context.services.images.get_pil_image(self.mask.image_name)
mask = ImageOps.invert(mask.convert("L"))
# TODO: probably shouldn't invert mask here... should user be required to do it?
min_x = min(0, self.x)
@ -500,7 +492,7 @@ class ImageLerpInvocation(BaseInvocation, PILInvocationConfig):
image = context.services.images.get_pil_image(self.image.image_name)
image_arr = numpy.asarray(image, dtype=numpy.float32) / 255
image_arr = image_arr * (self.max - self.min) + self.max
image_arr = image_arr * (self.max - self.min) + self.min
lerp_image = Image.fromarray(numpy.uint8(image_arr))
@ -650,3 +642,332 @@ class ImageWatermarkInvocation(BaseInvocation, PILInvocationConfig):
width=image_dto.width,
height=image_dto.height,
)
class MaskEdgeInvocation(BaseInvocation, PILInvocationConfig):
"""Applies an edge mask to an image"""
# fmt: off
type: Literal["mask_edge"] = "mask_edge"
# Inputs
image: Optional[ImageField] = Field(default=None, description="The image to apply the mask to")
edge_size: int = Field(description="The size of the edge")
edge_blur: int = Field(description="The amount of blur on the edge")
low_threshold: int = Field(description="First threshold for the hysteresis procedure in Canny edge detection")
high_threshold: int = Field(description="Second threshold for the hysteresis procedure in Canny edge detection")
# fmt: on
def invoke(self, context: InvocationContext) -> MaskOutput:
mask = context.services.images.get_pil_image(self.image.image_name)
npimg = numpy.asarray(mask, dtype=numpy.uint8)
npgradient = numpy.uint8(255 * (1.0 - numpy.floor(numpy.abs(0.5 - numpy.float32(npimg) / 255.0) * 2.0)))
npedge = cv2.Canny(npimg, threshold1=self.low_threshold, threshold2=self.high_threshold)
npmask = npgradient + npedge
npmask = cv2.dilate(npmask, numpy.ones((3, 3), numpy.uint8), iterations=int(self.edge_size / 2))
new_mask = Image.fromarray(npmask)
if self.edge_blur > 0:
new_mask = new_mask.filter(ImageFilter.BoxBlur(self.edge_blur))
new_mask = ImageOps.invert(new_mask)
image_dto = context.services.images.create(
image=new_mask,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.MASK,
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
)
return MaskOutput(
mask=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)
class MaskCombineInvocation(BaseInvocation, PILInvocationConfig):
"""Combine two masks together by multiplying them using `PIL.ImageChops.multiply()`."""
# fmt: off
type: Literal["mask_combine"] = "mask_combine"
# Inputs
mask1: ImageField = Field(default=None, description="The first mask to combine")
mask2: ImageField = Field(default=None, description="The second image to combine")
# fmt: on
class Config(InvocationConfig):
schema_extra = {
"ui": {"title": "Mask Combine", "tags": ["mask", "combine"]},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
mask1 = context.services.images.get_pil_image(self.mask1.image_name).convert("L")
mask2 = context.services.images.get_pil_image(self.mask2.image_name).convert("L")
combined_mask = ImageChops.multiply(mask1, mask2)
image_dto = context.services.images.create(
image=combined_mask,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
)
return ImageOutput(
image=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)
class ColorCorrectInvocation(BaseInvocation, PILInvocationConfig):
"""
Shifts the colors of a target image to match the reference image, optionally
using a mask to only color-correct certain regions of the target image.
"""
type: Literal["color_correct"] = "color_correct"
image: Optional[ImageField] = Field(default=None, description="The image to color-correct")
reference: Optional[ImageField] = Field(default=None, description="Reference image for color-correction")
mask: Optional[ImageField] = Field(default=None, description="Mask to use when applying color-correction")
mask_blur_radius: float = Field(default=8, description="Mask blur radius")
def invoke(self, context: InvocationContext) -> ImageOutput:
pil_init_mask = None
if self.mask is not None:
pil_init_mask = context.services.images.get_pil_image(self.mask.image_name).convert("L")
init_image = context.services.images.get_pil_image(self.reference.image_name)
result = context.services.images.get_pil_image(self.image.image_name).convert("RGBA")
# if init_image is None or init_mask is None:
# return result
# Get the original alpha channel of the mask if there is one.
# Otherwise it is some other black/white image format ('1', 'L' or 'RGB')
# pil_init_mask = (
# init_mask.getchannel("A")
# if init_mask.mode == "RGBA"
# else init_mask.convert("L")
# )
pil_init_image = init_image.convert("RGBA") # Add an alpha channel if one doesn't exist
# Build an image with only visible pixels from source to use as reference for color-matching.
init_rgb_pixels = numpy.asarray(init_image.convert("RGB"), dtype=numpy.uint8)
init_a_pixels = numpy.asarray(pil_init_image.getchannel("A"), dtype=numpy.uint8)
init_mask_pixels = numpy.asarray(pil_init_mask, dtype=numpy.uint8)
# Get numpy version of result
np_image = numpy.asarray(result.convert("RGB"), dtype=numpy.uint8)
# Mask and calculate mean and standard deviation
mask_pixels = init_a_pixels * init_mask_pixels > 0
np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :]
np_image_masked = np_image[mask_pixels, :]
if np_init_rgb_pixels_masked.size > 0:
init_means = np_init_rgb_pixels_masked.mean(axis=0)
init_std = np_init_rgb_pixels_masked.std(axis=0)
gen_means = np_image_masked.mean(axis=0)
gen_std = np_image_masked.std(axis=0)
# Color correct
np_matched_result = np_image.copy()
np_matched_result[:, :, :] = (
(
(
(np_matched_result[:, :, :].astype(numpy.float32) - gen_means[None, None, :])
/ gen_std[None, None, :]
)
* init_std[None, None, :]
+ init_means[None, None, :]
)
.clip(0, 255)
.astype(numpy.uint8)
)
matched_result = Image.fromarray(np_matched_result, mode="RGB")
else:
matched_result = Image.fromarray(np_image, mode="RGB")
# Blur the mask out (into init image) by specified amount
if self.mask_blur_radius > 0:
nm = numpy.asarray(pil_init_mask, dtype=numpy.uint8)
nmd = cv2.erode(
nm,
kernel=numpy.ones((3, 3), dtype=numpy.uint8),
iterations=int(self.mask_blur_radius / 2),
)
pmd = Image.fromarray(nmd, mode="L")
blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(self.mask_blur_radius))
else:
blurred_init_mask = pil_init_mask
multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, result.split()[-1])
# Paste original on color-corrected generation (using blurred mask)
matched_result.paste(init_image, (0, 0), mask=multiplied_blurred_init_mask)
image_dto = context.services.images.create(
image=matched_result,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
)
return ImageOutput(
image=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)
class ImageHueAdjustmentInvocation(BaseInvocation):
"""Adjusts the Hue of an image."""
# fmt: off
type: Literal["img_hue_adjust"] = "img_hue_adjust"
# Inputs
image: ImageField = Field(default=None, description="The image to adjust")
hue: int = Field(default=0, description="The degrees by which to rotate the hue, 0-360")
# fmt: on
def invoke(self, context: InvocationContext) -> ImageOutput:
pil_image = context.services.images.get_pil_image(self.image.image_name)
# Convert image to HSV color space
hsv_image = numpy.array(pil_image.convert("HSV"))
# Convert hue from 0..360 to 0..256
hue = int(256 * ((self.hue % 360) / 360))
# Increment each hue and wrap around at 255
hsv_image[:, :, 0] = (hsv_image[:, :, 0] + hue) % 256
# Convert back to PIL format and to original color mode
pil_image = Image.fromarray(hsv_image, mode="HSV").convert("RGBA")
image_dto = context.services.images.create(
image=pil_image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
is_intermediate=self.is_intermediate,
session_id=context.graph_execution_state_id,
)
return ImageOutput(
image=ImageField(
image_name=image_dto.image_name,
),
width=image_dto.width,
height=image_dto.height,
)
class ImageLuminosityAdjustmentInvocation(BaseInvocation):
"""Adjusts the Luminosity (Value) of an image."""
# fmt: off
type: Literal["img_luminosity_adjust"] = "img_luminosity_adjust"
# Inputs
image: ImageField = Field(default=None, description="The image to adjust")
luminosity: float = Field(default=1.0, ge=0, le=1, description="The factor by which to adjust the luminosity (value)")
# fmt: on
def invoke(self, context: InvocationContext) -> ImageOutput:
pil_image = context.services.images.get_pil_image(self.image.image_name)
# Convert PIL image to OpenCV format (numpy array), note color channel
# ordering is changed from RGB to BGR
image = numpy.array(pil_image.convert("RGB"))[:, :, ::-1]
# Convert image to HSV color space
hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Adjust the luminosity (value)
hsv_image[:, :, 2] = numpy.clip(hsv_image[:, :, 2] * self.luminosity, 0, 255)
# Convert image back to BGR color space
image = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR)
# Convert back to PIL format and to original color mode
pil_image = Image.fromarray(image[:, :, ::-1], "RGB").convert("RGBA")
image_dto = context.services.images.create(
image=pil_image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
is_intermediate=self.is_intermediate,
session_id=context.graph_execution_state_id,
)
return ImageOutput(
image=ImageField(
image_name=image_dto.image_name,
),
width=image_dto.width,
height=image_dto.height,
)
class ImageSaturationAdjustmentInvocation(BaseInvocation):
"""Adjusts the Saturation of an image."""
# fmt: off
type: Literal["img_saturation_adjust"] = "img_saturation_adjust"
# Inputs
image: ImageField = Field(default=None, description="The image to adjust")
saturation: float = Field(default=1.0, ge=0, le=1, description="The factor by which to adjust the saturation")
# fmt: on
def invoke(self, context: InvocationContext) -> ImageOutput:
pil_image = context.services.images.get_pil_image(self.image.image_name)
# Convert PIL image to OpenCV format (numpy array), note color channel
# ordering is changed from RGB to BGR
image = numpy.array(pil_image.convert("RGB"))[:, :, ::-1]
# Convert image to HSV color space
hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# Adjust the saturation
hsv_image[:, :, 1] = numpy.clip(hsv_image[:, :, 1] * self.saturation, 0, 255)
# Convert image back to BGR color space
image = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR)
# Convert back to PIL format and to original color mode
pil_image = Image.fromarray(image[:, :, ::-1], "RGB").convert("RGBA")
image_dto = context.services.images.create(
image=pil_image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
is_intermediate=self.is_intermediate,
session_id=context.graph_execution_state_id,
)
return ImageOutput(
image=ImageField(
image_name=image_dto.image_name,
),
width=image_dto.width,
height=image_dto.height,
)

View File

@ -5,16 +5,24 @@ from typing import List, Literal, Optional, Union
import einops
import torch
from diffusers import ControlNetModel
import torchvision.transforms as T
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers import SchedulerMixin as Scheduler
from diffusers.models.attention_processor import (
AttnProcessor2_0,
LoRAAttnProcessor2_0,
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
from diffusers.schedulers import DPMSolverSDEScheduler, SchedulerMixin as Scheduler
from pydantic import BaseModel, Field, validator
from torchvision.transforms.functional import resize as tv_resize
from invokeai.app.invocations.metadata import CoreMetadata
from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from invokeai.backend.model_management.models import ModelType, SilenceWarnings
from ...backend.model_management.lora import ModelPatcher
from ...backend.model_management import BaseModelType, ModelPatcher
from ...backend.stable_diffusion import PipelineIntermediateState
from ...backend.stable_diffusion.diffusers_pipeline import (
ConditioningData,
@ -24,22 +32,13 @@ from ...backend.stable_diffusion.diffusers_pipeline import (
)
from ...backend.stable_diffusion.diffusion.shared_invokeai_diffusion import PostprocessingSettings
from ...backend.stable_diffusion.schedulers import SCHEDULER_MAP
from ...backend.util.devices import choose_torch_device, torch_dtype, choose_precision
from ...backend.util.devices import choose_precision, choose_torch_device, torch_dtype
from ..models.image import ImageCategory, ImageField, ResourceOrigin
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from .compel import ConditioningField
from .controlnet_image_processors import ControlField
from .image import ImageOutput
from .model import ModelInfo, UNetField, VaeField
from invokeai.app.util.controlnet_utils import prepare_control_image
from diffusers.models.attention_processor import (
AttnProcessor2_0,
LoRAAttnProcessor2_0,
LoRAXFormersAttnProcessor,
XFormersAttnProcessor,
)
DEFAULT_PRECISION = choose_precision(choose_torch_device())
@ -48,6 +47,7 @@ class LatentsField(BaseModel):
"""A latents field used for passing latents between invocations"""
latents_name: Optional[str] = Field(default=None, description="The name of the latents")
seed: Optional[int] = Field(description="Seed used to generate this latents")
class Config:
schema_extra = {"required": ["latents_name"]}
@ -66,9 +66,9 @@ class LatentsOutput(BaseInvocationOutput):
# fmt: on
def build_latents_output(latents_name: str, latents: torch.Tensor):
def build_latents_output(latents_name: str, latents: torch.Tensor, seed: Optional[int]):
return LatentsOutput(
latents=LatentsField(latents_name=latents_name),
latents=LatentsField(latents_name=latents_name, seed=seed),
width=latents.size()[3] * 8,
height=latents.size()[2] * 8,
)
@ -81,6 +81,7 @@ def get_scheduler(
context: InvocationContext,
scheduler_info: ModelInfo,
scheduler_name: str,
seed: int,
) -> Scheduler:
scheduler_class, scheduler_extra_config = SCHEDULER_MAP.get(scheduler_name, SCHEDULER_MAP["ddim"])
orig_scheduler_info = context.services.model_manager.get_model(
@ -97,6 +98,11 @@ def get_scheduler(
**scheduler_extra_config,
"_backup": scheduler_config,
}
# make dpmpp_sde reproducable(seed can be passed only in initializer)
if scheduler_class is DPMSolverSDEScheduler:
scheduler_config["noise_sampler_seed"] = seed
scheduler = scheduler_class.from_config(scheduler_config)
# hack copied over from generate.py
@ -105,25 +111,31 @@ def get_scheduler(
return scheduler
# Text to image
class TextToLatentsInvocation(BaseInvocation):
"""Generates latents from conditionings."""
class DenoiseLatentsInvocation(BaseInvocation):
"""Denoises noisy latents to decodable images"""
type: Literal["t2l"] = "t2l"
type: Literal["denoise_latents"] = "denoise_latents"
# Inputs
# fmt: off
positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation")
negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation")
noise: Optional[LatentsField] = Field(description="The noise to use")
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: Union[float, List[float]] = Field(
default=7.5,
ge=1,
description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt",
)
denoising_start: float = Field(default=0.0, ge=0, le=1, description="")
denoising_end: float = Field(default=1.0, ge=0, le=1, description="")
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use")
unet: UNetField = Field(default=None, description="UNet submodel")
control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
# seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
# seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
# fmt: on
latents: Optional[LatentsField] = Field(description="The latents to use as a base image")
mask: Optional[ImageField] = Field(
None,
description="Mask",
)
@validator("cfg_scale")
def ge_one(cls, v):
@ -141,12 +153,11 @@ class TextToLatentsInvocation(BaseInvocation):
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Text To Latents",
"tags": ["latents"],
"title": "Denoise Latents",
"tags": ["denoise", "latents"],
"type_hints": {
"model": "model",
"control": "control",
# "cfg_scale": "float",
"cfg_scale": "number",
},
},
@ -158,12 +169,14 @@ class TextToLatentsInvocation(BaseInvocation):
context: InvocationContext,
source_node_id: str,
intermediate_state: PipelineIntermediateState,
base_model: BaseModelType,
) -> None:
stable_diffusion_step_callback(
context=context,
intermediate_state=intermediate_state,
node=self.dict(),
source_node_id=source_node_id,
base_model=base_model,
)
def get_conditioning_data(
@ -171,13 +184,14 @@ class TextToLatentsInvocation(BaseInvocation):
context: InvocationContext,
scheduler,
unet,
seed,
) -> ConditioningData:
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
c = positive_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
extra_conditioning_info = positive_cond_data.conditionings[0].extra_conditioning
c = positive_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype)
extra_conditioning_info = c.extra_conditioning
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
uc = negative_cond_data.conditionings[0].embeds.to(device=unet.device, dtype=unet.dtype)
uc = negative_cond_data.conditionings[0].to(device=unet.device, dtype=unet.dtype)
conditioning_data = ConditioningData(
unconditioned_embeddings=uc,
@ -197,7 +211,8 @@ class TextToLatentsInvocation(BaseInvocation):
# for ddim scheduler
eta=0.0, # ddim_eta
# for ancestral and sde schedulers
generator=torch.Generator(device=unet.device).manual_seed(0),
# flip all bits to have noise different from initial
generator=torch.Generator(device=unet.device).manual_seed(seed ^ 0xFFFFFFFF),
)
return conditioning_data
@ -230,7 +245,6 @@ class TextToLatentsInvocation(BaseInvocation):
safety_checker=None,
feature_extractor=None,
requires_safety_checker=False,
precision="float16" if unet.dtype == torch.float16 else "float32",
)
def prep_control_data(
@ -309,110 +323,83 @@ class TextToLatentsInvocation(BaseInvocation):
# MultiControlNetModel has been refactored out, just need list[ControlNetData]
return control_data
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
with SilenceWarnings():
noise = context.services.latents.get(self.noise.latents_name)
# original idea by https://github.com/AmericanPresidentJimmyCarter
# TODO: research more for second order schedulers timesteps
def init_scheduler(self, scheduler, device, steps, denoising_start, denoising_end):
num_inference_steps = steps
if scheduler.config.get("cpu_only", False):
scheduler.set_timesteps(num_inference_steps, device="cpu")
timesteps = scheduler.timesteps.to(device=device)
else:
scheduler.set_timesteps(num_inference_steps, device=device)
timesteps = scheduler.timesteps
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
# apply denoising_start
t_start_val = int(round(scheduler.config.num_train_timesteps * (1 - denoising_start)))
t_start_idx = len(list(filter(lambda ts: ts >= t_start_val, timesteps)))
timesteps = timesteps[t_start_idx:]
if scheduler.order == 2 and t_start_idx > 0:
timesteps = timesteps[1:]
def step_callback(state: PipelineIntermediateState):
self.dispatch_progress(context, source_node_id, state)
# save start timestep to apply noise
init_timestep = timesteps[:1]
def _lora_loader():
for lora in self.unet.loras:
lora_info = context.services.model_manager.get_model(
**lora.dict(exclude={"weight"}),
context=context,
)
yield (lora_info.context.model, lora.weight)
del lora_info
return
# apply denoising_end
t_end_val = int(round(scheduler.config.num_train_timesteps * (1 - denoising_end)))
t_end_idx = len(list(filter(lambda ts: ts >= t_end_val, timesteps)))
if scheduler.order == 2 and t_end_idx > 0:
t_end_idx += 1
timesteps = timesteps[:t_end_idx]
unet_info = context.services.model_manager.get_model(
**self.unet.unet.dict(),
context=context,
)
with ExitStack() as exit_stack, ModelPatcher.apply_lora_unet(
unet_info.context.model, _lora_loader()
), unet_info as unet:
noise = noise.to(device=unet.device, dtype=unet.dtype)
# calculate step count based on scheduler order
num_inference_steps = len(timesteps)
if scheduler.order == 2:
num_inference_steps += num_inference_steps % 2
num_inference_steps = num_inference_steps // 2
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
)
return num_inference_steps, timesteps, init_timestep
pipeline = self.create_pipeline(unet, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler, unet)
def prep_mask_tensor(self, mask, context, lantents):
if mask is None:
return None
control_data = self.prep_control_data(
model=pipeline,
context=context,
control_input=self.control,
latents_shape=noise.shape,
# do_classifier_free_guidance=(self.cfg_scale >= 1.0))
do_classifier_free_guidance=True,
exit_stack=exit_stack,
)
# TODO: Verify the noise is the right size
result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
latents=torch.zeros_like(noise, dtype=torch_dtype(unet.device)),
noise=noise,
num_inference_steps=self.steps,
conditioning_data=conditioning_data,
control_data=control_data, # list[ControlNetData]
callback=step_callback,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
result_latents = result_latents.to("cpu")
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, result_latents)
return build_latents_output(latents_name=name, latents=result_latents)
class LatentsToLatentsInvocation(TextToLatentsInvocation):
"""Generates latents using latents as base image."""
type: Literal["l2l"] = "l2l"
# Inputs
latents: Optional[LatentsField] = Field(description="The latents to use as a base image")
strength: float = Field(default=0.7, ge=0, le=1, description="The strength of the latents to use")
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Latent To Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
"control": "control",
"cfg_scale": "number",
},
},
}
mask_image = context.services.images.get_pil_image(mask.image_name)
if mask_image.mode != "L":
# FIXME: why do we get passed an RGB image here? We can only use single-channel.
mask_image = mask_image.convert("L")
mask_tensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
if mask_tensor.dim() == 3:
mask_tensor = mask_tensor.unsqueeze(0)
mask_tensor = tv_resize(mask_tensor, lantents.shape[-2:], T.InterpolationMode.BILINEAR)
return 1 - mask_tensor
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
with SilenceWarnings(): # this quenches NSFW nag from diffusers
noise = context.services.latents.get(self.noise.latents_name)
latent = context.services.latents.get(self.latents.latents_name)
seed = None
noise = None
if self.noise is not None:
noise = context.services.latents.get(self.noise.latents_name)
seed = self.noise.seed
if self.latents is not None:
latents = context.services.latents.get(self.latents.latents_name)
if seed is None:
seed = self.latents.seed
else:
latents = torch.zeros_like(noise)
if seed is None:
seed = 0
mask = self.prep_mask_tensor(self.mask, context, latents)
# Get the source node id (we are invoking the prepared node)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
def step_callback(state: PipelineIntermediateState):
self.dispatch_progress(context, source_node_id, state)
self.dispatch_progress(context, source_node_id, state, self.unet.unet.base_model)
def _lora_loader():
for lora in self.unet.loras:
@ -431,44 +418,48 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
with ExitStack() as exit_stack, ModelPatcher.apply_lora_unet(
unet_info.context.model, _lora_loader()
), unet_info as unet:
noise = noise.to(device=unet.device, dtype=unet.dtype)
latent = latent.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
if noise is not None:
noise = noise.to(device=unet.device, dtype=unet.dtype)
if mask is not None:
mask = mask.to(device=unet.device, dtype=unet.dtype)
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
seed=seed,
)
pipeline = self.create_pipeline(unet, scheduler)
conditioning_data = self.get_conditioning_data(context, scheduler, unet)
conditioning_data = self.get_conditioning_data(context, scheduler, unet, seed)
control_data = self.prep_control_data(
model=pipeline,
context=context,
control_input=self.control,
latents_shape=noise.shape,
latents_shape=latents.shape,
# do_classifier_free_guidance=(self.cfg_scale >= 1.0))
do_classifier_free_guidance=True,
exit_stack=exit_stack,
)
# TODO: Verify the noise is the right size
initial_latents = (
latent if self.strength < 1.0 else torch.zeros_like(latent, device=unet.device, dtype=latent.dtype)
)
timesteps, _ = pipeline.get_img2img_timesteps(
self.steps,
self.strength,
num_inference_steps, timesteps, init_timestep = self.init_scheduler(
scheduler,
device=unet.device,
steps=self.steps,
denoising_start=self.denoising_start,
denoising_end=self.denoising_end,
)
result_latents, result_attention_map_saver = pipeline.latents_from_embeddings(
latents=initial_latents,
latents=latents,
timesteps=timesteps,
init_timestep=init_timestep,
noise=noise,
num_inference_steps=self.steps,
seed=seed,
mask=mask,
num_inference_steps=num_inference_steps,
conditioning_data=conditioning_data,
control_data=control_data, # list[ControlNetData]
callback=step_callback,
@ -480,7 +471,7 @@ class LatentsToLatentsInvocation(TextToLatentsInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, result_latents)
return build_latents_output(latents_name=name, latents=result_latents)
return build_latents_output(latents_name=name, latents=result_latents, seed=seed)
# Latent to image
@ -622,7 +613,7 @@ class ResizeLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, resized_latents)
context.services.latents.save(name, resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents, seed=self.latents.seed)
class ScaleLatentsInvocation(BaseInvocation):
@ -664,7 +655,7 @@ class ScaleLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
# context.services.latents.set(name, resized_latents)
context.services.latents.save(name, resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents)
return build_latents_output(latents_name=name, latents=resized_latents, seed=self.latents.seed)
class ImageToLatentsInvocation(BaseInvocation):
@ -745,4 +736,4 @@ class ImageToLatentsInvocation(BaseInvocation):
name = f"{context.graph_execution_state_id}__{self.id}"
latents = latents.to("cpu")
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=latents)
return build_latents_output(latents_name=name, latents=latents, seed=None)

View File

@ -1,7 +1,8 @@
from typing import Literal, Optional, Union
from pydantic import BaseModel, Field
from pydantic import Field
from ...version import __version__
from invokeai.app.invocations.baseinvocation import (
BaseInvocation,
BaseInvocationOutput,
@ -10,18 +11,20 @@ from invokeai.app.invocations.baseinvocation import (
)
from invokeai.app.invocations.controlnet_image_processors import ControlField
from invokeai.app.invocations.model import LoRAModelField, MainModelField, VAEModelField
from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
class LoRAMetadataField(BaseModel):
class LoRAMetadataField(BaseModelExcludeNull):
"""LoRA metadata for an image generated in InvokeAI."""
lora: LoRAModelField = Field(description="The LoRA model")
weight: float = Field(description="The weight of the LoRA model")
class CoreMetadata(BaseModel):
class CoreMetadata(BaseModelExcludeNull):
"""Core generation metadata for an image generated in InvokeAI."""
app_version: str = Field(default=__version__, description="The version of InvokeAI used to generate this image")
generation_mode: str = Field(
description="The generation mode that output this image",
)
@ -64,13 +67,16 @@ class CoreMetadata(BaseModel):
)
refiner_steps: Union[int, None] = Field(default=None, description="The number of steps used for the refiner")
refiner_scheduler: Union[str, None] = Field(default=None, description="The scheduler used for the refiner")
refiner_aesthetic_store: Union[float, None] = Field(
refiner_positive_aesthetic_store: Union[float, None] = Field(
default=None, description="The aesthetic score used for the refiner"
)
refiner_negative_aesthetic_store: Union[float, None] = Field(
default=None, description="The aesthetic score used for the refiner"
)
refiner_start: Union[float, None] = Field(default=None, description="The start value used for refiner denoising")
class ImageMetadata(BaseModel):
class ImageMetadata(BaseModelExcludeNull):
"""An image's generation metadata"""
metadata: Optional[dict] = Field(
@ -133,7 +139,10 @@ class MetadataAccumulatorInvocation(BaseInvocation):
)
refiner_steps: Union[int, None] = Field(default=None, description="The number of steps used for the refiner")
refiner_scheduler: Union[str, None] = Field(default=None, description="The scheduler used for the refiner")
refiner_aesthetic_store: Union[float, None] = Field(
refiner_positive_aesthetic_score: Union[float, None] = Field(
default=None, description="The aesthetic score used for the refiner"
)
refiner_negative_aesthetic_score: Union[float, None] = Field(
default=None, description="The aesthetic score used for the refiner"
)
refiner_start: Union[float, None] = Field(default=None, description="The start value used for refiner denoising")

View File

@ -53,6 +53,7 @@ class MainModelField(BaseModel):
model_name: str = Field(description="Name of the model")
base_model: BaseModelType = Field(description="Base model")
model_type: ModelType = Field(description="Model Type")
class LoRAModelField(BaseModel):
@ -261,6 +262,103 @@ class LoraLoaderInvocation(BaseInvocation):
return output
class SDXLLoraLoaderOutput(BaseInvocationOutput):
"""Model loader output"""
# fmt: off
type: Literal["sdxl_lora_loader_output"] = "sdxl_lora_loader_output"
unet: Optional[UNetField] = Field(default=None, description="UNet submodel")
clip: Optional[ClipField] = Field(default=None, description="Tokenizer and text_encoder submodels")
clip2: Optional[ClipField] = Field(default=None, description="Tokenizer2 and text_encoder2 submodels")
# fmt: on
class SDXLLoraLoaderInvocation(BaseInvocation):
"""Apply selected lora to unet and text_encoder."""
type: Literal["sdxl_lora_loader"] = "sdxl_lora_loader"
lora: Union[LoRAModelField, None] = Field(default=None, description="Lora model name")
weight: float = Field(default=0.75, description="With what weight to apply lora")
unet: Optional[UNetField] = Field(description="UNet model for applying lora")
clip: Optional[ClipField] = Field(description="Clip model for applying lora")
clip2: Optional[ClipField] = Field(description="Clip2 model for applying lora")
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Lora Loader",
"tags": ["lora", "loader"],
"type_hints": {"lora": "lora_model"},
},
}
def invoke(self, context: InvocationContext) -> SDXLLoraLoaderOutput:
if self.lora is None:
raise Exception("No LoRA provided")
base_model = self.lora.base_model
lora_name = self.lora.model_name
if not context.services.model_manager.model_exists(
base_model=base_model,
model_name=lora_name,
model_type=ModelType.Lora,
):
raise Exception(f"Unknown lora name: {lora_name}!")
if self.unet is not None and any(lora.model_name == lora_name for lora in self.unet.loras):
raise Exception(f'Lora "{lora_name}" already applied to unet')
if self.clip is not None and any(lora.model_name == lora_name for lora in self.clip.loras):
raise Exception(f'Lora "{lora_name}" already applied to clip')
if self.clip2 is not None and any(lora.model_name == lora_name for lora in self.clip2.loras):
raise Exception(f'Lora "{lora_name}" already applied to clip2')
output = SDXLLoraLoaderOutput()
if self.unet is not None:
output.unet = copy.deepcopy(self.unet)
output.unet.loras.append(
LoraInfo(
base_model=base_model,
model_name=lora_name,
model_type=ModelType.Lora,
submodel=None,
weight=self.weight,
)
)
if self.clip is not None:
output.clip = copy.deepcopy(self.clip)
output.clip.loras.append(
LoraInfo(
base_model=base_model,
model_name=lora_name,
model_type=ModelType.Lora,
submodel=None,
weight=self.weight,
)
)
if self.clip2 is not None:
output.clip2 = copy.deepcopy(self.clip2)
output.clip2.loras.append(
LoraInfo(
base_model=base_model,
model_name=lora_name,
model_type=ModelType.Lora,
submodel=None,
weight=self.weight,
)
)
return output
class VAEModelField(BaseModel):
"""Vae model field"""

View File

@ -71,9 +71,9 @@ class NoiseOutput(BaseInvocationOutput):
# fmt: on
def build_noise_output(latents_name: str, latents: torch.Tensor):
def build_noise_output(latents_name: str, latents: torch.Tensor, seed: int):
return NoiseOutput(
noise=LatentsField(latents_name=latents_name),
noise=LatentsField(latents_name=latents_name, seed=seed),
width=latents.size()[3] * 8,
height=latents.size()[2] * 8,
)
@ -132,4 +132,4 @@ class NoiseInvocation(BaseInvocation):
)
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, noise)
return build_noise_output(latents_name=name, latents=noise)
return build_noise_output(latents_name=name, latents=noise, seed=self.seed)

View File

@ -0,0 +1,574 @@
# Copyright (c) 2023 Borisov Sergey (https://github.com/StAlKeR7779)
from contextlib import ExitStack
from typing import List, Literal, Optional, Union
import re
import inspect
from pydantic import BaseModel, Field, validator
import torch
import numpy as np
from diffusers import ControlNetModel, DPMSolverMultistepScheduler
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers import SchedulerMixin as Scheduler
from ..models.image import ImageCategory, ImageField, ResourceOrigin
from ...backend.model_management import ONNXModelPatcher
from ...backend.util import choose_torch_device
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from .compel import ConditioningField
from .controlnet_image_processors import ControlField
from .image import ImageOutput
from .model import ModelInfo, UNetField, VaeField
from invokeai.app.invocations.metadata import CoreMetadata
from invokeai.backend import BaseModelType, ModelType, SubModelType
from invokeai.app.util.step_callback import stable_diffusion_step_callback
from ...backend.stable_diffusion import PipelineIntermediateState
from tqdm import tqdm
from .model import ClipField
from .latent import LatentsField, LatentsOutput, build_latents_output, get_scheduler, SAMPLER_NAME_VALUES
from .compel import CompelOutput
ORT_TO_NP_TYPE = {
"tensor(bool)": np.bool_,
"tensor(int8)": np.int8,
"tensor(uint8)": np.uint8,
"tensor(int16)": np.int16,
"tensor(uint16)": np.uint16,
"tensor(int32)": np.int32,
"tensor(uint32)": np.uint32,
"tensor(int64)": np.int64,
"tensor(uint64)": np.uint64,
"tensor(float16)": np.float16,
"tensor(float)": np.float32,
"tensor(double)": np.float64,
}
PRECISION_VALUES = Literal[tuple(list(ORT_TO_NP_TYPE.keys()))]
class ONNXPromptInvocation(BaseInvocation):
type: Literal["prompt_onnx"] = "prompt_onnx"
prompt: str = Field(default="", description="Prompt")
clip: ClipField = Field(None, description="Clip to use")
def invoke(self, context: InvocationContext) -> CompelOutput:
tokenizer_info = context.services.model_manager.get_model(
**self.clip.tokenizer.dict(),
)
text_encoder_info = context.services.model_manager.get_model(
**self.clip.text_encoder.dict(),
)
with tokenizer_info as orig_tokenizer, text_encoder_info as text_encoder, ExitStack() as stack:
loras = [
(context.services.model_manager.get_model(**lora.dict(exclude={"weight"})).context.model, lora.weight)
for lora in self.clip.loras
]
ti_list = []
for trigger in re.findall(r"<[a-zA-Z0-9., _-]+>", self.prompt):
name = trigger[1:-1]
try:
ti_list.append(
(
name,
context.services.model_manager.get_model(
model_name=name,
base_model=self.clip.text_encoder.base_model,
model_type=ModelType.TextualInversion,
).context.model,
)
)
except Exception:
# print(e)
# import traceback
# print(traceback.format_exc())
print(f'Warn: trigger: "{trigger}" not found')
if loras or ti_list:
text_encoder.release_session()
with ONNXModelPatcher.apply_lora_text_encoder(text_encoder, loras), ONNXModelPatcher.apply_ti(
orig_tokenizer, text_encoder, ti_list
) as (tokenizer, ti_manager):
text_encoder.create_session()
# copy from
# https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L153
text_inputs = tokenizer(
self.prompt,
padding="max_length",
max_length=tokenizer.model_max_length,
truncation=True,
return_tensors="np",
)
text_input_ids = text_inputs.input_ids
"""
untruncated_ids = tokenizer(prompt, padding="max_length", return_tensors="np").input_ids
if not np.array_equal(text_input_ids, untruncated_ids):
removed_text = self.tokenizer.batch_decode(
untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
)
logger.warning(
"The following part of your input was truncated because CLIP can only handle sequences up to"
f" {self.tokenizer.model_max_length} tokens: {removed_text}"
)
"""
prompt_embeds = text_encoder(input_ids=text_input_ids.astype(np.int32))[0]
conditioning_name = f"{context.graph_execution_state_id}_{self.id}_conditioning"
# TODO: hacky but works ;D maybe rename latents somehow?
context.services.latents.save(conditioning_name, (prompt_embeds, None))
return CompelOutput(
conditioning=ConditioningField(
conditioning_name=conditioning_name,
),
)
# Text to image
class ONNXTextToLatentsInvocation(BaseInvocation):
"""Generates latents from conditionings."""
type: Literal["t2l_onnx"] = "t2l_onnx"
# Inputs
# fmt: off
positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation")
negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation")
noise: Optional[LatentsField] = Field(description="The noise to use")
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
precision: PRECISION_VALUES = Field(default = "tensor(float16)", description="The precision to use when generating latents")
unet: UNetField = Field(default=None, description="UNet submodel")
control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
# seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
# seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
# fmt: on
@validator("cfg_scale")
def ge_one(cls, v):
"""validate that all cfg_scale values are >= 1"""
if isinstance(v, list):
for i in v:
if i < 1:
raise ValueError("cfg_scale must be greater than 1")
else:
if v < 1:
raise ValueError("cfg_scale must be greater than 1")
return v
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["latents"],
"type_hints": {
"model": "model",
"control": "control",
# "cfg_scale": "float",
"cfg_scale": "number",
},
},
}
# based on
# https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L375
def invoke(self, context: InvocationContext) -> LatentsOutput:
c, _ = context.services.latents.get(self.positive_conditioning.conditioning_name)
uc, _ = context.services.latents.get(self.negative_conditioning.conditioning_name)
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
if isinstance(c, torch.Tensor):
c = c.cpu().numpy()
if isinstance(uc, torch.Tensor):
uc = uc.cpu().numpy()
device = torch.device(choose_torch_device())
prompt_embeds = np.concatenate([uc, c])
latents = context.services.latents.get(self.noise.latents_name)
if isinstance(latents, torch.Tensor):
latents = latents.cpu().numpy()
# TODO: better execution device handling
latents = latents.astype(ORT_TO_NP_TYPE[self.precision])
# get the initial random noise unless the user supplied it
do_classifier_free_guidance = True
# latents_dtype = prompt_embeds.dtype
# latents_shape = (batch_size * num_images_per_prompt, 4, height // 8, width // 8)
# if latents.shape != latents_shape:
# raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}")
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
seed=0, # TODO: refactor this node
)
def torch2numpy(latent: torch.Tensor):
return latent.cpu().numpy()
def numpy2torch(latent, device):
return torch.from_numpy(latent).to(device)
def dispatch_progress(
self, context: InvocationContext, source_node_id: str, intermediate_state: PipelineIntermediateState
) -> None:
stable_diffusion_step_callback(
context=context,
intermediate_state=intermediate_state,
node=self.dict(),
source_node_id=source_node_id,
)
scheduler.set_timesteps(self.steps)
latents = latents * np.float64(scheduler.init_noise_sigma)
extra_step_kwargs = dict()
if "eta" in set(inspect.signature(scheduler.step).parameters.keys()):
extra_step_kwargs.update(
eta=0.0,
)
unet_info = context.services.model_manager.get_model(**self.unet.unet.dict())
with unet_info as unet, ExitStack() as stack:
# loras = [(stack.enter_context(context.services.model_manager.get_model(**lora.dict(exclude={"weight"}))), lora.weight) for lora in self.unet.loras]
loras = [
(context.services.model_manager.get_model(**lora.dict(exclude={"weight"})).context.model, lora.weight)
for lora in self.unet.loras
]
if loras:
unet.release_session()
with ONNXModelPatcher.apply_lora_unet(unet, loras):
# TODO:
_, _, h, w = latents.shape
unet.create_session(h, w)
timestep_dtype = next(
(input.type for input in unet.session.get_inputs() if input.name == "timestep"), "tensor(float16)"
)
timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]
for i in tqdm(range(len(scheduler.timesteps))):
t = scheduler.timesteps[i]
# expand the latents if we are doing classifier free guidance
latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = scheduler.scale_model_input(numpy2torch(latent_model_input, device), t)
latent_model_input = latent_model_input.cpu().numpy()
# predict the noise residual
timestep = np.array([t], dtype=timestep_dtype)
noise_pred = unet(sample=latent_model_input, timestep=timestep, encoder_hidden_states=prompt_embeds)
noise_pred = noise_pred[0]
# perform guidance
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1
scheduler_output = scheduler.step(
numpy2torch(noise_pred, device), t, numpy2torch(latents, device), **extra_step_kwargs
)
latents = torch2numpy(scheduler_output.prev_sample)
state = PipelineIntermediateState(
run_id="test", step=i, timestep=timestep, latents=scheduler_output.prev_sample
)
dispatch_progress(self, context=context, source_node_id=source_node_id, intermediate_state=state)
# call the callback, if provided
# if callback is not None and i % callback_steps == 0:
# callback(i, t, latents)
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=torch.from_numpy(latents))
# Latent to image
class ONNXLatentsToImageInvocation(BaseInvocation):
"""Generates an image from latents."""
type: Literal["l2i_onnx"] = "l2i_onnx"
# Inputs
latents: Optional[LatentsField] = Field(description="The latents to generate an image from")
vae: VaeField = Field(default=None, description="Vae submodel")
metadata: Optional[CoreMetadata] = Field(
default=None, description="Optional core metadata to be written to the image"
)
# tiled: bool = Field(default=False, description="Decode latents by overlaping tiles(less memory consumption)")
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"tags": ["latents", "image"],
},
}
def invoke(self, context: InvocationContext) -> ImageOutput:
latents = context.services.latents.get(self.latents.latents_name)
if self.vae.vae.submodel != SubModelType.VaeDecoder:
raise Exception(f"Expected vae_decoder, found: {self.vae.vae.model_type}")
vae_info = context.services.model_manager.get_model(
**self.vae.vae.dict(),
)
# clear memory as vae decode can request a lot
torch.cuda.empty_cache()
with vae_info as vae:
vae.create_session()
# copied from
# https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L427
latents = 1 / 0.18215 * latents
# image = self.vae_decoder(latent_sample=latents)[0]
# it seems likes there is a strange result for using half-precision vae decoder if batchsize>1
image = np.concatenate([vae(latent_sample=latents[i : i + 1])[0] for i in range(latents.shape[0])])
image = np.clip(image / 2 + 0.5, 0, 1)
image = image.transpose((0, 2, 3, 1))
image = VaeImageProcessor.numpy_to_pil(image)[0]
torch.cuda.empty_cache()
image_dto = context.services.images.create(
image=image,
image_origin=ResourceOrigin.INTERNAL,
image_category=ImageCategory.GENERAL,
node_id=self.id,
session_id=context.graph_execution_state_id,
is_intermediate=self.is_intermediate,
metadata=self.metadata.dict() if self.metadata else None,
)
return ImageOutput(
image=ImageField(image_name=image_dto.image_name),
width=image_dto.width,
height=image_dto.height,
)
class ONNXModelLoaderOutput(BaseInvocationOutput):
"""Model loader output"""
# fmt: off
type: Literal["model_loader_output_onnx"] = "model_loader_output_onnx"
unet: UNetField = Field(default=None, description="UNet submodel")
clip: ClipField = Field(default=None, description="Tokenizer and text_encoder submodels")
vae_decoder: VaeField = Field(default=None, description="Vae submodel")
vae_encoder: VaeField = Field(default=None, description="Vae submodel")
# fmt: on
class ONNXSD1ModelLoaderInvocation(BaseInvocation):
"""Loading submodels of selected model."""
type: Literal["sd1_model_loader_onnx"] = "sd1_model_loader_onnx"
model_name: str = Field(default="", description="Model to load")
# TODO: precision?
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {"tags": ["model", "loader"], "type_hints": {"model_name": "model"}}, # TODO: rename to model_name?
}
def invoke(self, context: InvocationContext) -> ONNXModelLoaderOutput:
model_name = "stable-diffusion-v1-5"
base_model = BaseModelType.StableDiffusion1
# TODO: not found exceptions
if not context.services.model_manager.model_exists(
model_name=model_name,
base_model=BaseModelType.StableDiffusion1,
model_type=ModelType.ONNX,
):
raise Exception(f"Unkown model name: {model_name}!")
return ONNXModelLoaderOutput(
unet=UNetField(
unet=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.UNet,
),
scheduler=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.Scheduler,
),
loras=[],
),
clip=ClipField(
tokenizer=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.Tokenizer,
),
text_encoder=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.TextEncoder,
),
loras=[],
),
vae_decoder=VaeField(
vae=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.VaeDecoder,
),
),
vae_encoder=VaeField(
vae=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=ModelType.ONNX,
submodel=SubModelType.VaeEncoder,
),
),
)
class OnnxModelField(BaseModel):
"""Onnx model field"""
model_name: str = Field(description="Name of the model")
base_model: BaseModelType = Field(description="Base model")
model_type: ModelType = Field(description="Model Type")
class OnnxModelLoaderInvocation(BaseInvocation):
"""Loads a main model, outputting its submodels."""
type: Literal["onnx_model_loader"] = "onnx_model_loader"
model: OnnxModelField = Field(description="The model to load")
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "Onnx Model Loader",
"tags": ["model", "loader"],
"type_hints": {"model": "model"},
},
}
def invoke(self, context: InvocationContext) -> ONNXModelLoaderOutput:
base_model = self.model.base_model
model_name = self.model.model_name
model_type = ModelType.ONNX
# TODO: not found exceptions
if not context.services.model_manager.model_exists(
model_name=model_name,
base_model=base_model,
model_type=model_type,
):
raise Exception(f"Unknown {base_model} {model_type} model: {model_name}")
"""
if not context.services.model_manager.model_exists(
model_name=self.model_name,
model_type=SDModelType.Diffusers,
submodel=SDModelType.Tokenizer,
):
raise Exception(
f"Failed to find tokenizer submodel in {self.model_name}! Check if model corrupted"
)
if not context.services.model_manager.model_exists(
model_name=self.model_name,
model_type=SDModelType.Diffusers,
submodel=SDModelType.TextEncoder,
):
raise Exception(
f"Failed to find text_encoder submodel in {self.model_name}! Check if model corrupted"
)
if not context.services.model_manager.model_exists(
model_name=self.model_name,
model_type=SDModelType.Diffusers,
submodel=SDModelType.UNet,
):
raise Exception(
f"Failed to find unet submodel from {self.model_name}! Check if model corrupted"
)
"""
return ONNXModelLoaderOutput(
unet=UNetField(
unet=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.UNet,
),
scheduler=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.Scheduler,
),
loras=[],
),
clip=ClipField(
tokenizer=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.Tokenizer,
),
text_encoder=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.TextEncoder,
),
loras=[],
skipped_layers=0,
),
vae_decoder=VaeField(
vae=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.VaeDecoder,
),
),
vae_encoder=VaeField(
vae=ModelInfo(
model_name=model_name,
base_model=base_model,
model_type=model_type,
submodel=SubModelType.VaeEncoder,
),
),
)

View File

@ -4,6 +4,8 @@ from typing import Literal
from pydantic import Field
from invokeai.app.invocations.prompt import PromptOutput
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from .math import FloatOutput, IntOutput
@ -64,3 +66,18 @@ class ParamStringInvocation(BaseInvocation):
def invoke(self, context: InvocationContext) -> StringOutput:
return StringOutput(text=self.text)
class ParamPromptInvocation(BaseInvocation):
"""A prompt input parameter"""
type: Literal["param_prompt"] = "param_prompt"
prompt: str = Field(default="", description="The prompt value")
class Config(InvocationConfig):
schema_extra = {
"ui": {"tags": ["param", "prompt"], "title": "Prompt"},
}
def invoke(self, context: InvocationContext) -> PromptOutput:
return PromptOutput(prompt=self.prompt)

View File

@ -1,17 +1,10 @@
import torch
import inspect
from tqdm import tqdm
from typing import List, Literal, Optional, Union
from pydantic import Field, validator
from typing import Literal
from pydantic import Field
from ...backend.model_management import ModelType, SubModelType
from invokeai.app.util.step_callback import stable_diffusion_xl_step_callback
from .baseinvocation import BaseInvocation, BaseInvocationOutput, InvocationConfig, InvocationContext
from .model import UNetField, ClipField, VaeField, MainModelField, ModelInfo
from .compel import ConditioningField
from .latent import LatentsField, SAMPLER_NAME_VALUES, LatentsOutput, get_scheduler, build_latents_output
class SDXLModelLoaderOutput(BaseInvocationOutput):
@ -201,504 +194,3 @@ class SDXLRefinerModelLoaderInvocation(BaseInvocation):
),
),
)
# Text to image
class SDXLTextToLatentsInvocation(BaseInvocation):
"""Generates latents from conditionings."""
type: Literal["t2l_sdxl"] = "t2l_sdxl"
# Inputs
# fmt: off
positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation")
negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation")
noise: Optional[LatentsField] = Field(description="The noise to use")
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
unet: UNetField = Field(default=None, description="UNet submodel")
denoising_end: float = Field(default=1.0, gt=0, le=1, description="")
# control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
# seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
# seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
# fmt: on
@validator("cfg_scale")
def ge_one(cls, v):
"""validate that all cfg_scale values are >= 1"""
if isinstance(v, list):
for i in v:
if i < 1:
raise ValueError("cfg_scale must be greater than 1")
else:
if v < 1:
raise ValueError("cfg_scale must be greater than 1")
return v
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Text To Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
# "cfg_scale": "float",
"cfg_scale": "number",
},
},
}
def dispatch_progress(
self,
context: InvocationContext,
source_node_id: str,
sample,
step,
total_steps,
) -> None:
stable_diffusion_xl_step_callback(
context=context,
node=self.dict(),
source_node_id=source_node_id,
sample=sample,
step=step,
total_steps=total_steps,
)
# based on
# https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L375
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
latents = context.services.latents.get(self.noise.latents_name)
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
prompt_embeds = positive_cond_data.conditionings[0].embeds
pooled_prompt_embeds = positive_cond_data.conditionings[0].pooled_embeds
add_time_ids = positive_cond_data.conditionings[0].add_time_ids
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
negative_prompt_embeds = negative_cond_data.conditionings[0].embeds
negative_pooled_prompt_embeds = negative_cond_data.conditionings[0].pooled_embeds
add_neg_time_ids = negative_cond_data.conditionings[0].add_time_ids
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
)
num_inference_steps = self.steps
scheduler.set_timesteps(num_inference_steps)
timesteps = scheduler.timesteps
latents = latents * scheduler.init_noise_sigma
unet_info = context.services.model_manager.get_model(**self.unet.unet.dict(), context=context)
do_classifier_free_guidance = True
cross_attention_kwargs = None
with unet_info as unet:
extra_step_kwargs = dict()
if "eta" in set(inspect.signature(scheduler.step).parameters.keys()):
extra_step_kwargs.update(
eta=0.0,
)
if "generator" in set(inspect.signature(scheduler.step).parameters.keys()):
extra_step_kwargs.update(
generator=torch.Generator(device=unet.device).manual_seed(0),
)
num_warmup_steps = len(timesteps) - self.steps * scheduler.order
# apply denoising_end
skipped_final_steps = int(round((1 - self.denoising_end) * self.steps))
num_inference_steps = num_inference_steps - skipped_final_steps
timesteps = timesteps[: num_warmup_steps + scheduler.order * num_inference_steps]
if not context.services.configuration.sequential_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0)
add_time_ids = torch.cat([add_neg_time_ids, add_time_ids], dim=0)
prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_text_embeds = add_text_embeds.to(device=unet.device, dtype=unet.dtype)
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = scheduler.scale_model_input(latent_model_input, t)
# predict the noise residual
added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
noise_pred = unet(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
# perform guidance
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond)
# del noise_pred_uncond
# del noise_pred_text
# if do_classifier_free_guidance and guidance_rescale > 0.0:
# # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
# noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
# compute the previous noisy sample x_t -> x_t-1
latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0):
progress_bar.update()
self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps)
# if callback is not None and i % callback_steps == 0:
# callback(i, t, latents)
else:
negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
negative_prompt_embeds = negative_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_neg_time_ids = add_neg_time_ids.to(device=unet.device, dtype=unet.dtype)
pooled_prompt_embeds = pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
# latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = scheduler.scale_model_input(latents, t)
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# predict the noise residual
added_cond_kwargs = {"text_embeds": negative_pooled_prompt_embeds, "time_ids": add_neg_time_ids}
noise_pred_uncond = unet(
latent_model_input,
t,
encoder_hidden_states=negative_prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
added_cond_kwargs = {"text_embeds": pooled_prompt_embeds, "time_ids": add_time_ids}
noise_pred_text = unet(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
# perform guidance
noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond)
# del noise_pred_text
# del noise_pred_uncond
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# if do_classifier_free_guidance and guidance_rescale > 0.0:
# # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
# noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
# compute the previous noisy sample x_t -> x_t-1
latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
# del noise_pred
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0):
progress_bar.update()
self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps)
# if callback is not None and i % callback_steps == 0:
# callback(i, t, latents)
#################
latents = latents.to("cpu")
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=latents)
class SDXLLatentsToLatentsInvocation(BaseInvocation):
"""Generates latents from conditionings."""
type: Literal["l2l_sdxl"] = "l2l_sdxl"
# Inputs
# fmt: off
positive_conditioning: Optional[ConditioningField] = Field(description="Positive conditioning for generation")
negative_conditioning: Optional[ConditioningField] = Field(description="Negative conditioning for generation")
noise: Optional[LatentsField] = Field(description="The noise to use")
steps: int = Field(default=10, gt=0, description="The number of steps to use to generate the image")
cfg_scale: Union[float, List[float]] = Field(default=7.5, ge=1, description="The Classifier-Free Guidance, higher values may result in a result closer to the prompt", )
scheduler: SAMPLER_NAME_VALUES = Field(default="euler", description="The scheduler to use" )
unet: UNetField = Field(default=None, description="UNet submodel")
latents: Optional[LatentsField] = Field(description="Initial latents")
denoising_start: float = Field(default=0.0, ge=0, le=1, description="")
denoising_end: float = Field(default=1.0, ge=0, le=1, description="")
# control: Union[ControlField, list[ControlField]] = Field(default=None, description="The control to use")
# seamless: bool = Field(default=False, description="Whether or not to generate an image that can tile without seams", )
# seamless_axes: str = Field(default="", description="The axes to tile the image on, 'x' and/or 'y'")
# fmt: on
@validator("cfg_scale")
def ge_one(cls, v):
"""validate that all cfg_scale values are >= 1"""
if isinstance(v, list):
for i in v:
if i < 1:
raise ValueError("cfg_scale must be greater than 1")
else:
if v < 1:
raise ValueError("cfg_scale must be greater than 1")
return v
# Schema customisation
class Config(InvocationConfig):
schema_extra = {
"ui": {
"title": "SDXL Latents to Latents",
"tags": ["latents"],
"type_hints": {
"model": "model",
# "cfg_scale": "float",
"cfg_scale": "number",
},
},
}
def dispatch_progress(
self,
context: InvocationContext,
source_node_id: str,
sample,
step,
total_steps,
) -> None:
stable_diffusion_xl_step_callback(
context=context,
node=self.dict(),
source_node_id=source_node_id,
sample=sample,
step=step,
total_steps=total_steps,
)
# based on
# https://github.com/huggingface/diffusers/blob/3ebbaf7c96801271f9e6c21400033b6aa5ffcf29/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py#L375
@torch.no_grad()
def invoke(self, context: InvocationContext) -> LatentsOutput:
graph_execution_state = context.services.graph_execution_manager.get(context.graph_execution_state_id)
source_node_id = graph_execution_state.prepared_source_mapping[self.id]
latents = context.services.latents.get(self.latents.latents_name)
positive_cond_data = context.services.latents.get(self.positive_conditioning.conditioning_name)
prompt_embeds = positive_cond_data.conditionings[0].embeds
pooled_prompt_embeds = positive_cond_data.conditionings[0].pooled_embeds
add_time_ids = positive_cond_data.conditionings[0].add_time_ids
negative_cond_data = context.services.latents.get(self.negative_conditioning.conditioning_name)
negative_prompt_embeds = negative_cond_data.conditionings[0].embeds
negative_pooled_prompt_embeds = negative_cond_data.conditionings[0].pooled_embeds
add_neg_time_ids = negative_cond_data.conditionings[0].add_time_ids
scheduler = get_scheduler(
context=context,
scheduler_info=self.unet.scheduler,
scheduler_name=self.scheduler,
)
# apply denoising_start
num_inference_steps = self.steps
scheduler.set_timesteps(num_inference_steps)
t_start = int(round(self.denoising_start * num_inference_steps))
timesteps = scheduler.timesteps[t_start * scheduler.order :]
num_inference_steps = num_inference_steps - t_start
# apply noise(if provided)
if self.noise is not None and timesteps.shape[0] > 0:
noise = context.services.latents.get(self.noise.latents_name)
latents = scheduler.add_noise(latents, noise, timesteps[:1])
del noise
unet_info = context.services.model_manager.get_model(
**self.unet.unet.dict(),
context=context,
)
do_classifier_free_guidance = True
cross_attention_kwargs = None
with unet_info as unet:
# apply scheduler extra args
extra_step_kwargs = dict()
if "eta" in set(inspect.signature(scheduler.step).parameters.keys()):
extra_step_kwargs.update(
eta=0.0,
)
if "generator" in set(inspect.signature(scheduler.step).parameters.keys()):
extra_step_kwargs.update(
generator=torch.Generator(device=unet.device).manual_seed(0),
)
num_warmup_steps = max(len(timesteps) - num_inference_steps * scheduler.order, 0)
# apply denoising_end
skipped_final_steps = int(round((1 - self.denoising_end) * self.steps))
num_inference_steps = num_inference_steps - skipped_final_steps
timesteps = timesteps[: num_warmup_steps + scheduler.order * num_inference_steps]
if not context.services.configuration.sequential_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0)
add_time_ids = torch.cat([add_neg_time_ids, add_time_ids], dim=0)
prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_text_embeds = add_text_embeds.to(device=unet.device, dtype=unet.dtype)
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = scheduler.scale_model_input(latent_model_input, t)
# predict the noise residual
added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
noise_pred = unet(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
# perform guidance
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond)
# del noise_pred_uncond
# del noise_pred_text
# if do_classifier_free_guidance and guidance_rescale > 0.0:
# # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
# noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
# compute the previous noisy sample x_t -> x_t-1
latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0):
progress_bar.update()
self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps)
# if callback is not None and i % callback_steps == 0:
# callback(i, t, latents)
else:
negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
negative_prompt_embeds = negative_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_neg_time_ids = add_neg_time_ids.to(device=unet.device, dtype=unet.dtype)
pooled_prompt_embeds = pooled_prompt_embeds.to(device=unet.device, dtype=unet.dtype)
prompt_embeds = prompt_embeds.to(device=unet.device, dtype=unet.dtype)
add_time_ids = add_time_ids.to(device=unet.device, dtype=unet.dtype)
latents = latents.to(device=unet.device, dtype=unet.dtype)
with tqdm(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
# latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
latent_model_input = scheduler.scale_model_input(latents, t)
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# predict the noise residual
added_cond_kwargs = {"text_embeds": negative_pooled_prompt_embeds, "time_ids": add_time_ids}
noise_pred_uncond = unet(
latent_model_input,
t,
encoder_hidden_states=negative_prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
added_cond_kwargs = {"text_embeds": pooled_prompt_embeds, "time_ids": add_time_ids}
noise_pred_text = unet(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
)[0]
# perform guidance
noise_pred = noise_pred_uncond + self.cfg_scale * (noise_pred_text - noise_pred_uncond)
# del noise_pred_text
# del noise_pred_uncond
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# if do_classifier_free_guidance and guidance_rescale > 0.0:
# # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
# noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
# compute the previous noisy sample x_t -> x_t-1
latents = scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0]
# del noise_pred
# import gc
# gc.collect()
# torch.cuda.empty_cache()
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % scheduler.order == 0):
progress_bar.update()
self.dispatch_progress(context, source_node_id, latents, i, num_inference_steps)
# if callback is not None and i % callback_steps == 0:
# callback(i, t, latents)
#################
latents = latents.to("cpu")
torch.cuda.empty_cache()
name = f"{context.graph_execution_state_id}__{self.id}"
context.services.latents.save(name, latents)
return build_latents_output(latents_name=name, latents=latents)

View File

@ -25,7 +25,6 @@ class BoardImageRecordStorageBase(ABC):
@abstractmethod
def remove_image_from_board(
self,
board_id: str,
image_name: str,
) -> None:
"""Removes an image from a board."""
@ -154,7 +153,6 @@ class SqliteBoardImageRecordStorage(BoardImageRecordStorageBase):
def remove_image_from_board(
self,
board_id: str,
image_name: str,
) -> None:
try:
@ -162,9 +160,9 @@ class SqliteBoardImageRecordStorage(BoardImageRecordStorageBase):
self._cursor.execute(
"""--sql
DELETE FROM board_images
WHERE board_id = ? AND image_name = ?;
WHERE image_name = ?;
""",
(board_id, image_name),
(image_name,),
)
self._conn.commit()
except sqlite3.Error as e:

View File

@ -31,7 +31,6 @@ class BoardImagesServiceABC(ABC):
@abstractmethod
def remove_image_from_board(
self,
board_id: str,
image_name: str,
) -> None:
"""Removes an image from a board."""
@ -93,10 +92,9 @@ class BoardImagesService(BoardImagesServiceABC):
def remove_image_from_board(
self,
board_id: str,
image_name: str,
) -> None:
self._services.board_image_records.remove_image_from_board(board_id, image_name)
self._services.board_image_records.remove_image_from_board(image_name)
def get_all_board_image_names_for_board(
self,

View File

@ -24,11 +24,10 @@ InvokeAI:
sequential_guidance: false
precision: float16
max_cache_size: 6
max_vram_cache_size: 2.7
max_vram_cache_size: 0.5
always_use_cpu: false
free_gpu_mem: false
Features:
restore: true
esrgan: true
patchmatch: true
internet_available: true
@ -165,15 +164,15 @@ import pydoc
import os
import sys
from argparse import ArgumentParser
from omegaconf import OmegaConf, DictConfig
from omegaconf import OmegaConf, DictConfig, ListConfig
from pathlib import Path
from pydantic import BaseSettings, Field, parse_obj_as
from typing import ClassVar, Dict, List, Set, Literal, Union, get_origin, get_type_hints, get_args
INIT_FILE = Path("invokeai.yaml")
MODEL_CORE = Path("models/core")
DB_FILE = Path("invokeai.db")
LEGACY_INIT_FILE = Path("invokeai.init")
DEFAULT_MAX_VRAM = 0.5
class InvokeAISettings(BaseSettings):
@ -190,7 +189,12 @@ class InvokeAISettings(BaseSettings):
opt = parser.parse_args(argv)
for name in self.__fields__:
if name not in self._excluded():
setattr(self, name, getattr(opt, name))
value = getattr(opt, name)
if isinstance(value, ListConfig):
value = list(value)
elif isinstance(value, DictConfig):
value = dict(value)
setattr(self, name, value)
def to_yaml(self) -> str:
"""
@ -283,14 +287,10 @@ class InvokeAISettings(BaseSettings):
return [
"type",
"initconf",
"gpu_mem_reserved",
"max_loaded_models",
"version",
"from_file",
"model",
"restore",
"root",
"nsfw_checker",
]
class Config:
@ -356,8 +356,8 @@ class InvokeAISettings(BaseSettings):
def _find_root() -> Path:
venv = Path(os.environ.get("VIRTUAL_ENV") or ".")
if os.environ.get("INVOKEAI_ROOT"):
root = Path(os.environ.get("INVOKEAI_ROOT")).resolve()
elif any([(venv.parent / x).exists() for x in [INIT_FILE, LEGACY_INIT_FILE, MODEL_CORE]]):
root = Path(os.environ["INVOKEAI_ROOT"])
elif any([(venv.parent / x).exists() for x in [INIT_FILE, LEGACY_INIT_FILE]]):
root = (venv.parent).resolve()
else:
root = Path("~/invokeai").expanduser().resolve()
@ -389,21 +389,17 @@ class InvokeAIAppConfig(InvokeAISettings):
internet_available : bool = Field(default=True, description="If true, attempt to download models on the fly; otherwise only use local models", category='Features')
log_tokenization : bool = Field(default=False, description="Enable logging of parsed prompt tokens.", category='Features')
patchmatch : bool = Field(default=True, description="Enable/disable patchmatch inpaint code", category='Features')
restore : bool = Field(default=True, description="Enable/disable face restoration code (DEPRECATED)", category='DEPRECATED')
always_use_cpu : bool = Field(default=False, description="If true, use the CPU for rendering even if a GPU is available.", category='Memory/Performance')
free_gpu_mem : bool = Field(default=False, description="If true, purge model from GPU after each generation.", category='Memory/Performance')
max_loaded_models : int = Field(default=3, gt=0, description="(DEPRECATED: use max_cache_size) Maximum number of models to keep in memory for rapid switching", category='DEPRECATED')
max_cache_size : float = Field(default=6.0, gt=0, description="Maximum memory amount used by model cache for rapid switching", category='Memory/Performance')
max_vram_cache_size : float = Field(default=2.75, ge=0, description="Amount of VRAM reserved for model storage", category='Memory/Performance')
gpu_mem_reserved : float = Field(default=2.75, ge=0, description="DEPRECATED: use max_vram_cache_size. Amount of VRAM reserved for model storage", category='DEPRECATED')
nsfw_checker : bool = Field(default=True, description="DEPRECATED: use Web settings to enable/disable", category='DEPRECATED')
precision : Literal[tuple(['auto','float16','float32','autocast'])] = Field(default='auto',description='Floating point precision', category='Memory/Performance')
sequential_guidance : bool = Field(default=False, description="Whether to calculate guidance in serial instead of in parallel, lowering memory requirements", category='Memory/Performance')
xformers_enabled : bool = Field(default=True, description="Enable/disable memory-efficient attention", category='Memory/Performance')
tiled_decode : bool = Field(default=False, description="Whether to enable tiled VAE decode (reduces memory consumption with some performance penalty)", category='Memory/Performance')
root : Path = Field(default=_find_root(), description='InvokeAI runtime root directory', category='Paths')
root : Path = Field(default=None, description='InvokeAI runtime root directory', category='Paths')
autoimport_dir : Path = Field(default='autoimport', description='Path to a directory of models files to be imported on startup.', category='Paths')
lora_dir : Path = Field(default=None, description='Path to a directory of LoRA/LyCORIS models to be imported on startup.', category='Paths')
embedding_dir : Path = Field(default=None, description='Path to a directory of Textual Inversion embeddings to be imported on startup.', category='Paths')
@ -415,8 +411,7 @@ class InvokeAIAppConfig(InvokeAISettings):
outdir : Path = Field(default='outputs', description='Default folder for output images', category='Paths')
from_file : Path = Field(default=None, description='Take command input from the indicated file (command-line client only)', category='Paths')
use_memory_db : bool = Field(default=False, description='Use in-memory database for storing image metadata', category='Paths')
model : str = Field(default='stable-diffusion-1.5', description='Initial model name', category='Models')
ignore_missing_core_models : bool = Field(default=False, description='Ignore missing models in models/core/convert', category='Features')
log_handlers : List[str] = Field(default=["console"], description='Log handler. Valid options are "console", "file=<path>", "syslog=path|address:host:port", "http=<url>"', category="Logging")
# note - would be better to read the log_format values from logging.py, but this creates circular dependencies issues
@ -426,6 +421,9 @@ class InvokeAIAppConfig(InvokeAISettings):
version : bool = Field(default=False, description="Show InvokeAI version and exit", category="Other")
# fmt: on
class Config:
validate_assignment = True
def parse_args(self, argv: List[str] = None, conf: DictConfig = None, clobber=False):
"""
Update settings with contents of init file, environment, and
@ -472,9 +470,11 @@ class InvokeAIAppConfig(InvokeAISettings):
Path to the runtime root directory
"""
if self.root:
return Path(self.root).expanduser().absolute()
root = Path(self.root).expanduser().absolute()
else:
return self.find_root()
root = self.find_root().expanduser().absolute()
self.root = root # insulate ourselves from relative paths that may change
return root
@property
def root_dir(self) -> Path:

View File

@ -1,4 +1,4 @@
from ..invocations.latent import LatentsToImageInvocation, TextToLatentsInvocation
from ..invocations.latent import LatentsToImageInvocation, DenoiseLatentsInvocation
from ..invocations.image import ImageNSFWBlurInvocation
from ..invocations.noise import NoiseInvocation
from ..invocations.compel import CompelInvocation
@ -23,7 +23,7 @@ def create_text_to_image() -> LibraryGraph:
"3": NoiseInvocation(id="3"),
"4": CompelInvocation(id="4"),
"5": CompelInvocation(id="5"),
"6": TextToLatentsInvocation(id="6"),
"6": DenoiseLatentsInvocation(id="6"),
"7": LatentsToImageInvocation(id="7"),
"8": ImageNSFWBlurInvocation(id="8"),
},

View File

@ -35,6 +35,7 @@ class EventServiceBase:
source_node_id: str,
progress_image: Optional[ProgressImage],
step: int,
order: int,
total_steps: int,
) -> None:
"""Emitted when there is generation progress"""
@ -46,6 +47,7 @@ class EventServiceBase:
source_node_id=source_node_id,
progress_image=progress_image.dict() if progress_image is not None else None,
step=step,
order=order,
total_steps=total_steps,
),
)

View File

@ -289,9 +289,10 @@ class ImageService(ImageServiceABC):
def get_metadata(self, image_name: str) -> Optional[ImageMetadata]:
try:
image_record = self._services.image_records.get(image_name)
metadata = self._services.image_records.get_metadata(image_name)
if not image_record.session_id:
return ImageMetadata()
return ImageMetadata(metadata=metadata)
session_raw = self._services.graph_execution_manager.get_raw(image_record.session_id)
graph = None
@ -303,7 +304,6 @@ class ImageService(ImageServiceABC):
self._services.logger.warn(f"Failed to parse session graph: {e}")
graph = None
metadata = self._services.image_records.get_metadata(image_name)
return ImageMetadata(graph=graph, metadata=metadata)
except ImageRecordNotFoundException:
self._services.logger.error("Image record not found")

View File

@ -32,6 +32,7 @@ class InvocationServices:
logger: "Logger"
model_manager: "ModelManagerServiceBase"
processor: "InvocationProcessorABC"
performance_statistics: "InvocationStatsServiceBase"
queue: "InvocationQueueABC"
def __init__(
@ -47,6 +48,7 @@ class InvocationServices:
logger: "Logger",
model_manager: "ModelManagerServiceBase",
processor: "InvocationProcessorABC",
performance_statistics: "InvocationStatsServiceBase",
queue: "InvocationQueueABC",
):
self.board_images = board_images
@ -61,4 +63,5 @@ class InvocationServices:
self.logger = logger
self.model_manager = model_manager
self.processor = processor
self.performance_statistics = performance_statistics
self.queue = queue

View File

@ -0,0 +1,223 @@
# Copyright 2023 Lincoln D. Stein <lincoln.stein@gmail.com>
"""Utility to collect execution time and GPU usage stats on invocations in flight"""
"""
Usage:
statistics = InvocationStatsService(graph_execution_manager)
with statistics.collect_stats(invocation, graph_execution_state.id):
... execute graphs...
statistics.log_stats()
Typical output:
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Graph stats: c7764585-9c68-4d9d-a199-55e8186790f3
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> Node Calls Seconds VRAM Used
[2023-08-02 18:03:04,507]::[InvokeAI]::INFO --> main_model_loader 1 0.005s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> clip_skip 1 0.004s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> compel 2 0.512s 0.26G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> rand_int 1 0.001s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> range_of_size 1 0.001s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> iterate 1 0.001s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> metadata_accumulator 1 0.002s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> noise 1 0.002s 0.01G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> t2l 1 3.541s 1.93G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> l2i 1 0.679s 0.58G
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> TOTAL GRAPH EXECUTION TIME: 4.749s
[2023-08-02 18:03:04,508]::[InvokeAI]::INFO --> Current VRAM utilization 0.01G
The abstract base class for this class is InvocationStatsServiceBase. An implementing class which
writes to the system log is stored in InvocationServices.performance_statistics.
"""
import time
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from dataclasses import dataclass, field
from typing import Dict
import torch
import invokeai.backend.util.logging as logger
from ..invocations.baseinvocation import BaseInvocation
from .graph import GraphExecutionState
from .item_storage import ItemStorageABC
class InvocationStatsServiceBase(ABC):
"Abstract base class for recording node memory/time performance statistics"
@abstractmethod
def __init__(self, graph_execution_manager: ItemStorageABC["GraphExecutionState"]):
"""
Initialize the InvocationStatsService and reset counters to zero
:param graph_execution_manager: Graph execution manager for this session
"""
pass
@abstractmethod
def collect_stats(
self,
invocation: BaseInvocation,
graph_execution_state_id: str,
) -> AbstractContextManager:
"""
Return a context object that will capture the statistics on the execution
of invocaation. Use with: to place around the part of the code that executes the invocation.
:param invocation: BaseInvocation object from the current graph.
:param graph_execution_state: GraphExecutionState object from the current session.
"""
pass
@abstractmethod
def reset_stats(self, graph_execution_state_id: str):
"""
Reset all statistics for the indicated graph
:param graph_execution_state_id
"""
pass
@abstractmethod
def reset_all_stats(self):
"""Zero all statistics"""
pass
@abstractmethod
def update_invocation_stats(
self,
graph_id: str,
invocation_type: str,
time_used: float,
vram_used: float,
):
"""
Add timing information on execution of a node. Usually
used internally.
:param graph_id: ID of the graph that is currently executing
:param invocation_type: String literal type of the node
:param time_used: Time used by node's exection (sec)
:param vram_used: Maximum VRAM used during exection (GB)
"""
pass
@abstractmethod
def log_stats(self):
"""
Write out the accumulated statistics to the log or somewhere else.
"""
pass
@dataclass
class NodeStats:
"""Class for tracking execution stats of an invocation node"""
calls: int = 0
time_used: float = 0.0 # seconds
max_vram: float = 0.0 # GB
@dataclass
class NodeLog:
"""Class for tracking node usage"""
# {node_type => NodeStats}
nodes: Dict[str, NodeStats] = field(default_factory=dict)
class InvocationStatsService(InvocationStatsServiceBase):
"""Accumulate performance information about a running graph. Collects time spent in each node,
as well as the maximum and current VRAM utilisation for CUDA systems"""
def __init__(self, graph_execution_manager: ItemStorageABC["GraphExecutionState"]):
self.graph_execution_manager = graph_execution_manager
# {graph_id => NodeLog}
self._stats: Dict[str, NodeLog] = {}
class StatsContext:
def __init__(self, invocation: BaseInvocation, graph_id: str, collector: "InvocationStatsServiceBase"):
self.invocation = invocation
self.collector = collector
self.graph_id = graph_id
self.start_time = 0
def __enter__(self):
self.start_time = time.time()
if torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
def __exit__(self, *args):
self.collector.update_invocation_stats(
self.graph_id,
self.invocation.type,
time.time() - self.start_time,
torch.cuda.max_memory_allocated() / 1e9 if torch.cuda.is_available() else 0.0,
)
def collect_stats(
self,
invocation: BaseInvocation,
graph_execution_state_id: str,
) -> StatsContext:
"""
Return a context object that will capture the statistics.
:param invocation: BaseInvocation object from the current graph.
:param graph_execution_state: GraphExecutionState object from the current session.
"""
if not self._stats.get(graph_execution_state_id): # first time we're seeing this
self._stats[graph_execution_state_id] = NodeLog()
return self.StatsContext(invocation, graph_execution_state_id, self)
def reset_all_stats(self):
"""Zero all statistics"""
self._stats = {}
def reset_stats(self, graph_execution_id: str):
"""Zero the statistics for the indicated graph."""
try:
self._stats.pop(graph_execution_id)
except KeyError:
logger.warning(f"Attempted to clear statistics for unknown graph {graph_execution_id}")
def update_invocation_stats(self, graph_id: str, invocation_type: str, time_used: float, vram_used: float):
"""
Add timing information on execution of a node. Usually
used internally.
:param graph_id: ID of the graph that is currently executing
:param invocation_type: String literal type of the node
:param time_used: Floating point seconds used by node's exection
"""
if not self._stats[graph_id].nodes.get(invocation_type):
self._stats[graph_id].nodes[invocation_type] = NodeStats()
stats = self._stats[graph_id].nodes[invocation_type]
stats.calls += 1
stats.time_used += time_used
stats.max_vram = max(stats.max_vram, vram_used)
def log_stats(self):
"""
Send the statistics to the system logger at the info level.
Stats will only be printed if when the execution of the graph
is complete.
"""
completed = set()
for graph_id, node_log in self._stats.items():
current_graph_state = self.graph_execution_manager.get(graph_id)
if not current_graph_state.is_complete():
continue
total_time = 0
logger.info(f"Graph stats: {graph_id}")
logger.info("Node Calls Seconds VRAM Used")
for node_type, stats in self._stats[graph_id].nodes.items():
logger.info(f"{node_type:<20} {stats.calls:>5} {stats.time_used:7.3f}s {stats.max_vram:4.2f}G")
total_time += stats.time_used
logger.info(f"TOTAL GRAPH EXECUTION TIME: {total_time:7.3f}s")
if torch.cuda.is_available():
logger.info("Current VRAM utilization " + "%4.2fG" % (torch.cuda.memory_allocated() / 1e9))
completed.add(graph_id)
for graph_id in completed:
del self._stats[graph_id]

View File

@ -3,9 +3,10 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from logging import Logger
from pathlib import Path
from pydantic import Field
from typing import Optional, Union, Callable, List, Tuple, TYPE_CHECKING
from typing import Literal, Optional, Union, Callable, List, Tuple, TYPE_CHECKING
from types import ModuleType
from invokeai.backend.model_management import (
@ -193,7 +194,7 @@ class ModelManagerServiceBase(ABC):
self,
model_name: str,
base_model: BaseModelType,
model_type: Union[ModelType.Main, ModelType.Vae],
model_type: Literal[ModelType.Main, ModelType.Vae],
) -> AddModelResult:
"""
Convert a checkpoint file into a diffusers folder, deleting the cached
@ -292,7 +293,7 @@ class ModelManagerService(ModelManagerServiceBase):
def __init__(
self,
config: InvokeAIAppConfig,
logger: ModuleType,
logger: Logger,
):
"""
Initialize with the path to the models.yaml config file.
@ -396,7 +397,7 @@ class ModelManagerService(ModelManagerServiceBase):
model_type,
)
def model_info(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> dict:
def model_info(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> Union[dict, None]:
"""
Given a model name returns a dict-like (OmegaConf) object describing it.
"""
@ -416,7 +417,7 @@ class ModelManagerService(ModelManagerServiceBase):
"""
return self.mgr.list_models(base_model, model_type)
def list_model(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> dict:
def list_model(self, model_name: str, base_model: BaseModelType, model_type: ModelType) -> Union[dict, None]:
"""
Return information about the model using the same format as list_models()
"""
@ -429,7 +430,7 @@ class ModelManagerService(ModelManagerServiceBase):
model_type: ModelType,
model_attributes: dict,
clobber: bool = False,
) -> None:
) -> AddModelResult:
"""
Update the named model with a dictionary of attributes. Will fail with an
assertion error if the name already exists. Pass clobber=True to overwrite.
@ -478,7 +479,7 @@ class ModelManagerService(ModelManagerServiceBase):
self,
model_name: str,
base_model: BaseModelType,
model_type: Union[ModelType.Main, ModelType.Vae],
model_type: Literal[ModelType.Main, ModelType.Vae],
convert_dest_directory: Optional[Path] = Field(
default=None, description="Optional directory location for merged model"
),
@ -573,9 +574,9 @@ class ModelManagerService(ModelManagerServiceBase):
default=None, description="Base model shared by all models to be merged"
),
merged_model_name: str = Field(default=None, description="Name of destination model after merging"),
alpha: Optional[float] = 0.5,
alpha: float = 0.5,
interp: Optional[MergeInterpolationMethod] = None,
force: Optional[bool] = False,
force: bool = False,
merge_dest_directory: Optional[Path] = Field(
default=None, description="Optional directory location for merged model"
),
@ -633,8 +634,8 @@ class ModelManagerService(ModelManagerServiceBase):
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
new_name: str = None,
new_base: BaseModelType = None,
new_name: Optional[str] = None,
new_base: Optional[BaseModelType] = None,
):
"""
Rename the indicated model. Can provide a new name and/or a new base.

View File

@ -0,0 +1,8 @@
from pydantic import Field
from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
class BoardImage(BaseModelExcludeNull):
board_id: str = Field(description="The id of the board")
image_name: str = Field(description="The name of the image")

View File

@ -1,10 +1,11 @@
from typing import Optional, Union
from datetime import datetime
from pydantic import BaseModel, Extra, Field, StrictBool, StrictStr
from pydantic import Field
from invokeai.app.util.misc import get_iso_timestamp
from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
class BoardRecord(BaseModel):
class BoardRecord(BaseModelExcludeNull):
"""Deserialized board record."""
board_id: str = Field(description="The unique ID of the board.")

View File

@ -1,13 +1,14 @@
import datetime
from typing import Optional, Union
from pydantic import BaseModel, Extra, Field, StrictBool, StrictStr
from pydantic import Extra, Field, StrictBool, StrictStr
from invokeai.app.models.image import ImageCategory, ResourceOrigin
from invokeai.app.util.misc import get_iso_timestamp
from invokeai.app.util.model_exclude_null import BaseModelExcludeNull
class ImageRecord(BaseModel):
class ImageRecord(BaseModelExcludeNull):
"""Deserialized image record without metadata."""
image_name: str = Field(description="The unique name of the image.")
@ -40,7 +41,7 @@ class ImageRecord(BaseModel):
"""The node ID that generated this image, if it is a generated image."""
class ImageRecordChanges(BaseModel, extra=Extra.forbid):
class ImageRecordChanges(BaseModelExcludeNull, extra=Extra.forbid):
"""A set of changes to apply to an image record.
Only limited changes are valid:
@ -60,7 +61,7 @@ class ImageRecordChanges(BaseModel, extra=Extra.forbid):
"""The image's new `is_intermediate` flag."""
class ImageUrlsDTO(BaseModel):
class ImageUrlsDTO(BaseModelExcludeNull):
"""The URLs for an image and its thumbnail."""
image_name: str = Field(description="The unique name of the image.")
@ -76,11 +77,15 @@ class ImageDTO(ImageRecord, ImageUrlsDTO):
board_id: Optional[str] = Field(description="The id of the board the image belongs to, if one exists.")
"""The id of the board the image belongs to, if one exists."""
pass
def image_record_to_dto(
image_record: ImageRecord, image_url: str, thumbnail_url: str, board_id: Optional[str]
image_record: ImageRecord,
image_url: str,
thumbnail_url: str,
board_id: Optional[str],
) -> ImageDTO:
"""Converts an image record to an image DTO."""
return ImageDTO(

View File

@ -1,14 +1,15 @@
import time
import traceback
from threading import Event, Thread, BoundedSemaphore
from ..invocations.baseinvocation import InvocationContext
from .invocation_queue import InvocationQueueItem
from .invoker import InvocationProcessorABC, Invoker
from ..models.exceptions import CanceledException
from threading import BoundedSemaphore, Event, Thread
import invokeai.backend.util.logging as logger
from ..invocations.baseinvocation import InvocationContext
from ..models.exceptions import CanceledException
from .invocation_queue import InvocationQueueItem
from .invocation_stats import InvocationStatsServiceBase
from .invoker import InvocationProcessorABC, Invoker
class DefaultInvocationProcessor(InvocationProcessorABC):
__invoker_thread: Thread
@ -35,6 +36,8 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
def __process(self, stop_event: Event):
try:
self.__threadLimit.acquire()
statistics: InvocationStatsServiceBase = self.__invoker.services.performance_statistics
while not stop_event.is_set():
try:
queue_item: InvocationQueueItem = self.__invoker.services.queue.get()
@ -83,35 +86,38 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
# Invoke
try:
outputs = invocation.invoke(
InvocationContext(
services=self.__invoker.services,
graph_execution_state_id=graph_execution_state.id,
with statistics.collect_stats(invocation, graph_execution_state.id):
outputs = invocation.invoke(
InvocationContext(
services=self.__invoker.services,
graph_execution_state_id=graph_execution_state.id,
)
)
)
# Check queue to see if this is canceled, and skip if so
if self.__invoker.services.queue.is_canceled(graph_execution_state.id):
continue
# Check queue to see if this is canceled, and skip if so
if self.__invoker.services.queue.is_canceled(graph_execution_state.id):
continue
# Save outputs and history
graph_execution_state.complete(invocation.id, outputs)
# Save outputs and history
graph_execution_state.complete(invocation.id, outputs)
# Save the state changes
self.__invoker.services.graph_execution_manager.set(graph_execution_state)
# Save the state changes
self.__invoker.services.graph_execution_manager.set(graph_execution_state)
# Send complete event
self.__invoker.services.events.emit_invocation_complete(
graph_execution_state_id=graph_execution_state.id,
node=invocation.dict(),
source_node_id=source_node_id,
result=outputs.dict(),
)
# Send complete event
self.__invoker.services.events.emit_invocation_complete(
graph_execution_state_id=graph_execution_state.id,
node=invocation.dict(),
source_node_id=source_node_id,
result=outputs.dict(),
)
statistics.log_stats()
except KeyboardInterrupt:
pass
except CanceledException:
statistics.reset_stats(graph_execution_state.id)
pass
except Exception as e:
@ -133,7 +139,7 @@ class DefaultInvocationProcessor(InvocationProcessorABC):
error_type=e.__class__.__name__,
error=error,
)
statistics.reset_stats(graph_execution_state.id)
pass
# Check queue to see if this is canceled, and skip if so

View File

@ -20,6 +20,6 @@ class LocalUrlService(UrlServiceBase):
# These paths are determined by the routes in invokeai/app/api/routers/images.py
if thumbnail:
return f"{self._base_url}/images/{image_basename}/thumbnail"
return f"{self._base_url}/images/i/{image_basename}/thumbnail"
return f"{self._base_url}/images/{image_basename}/full"
return f"{self._base_url}/images/i/{image_basename}/full"

View File

@ -18,5 +18,5 @@ SEED_MAX = np.iinfo(np.uint32).max
def get_random_seed():
rng = np.random.default_rng(seed=0)
rng = np.random.default_rng(seed=None)
return int(rng.integers(0, SEED_MAX))

View File

@ -0,0 +1,23 @@
from typing import Any
from pydantic import BaseModel
"""
We want to exclude null values from objects that make their way to the client.
Unfortunately there is no built-in way to do this in pydantic, so we need to override the default
dict method to do this.
From https://github.com/tiangolo/fastapi/discussions/8882#discussioncomment-5154541
"""
class BaseModelExcludeNull(BaseModel):
def dict(self, *args, **kwargs) -> dict[str, Any]:
"""
Override the default dict method to exclude None values in the response
"""
kwargs.pop("exclude_none", None)
return super().dict(*args, exclude_none=True, **kwargs)
pass

View File

@ -4,9 +4,9 @@ from invokeai.app.models.exceptions import CanceledException
from invokeai.app.models.image import ProgressImage
from ..invocations.baseinvocation import InvocationContext
from ...backend.util.util import image_to_dataURL
from ...backend.generator.base import Generator
from ...backend.stable_diffusion import PipelineIntermediateState
from invokeai.app.services.config import InvokeAIAppConfig
from ...backend.model_management.models import BaseModelType
def sample_to_lowres_estimated_image(samples, latent_rgb_factors, smooth_matrix=None):
@ -29,6 +29,7 @@ def stable_diffusion_step_callback(
intermediate_state: PipelineIntermediateState,
node: dict,
source_node_id: str,
base_model: BaseModelType,
):
if context.services.queue.is_canceled(context.graph_execution_state_id):
raise CanceledException
@ -56,23 +57,50 @@ def stable_diffusion_step_callback(
# TODO: only output a preview image when requested
# origingally adapted from code by @erucipe and @keturn here:
# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
if base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
# fast latents preview matrix for sdxl
# generated by @StAlKeR7779
sdxl_latent_rgb_factors = torch.tensor(
[
# R G B
[0.3816, 0.4930, 0.5320],
[-0.3753, 0.1631, 0.1739],
[0.1770, 0.3588, -0.2048],
[-0.4350, -0.2644, -0.4289],
],
dtype=sample.dtype,
device=sample.device,
)
# these updated numbers for v1.5 are from @torridgristle
v1_5_latent_rgb_factors = torch.tensor(
[
# R G B
[0.3444, 0.1385, 0.0670], # L1
[0.1247, 0.4027, 0.1494], # L2
[-0.3192, 0.2513, 0.2103], # L3
[-0.1307, -0.1874, -0.7445], # L4
],
dtype=sample.dtype,
device=sample.device,
)
sdxl_smooth_matrix = torch.tensor(
[
[0.0358, 0.0964, 0.0358],
[0.0964, 0.4711, 0.0964],
[0.0358, 0.0964, 0.0358],
],
dtype=sample.dtype,
device=sample.device,
)
image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
else:
# origingally adapted from code by @erucipe and @keturn here:
# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
# these updated numbers for v1.5 are from @torridgristle
v1_5_latent_rgb_factors = torch.tensor(
[
# R G B
[0.3444, 0.1385, 0.0670], # L1
[0.1247, 0.4027, 0.1494], # L2
[-0.3192, 0.2513, 0.2103], # L3
[-0.1307, -0.1874, -0.7445], # L4
],
dtype=sample.dtype,
device=sample.device,
)
image = sample_to_lowres_estimated_image(sample, v1_5_latent_rgb_factors)
(width, height) = image.size
width *= 8
@ -86,59 +114,6 @@ def stable_diffusion_step_callback(
source_node_id=source_node_id,
progress_image=ProgressImage(width=width, height=height, dataURL=dataURL),
step=intermediate_state.step,
total_steps=node["steps"],
)
def stable_diffusion_xl_step_callback(
context: InvocationContext,
node: dict,
source_node_id: str,
sample,
step,
total_steps,
):
if context.services.queue.is_canceled(context.graph_execution_state_id):
raise CanceledException
sdxl_latent_rgb_factors = torch.tensor(
[
# R G B
[0.3816, 0.4930, 0.5320],
[-0.3753, 0.1631, 0.1739],
[0.1770, 0.3588, -0.2048],
[-0.4350, -0.2644, -0.4289],
],
dtype=sample.dtype,
device=sample.device,
)
sdxl_smooth_matrix = torch.tensor(
[
# [ 0.0478, 0.1285, 0.0478],
# [ 0.1285, 0.2948, 0.1285],
# [ 0.0478, 0.1285, 0.0478],
[0.0358, 0.0964, 0.0358],
[0.0964, 0.4711, 0.0964],
[0.0358, 0.0964, 0.0358],
],
dtype=sample.dtype,
device=sample.device,
)
image = sample_to_lowres_estimated_image(sample, sdxl_latent_rgb_factors, sdxl_smooth_matrix)
(width, height) = image.size
width *= 8
height *= 8
dataURL = image_to_dataURL(image, image_format="JPEG")
context.services.events.emit_generator_progress(
graph_execution_state_id=context.graph_execution_state_id,
node=node,
source_node_id=source_node_id,
progress_image=ProgressImage(width=width, height=height, dataURL=dataURL),
step=step,
total_steps=total_steps,
order=intermediate_state.order,
total_steps=intermediate_state.total_steps,
)

View File

@ -1,6 +1,5 @@
"""
Initialization file for invokeai.backend
"""
from .generator import InvokeAIGeneratorBasicParams, InvokeAIGenerator, InvokeAIGeneratorOutput, Img2Img, Inpaint
from .model_management import ModelManager, ModelCache, BaseModelType, ModelType, SubModelType, ModelInfo
from .model_management.models import SilenceWarnings

View File

@ -1,12 +0,0 @@
"""
Initialization file for the invokeai.generator package
"""
from .base import (
InvokeAIGenerator,
InvokeAIGeneratorBasicParams,
InvokeAIGeneratorOutput,
Img2Img,
Inpaint,
Generator,
)
from .inpaint import infill_methods

View File

@ -1,559 +0,0 @@
"""
Base class for invokeai.backend.generator.*
including img2img, txt2img, and inpaint
"""
from __future__ import annotations
import itertools
import dataclasses
import diffusers
import os
import random
import traceback
from abc import ABCMeta
from argparse import Namespace
from contextlib import nullcontext
import cv2
import numpy as np
import torch
from PIL import Image, ImageChops, ImageFilter
from accelerate.utils import set_seed
from diffusers import DiffusionPipeline
from tqdm import trange
from typing import Callable, List, Iterator, Optional, Type, Union
from dataclasses import dataclass, field
from diffusers.schedulers import SchedulerMixin as Scheduler
import invokeai.backend.util.logging as logger
from ..image_util import configure_model_padding
from ..util.util import rand_perlin_2d
from ..stable_diffusion.diffusers_pipeline import StableDiffusionGeneratorPipeline
from ..stable_diffusion.schedulers import SCHEDULER_MAP
downsampling = 8
@dataclass
class InvokeAIGeneratorBasicParams:
seed: Optional[int] = None
width: int = 512
height: int = 512
cfg_scale: float = 7.5
steps: int = 20
ddim_eta: float = 0.0
scheduler: str = "ddim"
precision: str = "float16"
perlin: float = 0.0
threshold: float = 0.0
seamless: bool = False
seamless_axes: List[str] = field(default_factory=lambda: ["x", "y"])
h_symmetry_time_pct: Optional[float] = None
v_symmetry_time_pct: Optional[float] = None
variation_amount: float = 0.0
with_variations: list = field(default_factory=list)
@dataclass
class InvokeAIGeneratorOutput:
"""
InvokeAIGeneratorOutput is a dataclass that contains the outputs of a generation
operation, including the image, its seed, the model name used to generate the image
and the model hash, as well as all the generate() parameters that went into
generating the image (in .params, also available as attributes)
"""
image: Image.Image
seed: int
model_hash: str
attention_maps_images: List[Image.Image]
params: Namespace
# we are interposing a wrapper around the original Generator classes so that
# old code that calls Generate will continue to work.
class InvokeAIGenerator(metaclass=ABCMeta):
def __init__(
self,
model_info: dict,
params: InvokeAIGeneratorBasicParams = InvokeAIGeneratorBasicParams(),
**kwargs,
):
self.model_info = model_info
self.params = params
self.kwargs = kwargs
def generate(
self,
conditioning: tuple,
scheduler,
callback: Optional[Callable] = None,
step_callback: Optional[Callable] = None,
iterations: int = 1,
**keyword_args,
) -> Iterator[InvokeAIGeneratorOutput]:
"""
Return an iterator across the indicated number of generations.
Each time the iterator is called it will return an InvokeAIGeneratorOutput
object. Use like this:
outputs = txt2img.generate(prompt='banana sushi', iterations=5)
for result in outputs:
print(result.image, result.seed)
In the typical case of wanting to get just a single image, iterations
defaults to 1 and do:
output = next(txt2img.generate(prompt='banana sushi')
Pass None to get an infinite iterator.
outputs = txt2img.generate(prompt='banana sushi', iterations=None)
for o in outputs:
print(o.image, o.seed)
"""
generator_args = dataclasses.asdict(self.params)
generator_args.update(keyword_args)
model_info = self.model_info
model_name = model_info.name
model_hash = model_info.hash
with model_info.context as model:
gen_class = self._generator_class()
generator = gen_class(model, self.params.precision, **self.kwargs)
if self.params.variation_amount > 0:
generator.set_variation(
generator_args.get("seed"),
generator_args.get("variation_amount"),
generator_args.get("with_variations"),
)
if isinstance(model, DiffusionPipeline):
for component in [model.unet, model.vae]:
configure_model_padding(
component, generator_args.get("seamless", False), generator_args.get("seamless_axes")
)
else:
configure_model_padding(
model, generator_args.get("seamless", False), generator_args.get("seamless_axes")
)
iteration_count = range(iterations) if iterations else itertools.count(start=0, step=1)
for i in iteration_count:
results = generator.generate(
conditioning=conditioning,
step_callback=step_callback,
sampler=scheduler,
**generator_args,
)
output = InvokeAIGeneratorOutput(
image=results[0][0],
seed=results[0][1],
attention_maps_images=results[0][2],
model_hash=model_hash,
params=Namespace(model_name=model_name, **generator_args),
)
if callback:
callback(output)
yield output
@classmethod
def schedulers(self) -> List[str]:
"""
Return list of all the schedulers that we currently handle.
"""
return list(SCHEDULER_MAP.keys())
def load_generator(self, model: StableDiffusionGeneratorPipeline, generator_class: Type[Generator]):
return generator_class(model, self.params.precision)
@classmethod
def _generator_class(cls) -> Type[Generator]:
"""
In derived classes return the name of the generator to apply.
If you don't override will return the name of the derived
class, which nicely parallels the generator class names.
"""
return Generator
# ------------------------------------
class Img2Img(InvokeAIGenerator):
def generate(
self, init_image: Union[Image.Image, torch.FloatTensor], strength: float = 0.75, **keyword_args
) -> Iterator[InvokeAIGeneratorOutput]:
return super().generate(init_image=init_image, strength=strength, **keyword_args)
@classmethod
def _generator_class(cls):
from .img2img import Img2Img
return Img2Img
# ------------------------------------
# Takes all the arguments of Img2Img and adds the mask image and the seam/infill stuff
class Inpaint(Img2Img):
def generate(
self,
mask_image: Union[Image.Image, torch.FloatTensor],
# Seam settings - when 0, doesn't fill seam
seam_size: int = 96,
seam_blur: int = 16,
seam_strength: float = 0.7,
seam_steps: int = 30,
tile_size: int = 32,
inpaint_replace=False,
infill_method=None,
inpaint_width=None,
inpaint_height=None,
inpaint_fill: tuple(int) = (0x7F, 0x7F, 0x7F, 0xFF),
**keyword_args,
) -> Iterator[InvokeAIGeneratorOutput]:
return super().generate(
mask_image=mask_image,
seam_size=seam_size,
seam_blur=seam_blur,
seam_strength=seam_strength,
seam_steps=seam_steps,
tile_size=tile_size,
inpaint_replace=inpaint_replace,
infill_method=infill_method,
inpaint_width=inpaint_width,
inpaint_height=inpaint_height,
inpaint_fill=inpaint_fill,
**keyword_args,
)
@classmethod
def _generator_class(cls):
from .inpaint import Inpaint
return Inpaint
class Generator:
downsampling_factor: int
latent_channels: int
precision: str
model: DiffusionPipeline
def __init__(self, model: DiffusionPipeline, precision: str, **kwargs):
self.model = model
self.precision = precision
self.seed = None
self.latent_channels = model.unet.config.in_channels
self.downsampling_factor = downsampling # BUG: should come from model or config
self.perlin = 0.0
self.threshold = 0
self.variation_amount = 0
self.with_variations = []
self.use_mps_noise = False
self.free_gpu_mem = None
# this is going to be overridden in img2img.py, txt2img.py and inpaint.py
def get_make_image(self, **kwargs):
"""
Returns a function returning an image derived from the prompt and the initial image
Return value depends on the seed at the time you call it
"""
raise NotImplementedError("image_iterator() must be implemented in a descendent class")
def set_variation(self, seed, variation_amount, with_variations):
self.seed = seed
self.variation_amount = variation_amount
self.with_variations = with_variations
def generate(
self,
width,
height,
sampler,
init_image=None,
iterations=1,
seed=None,
image_callback=None,
step_callback=None,
threshold=0.0,
perlin=0.0,
h_symmetry_time_pct=None,
v_symmetry_time_pct=None,
free_gpu_mem: bool = False,
**kwargs,
):
scope = nullcontext
self.free_gpu_mem = free_gpu_mem
attention_maps_images = []
attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image())
make_image = self.get_make_image(
sampler=sampler,
init_image=init_image,
width=width,
height=height,
step_callback=step_callback,
threshold=threshold,
perlin=perlin,
h_symmetry_time_pct=h_symmetry_time_pct,
v_symmetry_time_pct=v_symmetry_time_pct,
attention_maps_callback=attention_maps_callback,
**kwargs,
)
results = []
seed = seed if seed is not None and seed >= 0 else self.new_seed()
first_seed = seed
seed, initial_noise = self.generate_initial_noise(seed, width, height)
# There used to be an additional self.model.ema_scope() here, but it breaks
# the inpaint-1.5 model. Not sure what it did.... ?
with scope(self.model.device.type):
for n in trange(iterations, desc="Generating"):
x_T = None
if self.variation_amount > 0:
set_seed(seed)
target_noise = self.get_noise(width, height)
x_T = self.slerp(self.variation_amount, initial_noise, target_noise)
elif initial_noise is not None:
# i.e. we specified particular variations
x_T = initial_noise
else:
set_seed(seed)
try:
x_T = self.get_noise(width, height)
except:
logger.error("An error occurred while getting initial noise")
print(traceback.format_exc())
# Pass on the seed in case a layer beneath us needs to generate noise on its own.
image = make_image(x_T, seed)
results.append([image, seed, attention_maps_images])
if image_callback is not None:
attention_maps_image = None if len(attention_maps_images) == 0 else attention_maps_images[-1]
image_callback(
image,
seed,
first_seed=first_seed,
attention_maps_image=attention_maps_image,
)
seed = self.new_seed()
# Free up memory from the last generation.
clear_cuda_cache = kwargs["clear_cuda_cache"] if "clear_cuda_cache" in kwargs else None
if clear_cuda_cache is not None:
clear_cuda_cache()
return results
def sample_to_image(self, samples) -> Image.Image:
"""
Given samples returned from a sampler, converts
it into a PIL Image
"""
with torch.inference_mode():
image = self.model.decode_latents(samples)
return self.model.numpy_to_pil(image)[0]
def repaste_and_color_correct(
self,
result: Image.Image,
init_image: Image.Image,
init_mask: Image.Image,
mask_blur_radius: int = 8,
) -> Image.Image:
if init_image is None or init_mask is None:
return result
# Get the original alpha channel of the mask if there is one.
# Otherwise it is some other black/white image format ('1', 'L' or 'RGB')
pil_init_mask = init_mask.getchannel("A") if init_mask.mode == "RGBA" else init_mask.convert("L")
pil_init_image = init_image.convert("RGBA") # Add an alpha channel if one doesn't exist
# Build an image with only visible pixels from source to use as reference for color-matching.
init_rgb_pixels = np.asarray(init_image.convert("RGB"), dtype=np.uint8)
init_a_pixels = np.asarray(pil_init_image.getchannel("A"), dtype=np.uint8)
init_mask_pixels = np.asarray(pil_init_mask, dtype=np.uint8)
# Get numpy version of result
np_image = np.asarray(result, dtype=np.uint8)
# Mask and calculate mean and standard deviation
mask_pixels = init_a_pixels * init_mask_pixels > 0
np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :]
np_image_masked = np_image[mask_pixels, :]
if np_init_rgb_pixels_masked.size > 0:
init_means = np_init_rgb_pixels_masked.mean(axis=0)
init_std = np_init_rgb_pixels_masked.std(axis=0)
gen_means = np_image_masked.mean(axis=0)
gen_std = np_image_masked.std(axis=0)
# Color correct
np_matched_result = np_image.copy()
np_matched_result[:, :, :] = (
(
(
(np_matched_result[:, :, :].astype(np.float32) - gen_means[None, None, :])
/ gen_std[None, None, :]
)
* init_std[None, None, :]
+ init_means[None, None, :]
)
.clip(0, 255)
.astype(np.uint8)
)
matched_result = Image.fromarray(np_matched_result, mode="RGB")
else:
matched_result = Image.fromarray(np_image, mode="RGB")
# Blur the mask out (into init image) by specified amount
if mask_blur_radius > 0:
nm = np.asarray(pil_init_mask, dtype=np.uint8)
nmd = cv2.erode(
nm,
kernel=np.ones((3, 3), dtype=np.uint8),
iterations=int(mask_blur_radius / 2),
)
pmd = Image.fromarray(nmd, mode="L")
blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(mask_blur_radius))
else:
blurred_init_mask = pil_init_mask
multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, self.pil_image.split()[-1])
# Paste original on color-corrected generation (using blurred mask)
matched_result.paste(init_image, (0, 0), mask=multiplied_blurred_init_mask)
return matched_result
@staticmethod
def sample_to_lowres_estimated_image(samples):
# origingally adapted from code by @erucipe and @keturn here:
# https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7
# these updated numbers for v1.5 are from @torridgristle
v1_5_latent_rgb_factors = torch.tensor(
[
# R G B
[0.3444, 0.1385, 0.0670], # L1
[0.1247, 0.4027, 0.1494], # L2
[-0.3192, 0.2513, 0.2103], # L3
[-0.1307, -0.1874, -0.7445], # L4
],
dtype=samples.dtype,
device=samples.device,
)
latent_image = samples[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors
latents_ubyte = (
((latent_image + 1) / 2).clamp(0, 1).mul(0xFF).byte() # change scale from -1..1 to 0..1 # to 0..255
).cpu()
return Image.fromarray(latents_ubyte.numpy())
def generate_initial_noise(self, seed, width, height):
initial_noise = None
if self.variation_amount > 0 or len(self.with_variations) > 0:
# use fixed initial noise plus random noise per iteration
set_seed(seed)
initial_noise = self.get_noise(width, height)
for v_seed, v_weight in self.with_variations:
seed = v_seed
set_seed(seed)
next_noise = self.get_noise(width, height)
initial_noise = self.slerp(v_weight, initial_noise, next_noise)
if self.variation_amount > 0:
random.seed() # reset RNG to an actually random state, so we can get a random seed for variations
seed = random.randrange(0, np.iinfo(np.uint32).max)
return (seed, initial_noise)
def get_perlin_noise(self, width, height):
fixdevice = "cpu" if (self.model.device.type == "mps") else self.model.device
# limit noise to only the diffusion image channels, not the mask channels
input_channels = min(self.latent_channels, 4)
# round up to the nearest block of 8
temp_width = int((width + 7) / 8) * 8
temp_height = int((height + 7) / 8) * 8
noise = torch.stack(
[
rand_perlin_2d((temp_height, temp_width), (8, 8), device=self.model.device).to(fixdevice)
for _ in range(input_channels)
],
dim=0,
).to(self.model.device)
return noise[0:4, 0:height, 0:width]
def new_seed(self):
self.seed = random.randrange(0, np.iinfo(np.uint32).max)
return self.seed
def slerp(self, t, v0, v1, DOT_THRESHOLD=0.9995):
"""
Spherical linear interpolation
Args:
t (float/np.ndarray): Float value between 0.0 and 1.0
v0 (np.ndarray): Starting vector
v1 (np.ndarray): Final vector
DOT_THRESHOLD (float): Threshold for considering the two vectors as
colineal. Not recommended to alter this.
Returns:
v2 (np.ndarray): Interpolation vector between v0 and v1
"""
inputs_are_torch = False
if not isinstance(v0, np.ndarray):
inputs_are_torch = True
v0 = v0.detach().cpu().numpy()
if not isinstance(v1, np.ndarray):
inputs_are_torch = True
v1 = v1.detach().cpu().numpy()
dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
if np.abs(dot) > DOT_THRESHOLD:
v2 = (1 - t) * v0 + t * v1
else:
theta_0 = np.arccos(dot)
sin_theta_0 = np.sin(theta_0)
theta_t = theta_0 * t
sin_theta_t = np.sin(theta_t)
s0 = np.sin(theta_0 - theta_t) / sin_theta_0
s1 = sin_theta_t / sin_theta_0
v2 = s0 * v0 + s1 * v1
if inputs_are_torch:
v2 = torch.from_numpy(v2).to(self.model.device)
return v2
# this is a handy routine for debugging use. Given a generated sample,
# convert it into a PNG image and store it at the indicated path
def save_sample(self, sample, filepath):
image = self.sample_to_image(sample)
dirname = os.path.dirname(filepath) or "."
if not os.path.exists(dirname):
logger.info(f"creating directory {dirname}")
os.makedirs(dirname, exist_ok=True)
image.save(filepath, "PNG")
def torch_dtype(self) -> torch.dtype:
return torch.float16 if self.precision == "float16" else torch.float32
# returns a tensor filled with random numbers from a normal distribution
def get_noise(self, width, height):
device = self.model.device
# limit noise to only the diffusion image channels, not the mask channels
input_channels = min(self.latent_channels, 4)
x = torch.randn(
[
1,
input_channels,
height // self.downsampling_factor,
width // self.downsampling_factor,
],
dtype=self.torch_dtype(),
device=device,
)
if self.perlin > 0.0:
perlin_noise = self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor)
x = (1 - self.perlin) * x + self.perlin * perlin_noise
return x

View File

@ -1,92 +0,0 @@
"""
invokeai.backend.generator.img2img descends from .generator
"""
from typing import Optional
import torch
from accelerate.utils import set_seed
from diffusers import logging
from ..stable_diffusion import (
ConditioningData,
PostprocessingSettings,
StableDiffusionGeneratorPipeline,
)
from .base import Generator
class Img2Img(Generator):
def __init__(self, model, precision):
super().__init__(model, precision)
self.init_latent = None # by get_noise()
def get_make_image(
self,
sampler,
steps,
cfg_scale,
ddim_eta,
conditioning,
init_image,
strength,
step_callback=None,
threshold=0.0,
warmup=0.2,
perlin=0.0,
h_symmetry_time_pct=None,
v_symmetry_time_pct=None,
attention_maps_callback=None,
**kwargs,
):
"""
Returns a function returning an image derived from the prompt and the initial image
Return value depends on the seed at the time you call it.
"""
self.perlin = perlin
# noinspection PyTypeChecker
pipeline: StableDiffusionGeneratorPipeline = self.model
pipeline.scheduler = sampler
uc, c, extra_conditioning_info = conditioning
conditioning_data = ConditioningData(
uc,
c,
cfg_scale,
extra_conditioning_info,
postprocessing_settings=PostprocessingSettings(
threshold=threshold,
warmup=warmup,
h_symmetry_time_pct=h_symmetry_time_pct,
v_symmetry_time_pct=v_symmetry_time_pct,
),
).add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)
def make_image(x_T: torch.Tensor, seed: int):
# FIXME: use x_T for initial seeded noise
# We're not at the moment because the pipeline automatically resizes init_image if
# necessary, which the x_T input might not match.
# In the meantime, reset the seed prior to generating pipeline output so we at least get the same result.
logging.set_verbosity_error() # quench safety check warnings
pipeline_output = pipeline.img2img_from_embeddings(
init_image,
strength,
steps,
conditioning_data,
noise_func=self.get_noise_like,
callback=step_callback,
seed=seed,
)
if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None:
attention_maps_callback(pipeline_output.attention_map_saver)
return pipeline.numpy_to_pil(pipeline_output.images)[0]
return make_image
def get_noise_like(self, like: torch.Tensor):
device = like.device
x = torch.randn_like(like, device=device)
if self.perlin > 0.0:
shape = like.shape
x = (1 - self.perlin) * x + self.perlin * self.get_perlin_noise(shape[3], shape[2])
return x

View File

@ -1,379 +0,0 @@
"""
invokeai.backend.generator.inpaint descends from .generator
"""
from __future__ import annotations
import math
from typing import Tuple, Union, Optional
import cv2
import numpy as np
import torch
from PIL import Image, ImageChops, ImageFilter, ImageOps
from ..image_util import PatchMatch, debug_image
from ..stable_diffusion.diffusers_pipeline import (
ConditioningData,
StableDiffusionGeneratorPipeline,
image_resized_to_grid_as_tensor,
)
from .img2img import Img2Img
def infill_methods() -> list[str]:
methods = [
"tile",
"solid",
]
if PatchMatch.patchmatch_available():
methods.insert(0, "patchmatch")
return methods
class Inpaint(Img2Img):
def __init__(self, model, precision):
self.inpaint_height = 0
self.inpaint_width = 0
self.enable_image_debugging = False
self.init_latent = None
self.pil_image = None
self.pil_mask = None
self.mask_blur_radius = 0
self.infill_method = None
super().__init__(model, precision)
# Outpaint support code
def get_tile_images(self, image: np.ndarray, width=8, height=8):
_nrows, _ncols, depth = image.shape
_strides = image.strides
nrows, _m = divmod(_nrows, height)
ncols, _n = divmod(_ncols, width)
if _m != 0 or _n != 0:
return None
return np.lib.stride_tricks.as_strided(
np.ravel(image),
shape=(nrows, ncols, height, width, depth),
strides=(height * _strides[0], width * _strides[1], *_strides),
writeable=False,
)
def infill_patchmatch(self, im: Image.Image) -> Image.Image:
if im.mode != "RGBA":
return im
# Skip patchmatch if patchmatch isn't available
if not PatchMatch.patchmatch_available():
return im
# Patchmatch (note, we may want to expose patch_size? Increasing it significantly impacts performance though)
im_patched_np = PatchMatch.inpaint(im.convert("RGB"), ImageOps.invert(im.split()[-1]), patch_size=3)
im_patched = Image.fromarray(im_patched_np, mode="RGB")
return im_patched
def tile_fill_missing(self, im: Image.Image, tile_size: int = 16, seed: Optional[int] = None) -> Image.Image:
# Only fill if there's an alpha layer
if im.mode != "RGBA":
return im
a = np.asarray(im, dtype=np.uint8)
tile_size_tuple = (tile_size, tile_size)
# Get the image as tiles of a specified size
tiles = self.get_tile_images(a, *tile_size_tuple).copy()
# Get the mask as tiles
tiles_mask = tiles[:, :, :, :, 3]
# Find any mask tiles with any fully transparent pixels (we will be replacing these later)
tmask_shape = tiles_mask.shape
tiles_mask = tiles_mask.reshape(math.prod(tiles_mask.shape))
n, ny = (math.prod(tmask_shape[0:2])), math.prod(tmask_shape[2:])
tiles_mask = tiles_mask > 0
tiles_mask = tiles_mask.reshape((n, ny)).all(axis=1)
# Get RGB tiles in single array and filter by the mask
tshape = tiles.shape
tiles_all = tiles.reshape((math.prod(tiles.shape[0:2]), *tiles.shape[2:]))
filtered_tiles = tiles_all[tiles_mask]
if len(filtered_tiles) == 0:
return im
# Find all invalid tiles and replace with a random valid tile
replace_count = (tiles_mask == False).sum()
rng = np.random.default_rng(seed=seed)
tiles_all[np.logical_not(tiles_mask)] = filtered_tiles[
rng.choice(filtered_tiles.shape[0], replace_count), :, :, :
]
# Convert back to an image
tiles_all = tiles_all.reshape(tshape)
tiles_all = tiles_all.swapaxes(1, 2)
st = tiles_all.reshape(
(
math.prod(tiles_all.shape[0:2]),
math.prod(tiles_all.shape[2:4]),
tiles_all.shape[4],
)
)
si = Image.fromarray(st, mode="RGBA")
return si
def mask_edge(self, mask: Image.Image, edge_size: int, edge_blur: int) -> Image.Image:
npimg = np.asarray(mask, dtype=np.uint8)
# Detect any partially transparent regions
npgradient = np.uint8(255 * (1.0 - np.floor(np.abs(0.5 - np.float32(npimg) / 255.0) * 2.0)))
# Detect hard edges
npedge = cv2.Canny(npimg, threshold1=100, threshold2=200)
# Combine
npmask = npgradient + npedge
# Expand
npmask = cv2.dilate(npmask, np.ones((3, 3), np.uint8), iterations=int(edge_size / 2))
new_mask = Image.fromarray(npmask)
if edge_blur > 0:
new_mask = new_mask.filter(ImageFilter.BoxBlur(edge_blur))
return ImageOps.invert(new_mask)
def seam_paint(
self,
im: Image.Image,
seam_size: int,
seam_blur: int,
seed,
steps,
cfg_scale,
ddim_eta,
conditioning,
strength,
noise,
infill_method,
step_callback,
) -> Image.Image:
hard_mask = self.pil_image.split()[-1].copy()
mask = self.mask_edge(hard_mask, seam_size, seam_blur)
make_image = self.get_make_image(
steps,
cfg_scale,
ddim_eta,
conditioning,
init_image=im.copy().convert("RGBA"),
mask_image=mask,
strength=strength,
mask_blur_radius=0,
seam_size=0,
step_callback=step_callback,
inpaint_width=im.width,
inpaint_height=im.height,
infill_method=infill_method,
)
seam_noise = self.get_noise(im.width, im.height)
result = make_image(seam_noise, seed=None)
return result
@torch.no_grad()
def get_make_image(
self,
steps,
cfg_scale,
ddim_eta,
conditioning,
init_image: Union[Image.Image, torch.FloatTensor],
mask_image: Union[Image.Image, torch.FloatTensor],
strength: float,
mask_blur_radius: int = 8,
# Seam settings - when 0, doesn't fill seam
seam_size: int = 96,
seam_blur: int = 16,
seam_strength: float = 0.7,
seam_steps: int = 30,
tile_size: int = 32,
step_callback=None,
inpaint_replace=False,
enable_image_debugging=False,
infill_method=None,
inpaint_width=None,
inpaint_height=None,
inpaint_fill: Tuple[int, int, int, int] = (0x7F, 0x7F, 0x7F, 0xFF),
attention_maps_callback=None,
**kwargs,
):
"""
Returns a function returning an image derived from the prompt and
the initial image + mask. Return value depends on the seed at
the time you call it. kwargs are 'init_latent' and 'strength'
"""
self.enable_image_debugging = enable_image_debugging
infill_method = infill_method or infill_methods()[0]
self.infill_method = infill_method
self.inpaint_width = inpaint_width
self.inpaint_height = inpaint_height
if isinstance(init_image, Image.Image):
self.pil_image = init_image.copy()
# Do infill
if infill_method == "patchmatch" and PatchMatch.patchmatch_available():
init_filled = self.infill_patchmatch(self.pil_image.copy())
elif infill_method == "tile":
init_filled = self.tile_fill_missing(self.pil_image.copy(), seed=self.seed, tile_size=tile_size)
elif infill_method == "solid":
solid_bg = Image.new("RGBA", init_image.size, inpaint_fill)
init_filled = Image.alpha_composite(solid_bg, init_image)
else:
raise ValueError(f"Non-supported infill type {infill_method}", infill_method)
init_filled.paste(init_image, (0, 0), init_image.split()[-1])
# Resize if requested for inpainting
if inpaint_width and inpaint_height:
init_filled = init_filled.resize((inpaint_width, inpaint_height))
debug_image(init_filled, "init_filled", debug_status=self.enable_image_debugging)
# Create init tensor
init_image = image_resized_to_grid_as_tensor(init_filled.convert("RGB"))
if isinstance(mask_image, Image.Image):
self.pil_mask = mask_image.copy()
debug_image(
mask_image,
"mask_image BEFORE multiply with pil_image",
debug_status=self.enable_image_debugging,
)
init_alpha = self.pil_image.getchannel("A")
if mask_image.mode != "L":
# FIXME: why do we get passed an RGB image here? We can only use single-channel.
mask_image = mask_image.convert("L")
mask_image = ImageChops.multiply(mask_image, init_alpha)
self.pil_mask = mask_image
# Resize if requested for inpainting
if inpaint_width and inpaint_height:
mask_image = mask_image.resize((inpaint_width, inpaint_height))
debug_image(
mask_image,
"mask_image AFTER multiply with pil_image",
debug_status=self.enable_image_debugging,
)
mask: torch.FloatTensor = image_resized_to_grid_as_tensor(mask_image, normalize=False)
else:
mask: torch.FloatTensor = mask_image
self.mask_blur_radius = mask_blur_radius
# noinspection PyTypeChecker
pipeline: StableDiffusionGeneratorPipeline = self.model
# todo: support cross-attention control
uc, c, _ = conditioning
conditioning_data = ConditioningData(uc, c, cfg_scale).add_scheduler_args_if_applicable(
pipeline.scheduler, eta=ddim_eta
)
def make_image(x_T: torch.Tensor, seed: int):
pipeline_output = pipeline.inpaint_from_embeddings(
init_image=init_image,
mask=1 - mask, # expects white means "paint here."
strength=strength,
num_inference_steps=steps,
conditioning_data=conditioning_data,
noise_func=self.get_noise_like,
callback=step_callback,
seed=seed,
)
if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None:
attention_maps_callback(pipeline_output.attention_map_saver)
result = self.postprocess_size_and_mask(pipeline.numpy_to_pil(pipeline_output.images)[0])
# Seam paint if this is our first pass (seam_size set to 0 during seam painting)
if seam_size > 0:
old_image = self.pil_image or init_image
old_mask = self.pil_mask or mask_image
result = self.seam_paint(
result,
seam_size,
seam_blur,
seed,
seam_steps,
cfg_scale,
ddim_eta,
conditioning,
seam_strength,
x_T,
infill_method,
step_callback,
)
# Restore original settings
self.get_make_image(
steps,
cfg_scale,
ddim_eta,
conditioning,
old_image,
old_mask,
strength,
mask_blur_radius,
seam_size,
seam_blur,
seam_strength,
seam_steps,
tile_size,
step_callback,
inpaint_replace,
enable_image_debugging,
inpaint_width=inpaint_width,
inpaint_height=inpaint_height,
infill_method=infill_method,
**kwargs,
)
return result
return make_image
def sample_to_image(self, samples) -> Image.Image:
gen_result = super().sample_to_image(samples).convert("RGB")
return self.postprocess_size_and_mask(gen_result)
def postprocess_size_and_mask(self, gen_result: Image.Image) -> Image.Image:
debug_image(gen_result, "gen_result", debug_status=self.enable_image_debugging)
# Resize if necessary
if self.inpaint_width and self.inpaint_height:
gen_result = gen_result.resize(self.pil_image.size)
if self.pil_image is None or self.pil_mask is None:
return gen_result
corrected_result = self.repaste_and_color_correct(
gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius
)
debug_image(
corrected_result,
"corrected_result",
debug_status=self.enable_image_debugging,
)
return corrected_result

View File

@ -12,16 +12,17 @@ def check_invokeai_root(config: InvokeAIAppConfig):
assert config.model_conf_path.exists(), f"{config.model_conf_path} not found"
assert config.db_path.parent.exists(), f"{config.db_path.parent} not found"
assert config.models_path.exists(), f"{config.models_path} not found"
for model in [
"CLIP-ViT-bigG-14-laion2B-39B-b160k",
"bert-base-uncased",
"clip-vit-large-patch14",
"sd-vae-ft-mse",
"stable-diffusion-2-clip",
"stable-diffusion-safety-checker",
]:
path = config.models_path / f"core/convert/{model}"
assert path.exists(), f"{path} is missing"
if not config.ignore_missing_core_models:
for model in [
"CLIP-ViT-bigG-14-laion2B-39B-b160k",
"bert-base-uncased",
"clip-vit-large-patch14",
"sd-vae-ft-mse",
"stable-diffusion-2-clip",
"stable-diffusion-safety-checker",
]:
path = config.models_path / f"core/convert/{model}"
assert path.exists(), f"{path} is missing"
except Exception as e:
print()
print(f"An exception has occurred: {str(e)}")
@ -32,5 +33,10 @@ def check_invokeai_root(config: InvokeAIAppConfig):
print(
'** From the command line, activate the virtual environment and run "invokeai-configure --yes --skip-sd-weights" **'
)
print(
'** (To skip this check completely, add "--ignore_missing_core_models" to your CLI args. Not installing '
"these core models will prevent the loading of some or all .safetensors and .ckpt files. However, you can "
"always come back and install these core models in the future.)"
)
input("Press any key to continue...")
sys.exit(0)

View File

@ -10,15 +10,17 @@ import sys
import argparse
import io
import os
import psutil
import shutil
import textwrap
import torch
import traceback
import yaml
import warnings
from argparse import Namespace
from enum import Enum
from pathlib import Path
from shutil import get_terminal_size
from typing import get_type_hints
from urllib import request
import npyscreen
@ -44,6 +46,8 @@ from invokeai.app.services.config import (
)
from invokeai.backend.util.logging import InvokeAILogger
from invokeai.frontend.install.model_install import addModelsForm, process_and_execute
# TO DO - Move all the frontend code into invokeai.frontend.install
from invokeai.frontend.install.widgets import (
SingleSelectColumns,
CenteredButtonPress,
@ -53,6 +57,7 @@ from invokeai.frontend.install.widgets import (
CyclingForm,
MIN_COLS,
MIN_LINES,
WindowTooSmallException,
)
from invokeai.backend.install.legacy_arg_parsing import legacy_parser
from invokeai.backend.install.model_install_backend import (
@ -61,6 +66,7 @@ from invokeai.backend.install.model_install_backend import (
ModelInstall,
)
from invokeai.backend.model_management.model_probe import ModelType, BaseModelType
from pydantic.error_wrappers import ValidationError
warnings.filterwarnings("ignore")
transformers.logging.set_verbosity_error()
@ -76,6 +82,13 @@ Default_config_file = config.model_conf_path
SD_Configs = config.legacy_conf_path
PRECISION_CHOICES = ["auto", "float16", "float32"]
GB = 1073741824 # GB in bytes
HAS_CUDA = torch.cuda.is_available()
_, MAX_VRAM = torch.cuda.mem_get_info() if HAS_CUDA else (0, 0)
MAX_VRAM /= GB
MAX_RAM = psutil.virtual_memory().total / GB
INIT_FILE_PREAMBLE = """# InvokeAI initialization file
# This is the InvokeAI initialization file, which contains command-line default values.
@ -86,6 +99,12 @@ INIT_FILE_PREAMBLE = """# InvokeAI initialization file
logger = InvokeAILogger.getLogger()
class DummyWidgetValue(Enum):
zero = 0
true = True
false = False
# --------------------------------------------
def postscript(errors: None):
if not any(errors):
@ -181,7 +200,7 @@ def download_with_progress_bar(model_url: str, model_dest: str, label: str = "th
def download_conversion_models():
target_dir = config.root_path / "models/core/convert"
target_dir = config.models_path / "core/convert"
kwargs = dict() # for future use
try:
logger.info("Downloading core tokenizers and text encoders")
@ -376,15 +395,47 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
max_width=80,
scroll_exit=True,
)
self.max_cache_size = self.add_widget_intelligent(
IntTitleSlider,
name="Size of the RAM cache used for fast model switching (GB)",
value=old_opts.max_cache_size,
out_of=20,
lowest=3,
begin_entry_at=6,
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.TitleFixedText,
name="RAM cache size (GB). Make this at least large enough to hold a single full model.",
begin_entry_at=0,
editable=False,
color="CONTROL",
scroll_exit=True,
)
self.nextrely -= 1
self.max_cache_size = self.add_widget_intelligent(
npyscreen.Slider,
value=clip(old_opts.max_cache_size, range=(3.0, MAX_RAM), step=0.5),
out_of=round(MAX_RAM),
lowest=0.0,
step=0.5,
relx=8,
scroll_exit=True,
)
if HAS_CUDA:
self.nextrely += 1
self.add_widget_intelligent(
npyscreen.TitleFixedText,
name="VRAM cache size (GB). Reserving a small amount of VRAM will modestly speed up the start of image generation.",
begin_entry_at=0,
editable=False,
color="CONTROL",
scroll_exit=True,
)
self.nextrely -= 1
self.max_vram_cache_size = self.add_widget_intelligent(
npyscreen.Slider,
value=clip(old_opts.max_vram_cache_size, range=(0, MAX_VRAM), step=0.25),
out_of=round(MAX_VRAM * 2) / 2,
lowest=0.0,
relx=8,
step=0.25,
scroll_exit=True,
)
else:
self.max_vram_cache_size = DummyWidgetValue.zero
self.nextrely += 1
self.outdir = self.add_widget_intelligent(
FileBox,
@ -401,7 +452,7 @@ Use cursor arrows to make a checkbox selection, and space to toggle.
self.autoimport_dirs = {}
self.autoimport_dirs["autoimport_dir"] = self.add_widget_intelligent(
FileBox,
name=f"Folder to recursively scan for new checkpoints, ControlNets, LoRAs and TI models",
name="Folder to recursively scan for new checkpoints, ControlNets, LoRAs and TI models",
value=str(config.root_path / config.autoimport_dir),
select_dir=True,
must_exist=False,
@ -476,6 +527,7 @@ https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENS
"outdir",
"free_gpu_mem",
"max_cache_size",
"max_vram_cache_size",
"xformers_enabled",
"always_use_cpu",
]:
@ -553,6 +605,16 @@ def default_user_selections(program_opts: Namespace) -> InstallSelections:
)
# -------------------------------------
def clip(value: float, range: tuple[float, float], step: float) -> float:
minimum, maximum = range
if value < minimum:
value = minimum
if value > maximum:
value = maximum
return round(value / step) * step
# -------------------------------------
def initialize_rootdir(root: Path, yes_to_all: bool = False):
logger.info("Initializing InvokeAI runtime directory")
@ -592,13 +654,13 @@ def maybe_create_models_yaml(root: Path):
# -------------------------------------
def run_console_ui(program_opts: Namespace, initfile: Path = None) -> (Namespace, Namespace):
# parse_args() will read from init file if present
invokeai_opts = default_startup_options(initfile)
invokeai_opts.root = program_opts.root
# The third argument is needed in the Windows 11 environment to
# launch a console window running this program.
set_min_terminal_size(MIN_COLS, MIN_LINES)
if not set_min_terminal_size(MIN_COLS, MIN_LINES):
raise WindowTooSmallException(
"Could not increase terminal size. Try running again with a larger window or smaller font size."
)
# the install-models application spawns a subprocess to install
# models, and will crash unless this is set before running.
@ -654,10 +716,13 @@ def migrate_init_file(legacy_format: Path):
old = legacy_parser.parse_args([f"@{str(legacy_format)}"])
new = InvokeAIAppConfig.get_config()
fields = list(get_type_hints(InvokeAIAppConfig).keys())
fields = [x for x, y in InvokeAIAppConfig.__fields__.items() if y.field_info.extra.get("category") != "DEPRECATED"]
for attr in fields:
if hasattr(old, attr):
setattr(new, attr, getattr(old, attr))
try:
setattr(new, attr, getattr(old, attr))
except ValidationError as e:
print(f"* Ignoring incompatible value for field {attr}:\n {str(e)}")
# a few places where the field names have changed and we have to
# manually add in the new names/values
@ -777,6 +842,7 @@ def main():
models_to_download = default_user_selections(opt)
new_init_file = config.root_path / "invokeai.yaml"
if opt.yes_to_all:
write_default_options(opt, new_init_file)
init_options = Namespace(precision="float32" if opt.full_precision else "float16")
@ -802,6 +868,8 @@ def main():
postscript(errors=errors)
if not opt.yes_to_all:
input("Press any key to continue...")
except WindowTooSmallException as e:
logger.error(str(e))
except KeyboardInterrupt:
print("\nGoodbye! Come back soon.")

View File

@ -591,7 +591,6 @@ script, which will perform a full upgrade in place.""",
# TODO: revisit - don't rely on invokeai.yaml to exist yet!
dest_is_setup = (dest_root / "models/core").exists() and (dest_root / "databases").exists()
if not dest_is_setup:
import invokeai.frontend.install.invokeai_configure
from invokeai.backend.install.invokeai_configure import initialize_rootdir
initialize_rootdir(dest_root, True)

View File

@ -7,11 +7,13 @@ import warnings
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import List, Dict, Callable, Union, Set
from typing import Optional, List, Dict, Callable, Union, Set
import requests
from diffusers import DiffusionPipeline
from diffusers import logging as dlogging
import onnx
import torch
from huggingface_hub import hf_hub_url, HfFolder, HfApi
from omegaconf import OmegaConf
from tqdm import tqdm
@ -22,6 +24,7 @@ from invokeai.app.services.config import InvokeAIAppConfig
from invokeai.backend.model_management import ModelManager, ModelType, BaseModelType, ModelVariantType, AddModelResult
from invokeai.backend.model_management.model_probe import ModelProbe, SchedulerPredictionType, ModelProbeInfo
from invokeai.backend.util import download_with_resume
from invokeai.backend.util.devices import torch_dtype, choose_torch_device
from ..util.logging import InvokeAILogger
warnings.filterwarnings("ignore")
@ -86,8 +89,8 @@ class ModelLoadInfo:
name: str
model_type: ModelType
base_type: BaseModelType
path: Path = None
repo_id: str = None
path: Optional[Path] = None
repo_id: Optional[str] = None
description: str = ""
installed: bool = False
recommended: bool = False
@ -98,9 +101,9 @@ class ModelInstall(object):
def __init__(
self,
config: InvokeAIAppConfig,
prediction_type_helper: Callable[[Path], SchedulerPredictionType] = None,
model_manager: ModelManager = None,
access_token: str = None,
prediction_type_helper: Optional[Callable[[Path], SchedulerPredictionType]] = None,
model_manager: Optional[ModelManager] = None,
access_token: Optional[str] = None,
):
self.config = config
self.mgr = model_manager or ModelManager(config.model_conf_path)
@ -128,7 +131,9 @@ class ModelInstall(object):
model_dict[key] = ModelLoadInfo(**value)
# supplement with entries in models.yaml
installed_models = self.mgr.list_models()
installed_models = [x for x in self.mgr.list_models()]
# suppresses autoloaded models
# installed_models = [x for x in self.mgr.list_models() if not self._is_autoloaded(x)]
for md in installed_models:
base = md["base_model"]
@ -147,6 +152,17 @@ class ModelInstall(object):
)
return {x: model_dict[x] for x in sorted(model_dict.keys(), key=lambda y: model_dict[y].name.lower())}
def _is_autoloaded(self, model_info: dict) -> bool:
path = model_info.get("path")
if not path:
return False
for autodir in ["autoimport_dir", "lora_dir", "embedding_dir", "controlnet_dir"]:
if autodir_path := getattr(self.config, autodir):
autodir_path = self.config.root_path / autodir_path
if Path(path).is_relative_to(autodir_path):
return True
return False
def list_models(self, model_type):
installed = self.mgr.list_models(model_type=model_type)
print(f"Installed models of type `{model_type}`:")
@ -273,6 +289,7 @@ class ModelInstall(object):
logger.error(f"Unable to download {url}. Skipping.")
info = ModelProbe().heuristic_probe(location)
dest = self.config.models_path / info.base_type.value / info.model_type.value / location.name
dest.parent.mkdir(parents=True, exist_ok=True)
models_path = shutil.move(location, dest)
# staged version will be garbage-collected at this time
@ -290,6 +307,8 @@ class ModelInstall(object):
staging = Path(staging)
if "model_index.json" in files:
location = self._download_hf_pipeline(repo_id, staging) # pipeline
elif "unet/model.onnx" in files:
location = self._download_hf_model(repo_id, files, staging)
else:
for suffix in ["safetensors", "bin"]:
if f"pytorch_lora_weights.{suffix}" in files:
@ -346,7 +365,7 @@ class ModelInstall(object):
if key in self.datasets:
description = self.datasets[key].get("description") or description
rel_path = self.relative_to_root(path)
rel_path = self.relative_to_root(path, self.config.models_path)
attributes = dict(
path=str(rel_path),
@ -354,7 +373,7 @@ class ModelInstall(object):
model_format=info.format,
)
legacy_conf = None
if info.model_type == ModelType.Main:
if info.model_type == ModelType.Main or info.model_type == ModelType.ONNX:
attributes.update(
dict(
variant=info.variant_type,
@ -386,8 +405,8 @@ class ModelInstall(object):
attributes.update(dict(config=str(legacy_conf)))
return attributes
def relative_to_root(self, path: Path) -> Path:
root = self.config.root_path
def relative_to_root(self, path: Path, root: Optional[Path] = None) -> Path:
root = root or self.config.root_path
if path.is_relative_to(root):
return path.relative_to(root)
else:
@ -399,15 +418,25 @@ class ModelInstall(object):
does a save_pretrained() to the indicated staging area.
"""
_, name = repo_id.split("/")
revisions = ["fp16", "main"] if self.config.precision == "float16" else ["main"]
precision = torch_dtype(choose_torch_device())
variants = ["fp16", None] if precision == torch.float16 else [None, "fp16"]
model = None
for revision in revisions:
for variant in variants:
try:
model = DiffusionPipeline.from_pretrained(repo_id, revision=revision, safety_checker=None)
except: # most errors are due to fp16 not being present. Fix this to catch other errors
pass
model = DiffusionPipeline.from_pretrained(
repo_id,
variant=variant,
torch_dtype=precision,
safety_checker=None,
)
except Exception as e: # most errors are due to fp16 not being present. Fix this to catch other errors
if "fp16" not in str(e):
print(e)
if model:
break
if not model:
logger.error(f"Diffusers model {repo_id} could not be downloaded. Skipping.")
return None
@ -419,8 +448,13 @@ class ModelInstall(object):
location = staging / name
paths = list()
for filename in files:
filePath = Path(filename)
p = hf_download_with_resume(
repo_id, model_dir=location, model_name=filename, access_token=self.access_token
repo_id,
model_dir=location / filePath.parent,
model_name=filePath.name,
access_token=self.access_token,
subfolder=filePath.parent,
)
if p:
paths.append(p)
@ -468,11 +502,12 @@ def hf_download_with_resume(
model_name: str,
model_dest: Path = None,
access_token: str = None,
subfolder: str = None,
) -> Path:
model_dest = model_dest or Path(os.path.join(model_dir, model_name))
os.makedirs(model_dir, exist_ok=True)
url = hf_hub_url(repo_id, model_name)
url = hf_hub_url(repo_id, model_name, subfolder=subfolder)
header = {"Authorization": f"Bearer {access_token}"} if access_token else {}
open_mode = "wb"

View File

@ -3,6 +3,7 @@ Initialization file for invokeai.backend.model_management
"""
from .model_manager import ModelManager, ModelInfo, AddModelResult, SchedulerPredictionType
from .model_cache import ModelCache
from .lora import ModelPatcher, ONNXModelPatcher
from .models import (
BaseModelType,
ModelType,
@ -12,3 +13,4 @@ from .models import (
DuplicateModelException,
)
from .model_merge import ModelMerger, MergeInterpolationMethod
from .lora import ModelPatcher

View File

@ -63,7 +63,7 @@ from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionS
from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
from invokeai.backend.util.logging import InvokeAILogger
from invokeai.app.services.config import InvokeAIAppConfig, MODEL_CORE
from invokeai.app.services.config import InvokeAIAppConfig
from picklescan.scanner import scan_file_path
from .models import BaseModelType, ModelVariantType
@ -81,7 +81,7 @@ if is_accelerate_available():
from accelerate.utils import set_module_tensor_to_device
logger = InvokeAILogger.getLogger(__name__)
CONVERT_MODEL_ROOT = InvokeAIAppConfig.get_config().root_path / MODEL_CORE / "convert"
CONVERT_MODEL_ROOT = InvokeAIAppConfig.get_config().models_path / "core/convert"
def shave_segments(path, n_shave_prefix_segments=1):
@ -1070,7 +1070,7 @@ def convert_controlnet_checkpoint(
extract_ema,
use_linear_projection=None,
cross_attention_dim=None,
precision: torch.dtype = torch.float32,
precision: Optional[torch.dtype] = None,
):
ctrlnet_config = create_unet_diffusers_config(original_config, image_size=image_size, controlnet=True)
ctrlnet_config["upcast_attention"] = upcast_attention
@ -1111,7 +1111,6 @@ def convert_controlnet_checkpoint(
return controlnet.to(precision)
# TO DO - PASS PRECISION
def download_from_original_stable_diffusion_ckpt(
checkpoint_path: str,
model_version: BaseModelType,
@ -1121,7 +1120,7 @@ def download_from_original_stable_diffusion_ckpt(
prediction_type: str = None,
model_type: str = None,
extract_ema: bool = False,
precision: torch.dtype = torch.float32,
precision: Optional[torch.dtype] = None,
scheduler_type: str = "pndm",
num_in_channels: Optional[int] = None,
upcast_attention: Optional[bool] = None,
@ -1194,6 +1193,8 @@ def download_from_original_stable_diffusion_ckpt(
[CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer)
to use. If this parameter is `None`, the function will load a new instance of [CLIPTokenizer] by itself, if
needed.
precision (`torch.dtype`, *optional*, defauts to `None`):
If not provided the precision will be set to the precision of the original file.
return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
"""
@ -1252,6 +1253,10 @@ def download_from_original_stable_diffusion_ckpt(
logger.debug(f"model_type = {model_type}; original_config_file = {original_config_file}")
precision_probing_key = "model.diffusion_model.input_blocks.0.0.bias"
logger.debug(f"original checkpoint precision == {checkpoint[precision_probing_key].dtype}")
precision = precision or checkpoint[precision_probing_key].dtype
if original_config_file is None:
key_name_v2_1 = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight"
key_name_sd_xl_base = "conditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias"
@ -1279,9 +1284,12 @@ def download_from_original_stable_diffusion_ckpt(
original_config_file = BytesIO(requests.get(config_url).content)
original_config = OmegaConf.load(original_config_file)
if original_config["model"]["params"].get("use_ema") is not None:
extract_ema = original_config["model"]["params"]["use_ema"]
if (
model_version == BaseModelType.StableDiffusion2
and original_config["model"]["params"]["parameterization"] == "v"
and original_config["model"]["params"].get("parameterization") == "v"
):
prediction_type = "v_prediction"
upcast_attention = True
@ -1447,7 +1455,7 @@ def download_from_original_stable_diffusion_ckpt(
if controlnet:
pipe = pipeline_class(
vae=vae.to(precision),
text_encoder=text_model,
text_encoder=text_model.to(precision),
tokenizer=tokenizer,
unet=unet.to(precision),
scheduler=scheduler,
@ -1459,7 +1467,7 @@ def download_from_original_stable_diffusion_ckpt(
else:
pipe = pipeline_class(
vae=vae.to(precision),
text_encoder=text_model,
text_encoder=text_model.to(precision),
tokenizer=tokenizer,
unet=unet.to(precision),
scheduler=scheduler,
@ -1484,8 +1492,8 @@ def download_from_original_stable_diffusion_ckpt(
image_noising_scheduler=image_noising_scheduler,
# regular denoising components
tokenizer=tokenizer,
text_encoder=text_model,
unet=unet,
text_encoder=text_model.to(precision),
unet=unet.to(precision),
scheduler=scheduler,
# vae
vae=vae,
@ -1560,7 +1568,7 @@ def download_from_original_stable_diffusion_ckpt(
if controlnet:
pipe = pipeline_class(
vae=vae.to(precision),
text_encoder=text_model,
text_encoder=text_model.to(precision),
tokenizer=tokenizer,
unet=unet.to(precision),
controlnet=controlnet,
@ -1571,7 +1579,7 @@ def download_from_original_stable_diffusion_ckpt(
else:
pipe = pipeline_class(
vae=vae.to(precision),
text_encoder=text_model,
text_encoder=text_model.to(precision),
tokenizer=tokenizer,
unet=unet.to(precision),
scheduler=scheduler,
@ -1594,9 +1602,9 @@ def download_from_original_stable_diffusion_ckpt(
pipe = StableDiffusionXLPipeline(
vae=vae.to(precision),
text_encoder=text_encoder,
text_encoder=text_encoder.to(precision),
tokenizer=tokenizer,
text_encoder_2=text_encoder_2,
text_encoder_2=text_encoder_2.to(precision),
tokenizer_2=tokenizer_2,
unet=unet.to(precision),
scheduler=scheduler,
@ -1639,7 +1647,7 @@ def download_controlnet_from_original_ckpt(
original_config_file: str,
image_size: int = 512,
extract_ema: bool = False,
precision: torch.dtype = torch.float32,
precision: Optional[torch.dtype] = None,
num_in_channels: Optional[int] = None,
upcast_attention: Optional[bool] = None,
device: str = None,
@ -1680,6 +1688,12 @@ def download_controlnet_from_original_ckpt(
while "state_dict" in checkpoint:
checkpoint = checkpoint["state_dict"]
# use original precision
precision_probing_key = "input_blocks.0.0.bias"
ckpt_precision = checkpoint[precision_probing_key].dtype
logger.debug(f"original controlnet precision = {ckpt_precision}")
precision = precision or ckpt_precision
original_config = OmegaConf.load(original_config_file)
if num_in_channels is not None:
@ -1699,7 +1713,7 @@ def download_controlnet_from_original_ckpt(
cross_attention_dim=cross_attention_dim,
)
return controlnet
return controlnet.to(precision)
def convert_ldm_vae_to_diffusers(checkpoint, vae_config: DictConfig, image_size: int) -> AutoencoderKL:

View File

@ -6,427 +6,20 @@ from typing import Optional, Dict, Tuple, Any, Union, List
from pathlib import Path
import torch
from safetensors.torch import load_file
from torch.utils.hooks import RemovableHandle
from diffusers.models import UNet2DConditionModel
from transformers import CLIPTextModel
from onnx import numpy_helper
from onnxruntime import OrtValue
import numpy as np
from compel.embeddings_provider import BaseTextualInversionManager
from diffusers.models import UNet2DConditionModel
from safetensors.torch import load_file
from transformers import CLIPTextModel, CLIPTokenizer
class LoRALayerBase:
# rank: Optional[int]
# alpha: Optional[float]
# bias: Optional[torch.Tensor]
# layer_key: str
# @property
# def scale(self):
# return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
def __init__(
self,
layer_key: str,
values: dict,
):
if "alpha" in values:
self.alpha = values["alpha"].item()
else:
self.alpha = None
if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
self.bias = torch.sparse_coo_tensor(
values["bias_indices"],
values["bias_values"],
tuple(values["bias_size"]),
)
else:
self.bias = None
self.rank = None # set in layer implementation
self.layer_key = layer_key
def forward(
self,
module: torch.nn.Module,
input_h: Any, # for real looks like Tuple[torch.nn.Tensor] but not sure
multiplier: float,
):
if type(module) == torch.nn.Conv2d:
op = torch.nn.functional.conv2d
extra_args = dict(
stride=module.stride,
padding=module.padding,
dilation=module.dilation,
groups=module.groups,
)
else:
op = torch.nn.functional.linear
extra_args = {}
weight = self.get_weight()
bias = self.bias if self.bias is not None else 0
scale = self.alpha / self.rank if (self.alpha and self.rank) else 1.0
return (
op(
*input_h,
(weight + bias).view(module.weight.shape),
None,
**extra_args,
)
* multiplier
* scale
)
def get_weight(self):
raise NotImplementedError()
def calc_size(self) -> int:
model_size = 0
for val in [self.bias]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
if self.bias is not None:
self.bias = self.bias.to(device=device, dtype=dtype)
# TODO: find and debug lora/locon with bias
class LoRALayer(LoRALayerBase):
# up: torch.Tensor
# mid: Optional[torch.Tensor]
# down: torch.Tensor
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.up = values["lora_up.weight"]
self.down = values["lora_down.weight"]
if "lora_mid.weight" in values:
self.mid = values["lora_mid.weight"]
else:
self.mid = None
self.rank = self.down.shape[0]
def get_weight(self):
if self.mid is not None:
up = self.up.reshape(self.up.shape[0], self.up.shape[1])
down = self.down.reshape(self.down.shape[0], self.down.shape[1])
weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
else:
weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.up, self.mid, self.down]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.up = self.up.to(device=device, dtype=dtype)
self.down = self.down.to(device=device, dtype=dtype)
if self.mid is not None:
self.mid = self.mid.to(device=device, dtype=dtype)
class LoHALayer(LoRALayerBase):
# w1_a: torch.Tensor
# w1_b: torch.Tensor
# w2_a: torch.Tensor
# w2_b: torch.Tensor
# t1: Optional[torch.Tensor] = None
# t2: Optional[torch.Tensor] = None
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.w1_a = values["hada_w1_a"]
self.w1_b = values["hada_w1_b"]
self.w2_a = values["hada_w2_a"]
self.w2_b = values["hada_w2_b"]
if "hada_t1" in values:
self.t1 = values["hada_t1"]
else:
self.t1 = None
if "hada_t2" in values:
self.t2 = values["hada_t2"]
else:
self.t2 = None
self.rank = self.w1_b.shape[0]
def get_weight(self):
if self.t1 is None:
weight = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
else:
rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
weight = rebuild1 * rebuild2
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.w1_a = self.w1_a.to(device=device, dtype=dtype)
self.w1_b = self.w1_b.to(device=device, dtype=dtype)
if self.t1 is not None:
self.t1 = self.t1.to(device=device, dtype=dtype)
self.w2_a = self.w2_a.to(device=device, dtype=dtype)
self.w2_b = self.w2_b.to(device=device, dtype=dtype)
if self.t2 is not None:
self.t2 = self.t2.to(device=device, dtype=dtype)
class LoKRLayer(LoRALayerBase):
# w1: Optional[torch.Tensor] = None
# w1_a: Optional[torch.Tensor] = None
# w1_b: Optional[torch.Tensor] = None
# w2: Optional[torch.Tensor] = None
# w2_a: Optional[torch.Tensor] = None
# w2_b: Optional[torch.Tensor] = None
# t2: Optional[torch.Tensor] = None
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
if "lokr_w1" in values:
self.w1 = values["lokr_w1"]
self.w1_a = None
self.w1_b = None
else:
self.w1 = None
self.w1_a = values["lokr_w1_a"]
self.w1_b = values["lokr_w1_b"]
if "lokr_w2" in values:
self.w2 = values["lokr_w2"]
self.w2_a = None
self.w2_b = None
else:
self.w2 = None
self.w2_a = values["lokr_w2_a"]
self.w2_b = values["lokr_w2_b"]
if "lokr_t2" in values:
self.t2 = values["lokr_t2"]
else:
self.t2 = None
if "lokr_w1_b" in values:
self.rank = values["lokr_w1_b"].shape[0]
elif "lokr_w2_b" in values:
self.rank = values["lokr_w2_b"].shape[0]
else:
self.rank = None # unscaled
def get_weight(self):
w1 = self.w1
if w1 is None:
w1 = self.w1_a @ self.w1_b
w2 = self.w2
if w2 is None:
if self.t2 is None:
w2 = self.w2_a @ self.w2_b
else:
w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
if len(w2.shape) == 4:
w1 = w1.unsqueeze(2).unsqueeze(2)
w2 = w2.contiguous()
weight = torch.kron(w1, w2)
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
if self.w1 is not None:
self.w1 = self.w1.to(device=device, dtype=dtype)
else:
self.w1_a = self.w1_a.to(device=device, dtype=dtype)
self.w1_b = self.w1_b.to(device=device, dtype=dtype)
if self.w2 is not None:
self.w2 = self.w2.to(device=device, dtype=dtype)
else:
self.w2_a = self.w2_a.to(device=device, dtype=dtype)
self.w2_b = self.w2_b.to(device=device, dtype=dtype)
if self.t2 is not None:
self.t2 = self.t2.to(device=device, dtype=dtype)
class LoRAModel: # (torch.nn.Module):
_name: str
layers: Dict[str, LoRALayer]
_device: torch.device
_dtype: torch.dtype
def __init__(
self,
name: str,
layers: Dict[str, LoRALayer],
device: torch.device,
dtype: torch.dtype,
):
self._name = name
self._device = device or torch.cpu
self._dtype = dtype or torch.float32
self.layers = layers
@property
def name(self):
return self._name
@property
def device(self):
return self._device
@property
def dtype(self):
return self._dtype
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
) -> LoRAModel:
# TODO: try revert if exception?
for key, layer in self.layers.items():
layer.to(device=device, dtype=dtype)
self._device = device
self._dtype = dtype
def calc_size(self) -> int:
model_size = 0
for _, layer in self.layers.items():
model_size += layer.calc_size()
return model_size
@classmethod
def from_checkpoint(
cls,
file_path: Union[str, Path],
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
device = device or torch.device("cpu")
dtype = dtype or torch.float32
if isinstance(file_path, str):
file_path = Path(file_path)
model = cls(
device=device,
dtype=dtype,
name=file_path.stem, # TODO:
layers=dict(),
)
if file_path.suffix == ".safetensors":
state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
else:
state_dict = torch.load(file_path, map_location="cpu")
state_dict = cls._group_state(state_dict)
for layer_key, values in state_dict.items():
# lora and locon
if "lora_down.weight" in values:
layer = LoRALayer(layer_key, values)
# loha
elif "hada_w1_b" in values:
layer = LoHALayer(layer_key, values)
# lokr
elif "lokr_w1_b" in values or "lokr_w1" in values:
layer = LoKRLayer(layer_key, values)
else:
# TODO: diff/ia3/... format
print(f">> Encountered unknown lora layer module in {model.name}: {layer_key}")
return
# lower memory consumption by removing already parsed layer values
state_dict[layer_key].clear()
layer.to(device=device, dtype=dtype)
model.layers[layer_key] = layer
return model
@staticmethod
def _group_state(state_dict: dict):
state_dict_groupped = dict()
for key, value in state_dict.items():
stem, leaf = key.split(".", 1)
if stem not in state_dict_groupped:
state_dict_groupped[stem] = dict()
state_dict_groupped[stem][leaf] = value
return state_dict_groupped
"""
loras = [
(lora_model1, 0.7),
@ -505,6 +98,26 @@ class ModelPatcher:
with cls.apply_lora(text_encoder, loras, "lora_te_"):
yield
@classmethod
@contextmanager
def apply_sdxl_lora_text_encoder(
cls,
text_encoder: CLIPTextModel,
loras: List[Tuple[LoRAModel, float]],
):
with cls.apply_lora(text_encoder, loras, "lora_te1_"):
yield
@classmethod
@contextmanager
def apply_sdxl_lora_text_encoder2(
cls,
text_encoder: CLIPTextModel,
loras: List[Tuple[LoRAModel, float]],
):
with cls.apply_lora(text_encoder, loras, "lora_te2_"):
yield
@classmethod
@contextmanager
def apply_lora(
@ -530,7 +143,7 @@ class ModelPatcher:
# with torch.autocast(device_type="cpu"):
layer.to(dtype=torch.float32)
layer_scale = layer.alpha / layer.rank if (layer.alpha and layer.rank) else 1.0
layer_weight = layer.get_weight() * lora_weight * layer_scale
layer_weight = layer.get_weight(original_weights[module_key]) * lora_weight * layer_scale
if module.weight.shape != layer_weight.shape:
# TODO: debug on lycoris
@ -551,7 +164,7 @@ class ModelPatcher:
cls,
tokenizer: CLIPTokenizer,
text_encoder: CLIPTextModel,
ti_list: List[Any],
ti_list: List[Tuple[str, Any]],
) -> Tuple[CLIPTokenizer, TextualInversionManager]:
init_tokens_count = None
new_tokens_added = None
@ -561,27 +174,27 @@ class ModelPatcher:
ti_manager = TextualInversionManager(ti_tokenizer)
init_tokens_count = text_encoder.resize_token_embeddings(None).num_embeddings
def _get_trigger(ti, index):
trigger = ti.name
def _get_trigger(ti_name, index):
trigger = ti_name
if index > 0:
trigger += f"-!pad-{i}"
return f"<{trigger}>"
# modify tokenizer
new_tokens_added = 0
for ti in ti_list:
for ti_name, ti in ti_list:
for i in range(ti.embedding.shape[0]):
new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti, i))
new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti_name, i))
# modify text_encoder
text_encoder.resize_token_embeddings(init_tokens_count + new_tokens_added)
model_embeddings = text_encoder.get_input_embeddings()
for ti in ti_list:
for ti_name, ti in ti_list:
ti_tokens = []
for i in range(ti.embedding.shape[0]):
embedding = ti.embedding[i]
trigger = _get_trigger(ti, i)
trigger = _get_trigger(ti_name, i)
token_id = ti_tokenizer.convert_tokens_to_ids(trigger)
if token_id == ti_tokenizer.unk_token_id:
@ -626,7 +239,6 @@ class ModelPatcher:
class TextualInversionModel:
name: str
embedding: torch.Tensor # [n, 768]|[n, 1280]
@classmethod
@ -640,7 +252,6 @@ class TextualInversionModel:
file_path = Path(file_path)
result = cls() # TODO:
result.name = file_path.stem # TODO:
if file_path.suffix == ".safetensors":
state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
@ -698,3 +309,187 @@ class TextualInversionManager(BaseTextualInversionManager):
new_token_ids.extend(self.pad_tokens[token_id])
return new_token_ids
class ONNXModelPatcher:
from .models.base import IAIOnnxRuntimeModel, OnnxRuntimeModel
@classmethod
@contextmanager
def apply_lora_unet(
cls,
unet: OnnxRuntimeModel,
loras: List[Tuple[LoRAModel, float]],
):
with cls.apply_lora(unet, loras, "lora_unet_"):
yield
@classmethod
@contextmanager
def apply_lora_text_encoder(
cls,
text_encoder: OnnxRuntimeModel,
loras: List[Tuple[LoRAModel, float]],
):
with cls.apply_lora(text_encoder, loras, "lora_te_"):
yield
# based on
# https://github.com/ssube/onnx-web/blob/ca2e436f0623e18b4cfe8a0363fcfcf10508acf7/api/onnx_web/convert/diffusion/lora.py#L323
@classmethod
@contextmanager
def apply_lora(
cls,
model: IAIOnnxRuntimeModel,
loras: List[Tuple[LoraModel, float]],
prefix: str,
):
from .models.base import IAIOnnxRuntimeModel
if not isinstance(model, IAIOnnxRuntimeModel):
raise Exception("Only IAIOnnxRuntimeModel models supported")
orig_weights = dict()
try:
blended_loras = dict()
for lora, lora_weight in loras:
for layer_key, layer in lora.layers.items():
if not layer_key.startswith(prefix):
continue
layer.to(dtype=torch.float32)
layer_key = layer_key.replace(prefix, "")
# TODO: rewrite to pass original tensor weight(required by ia3)
layer_weight = layer.get_weight(None).detach().cpu().numpy() * lora_weight
if layer_key is blended_loras:
blended_loras[layer_key] += layer_weight
else:
blended_loras[layer_key] = layer_weight
node_names = dict()
for node in model.nodes.values():
node_names[node.name.replace("/", "_").replace(".", "_").lstrip("_")] = node.name
for layer_key, lora_weight in blended_loras.items():
conv_key = layer_key + "_Conv"
gemm_key = layer_key + "_Gemm"
matmul_key = layer_key + "_MatMul"
if conv_key in node_names or gemm_key in node_names:
if conv_key in node_names:
conv_node = model.nodes[node_names[conv_key]]
else:
conv_node = model.nodes[node_names[gemm_key]]
weight_name = [n for n in conv_node.input if ".weight" in n][0]
orig_weight = model.tensors[weight_name]
if orig_weight.shape[-2:] == (1, 1):
if lora_weight.shape[-2:] == (1, 1):
new_weight = orig_weight.squeeze((3, 2)) + lora_weight.squeeze((3, 2))
else:
new_weight = orig_weight.squeeze((3, 2)) + lora_weight
new_weight = np.expand_dims(new_weight, (2, 3))
else:
if orig_weight.shape != lora_weight.shape:
new_weight = orig_weight + lora_weight.reshape(orig_weight.shape)
else:
new_weight = orig_weight + lora_weight
orig_weights[weight_name] = orig_weight
model.tensors[weight_name] = new_weight.astype(orig_weight.dtype)
elif matmul_key in node_names:
weight_node = model.nodes[node_names[matmul_key]]
matmul_name = [n for n in weight_node.input if "MatMul" in n][0]
orig_weight = model.tensors[matmul_name]
new_weight = orig_weight + lora_weight.transpose()
orig_weights[matmul_name] = orig_weight
model.tensors[matmul_name] = new_weight.astype(orig_weight.dtype)
else:
# warn? err?
pass
yield
finally:
# restore original weights
for name, orig_weight in orig_weights.items():
model.tensors[name] = orig_weight
@classmethod
@contextmanager
def apply_ti(
cls,
tokenizer: CLIPTokenizer,
text_encoder: IAIOnnxRuntimeModel,
ti_list: List[Tuple[str, Any]],
) -> Tuple[CLIPTokenizer, TextualInversionManager]:
from .models.base import IAIOnnxRuntimeModel
if not isinstance(text_encoder, IAIOnnxRuntimeModel):
raise Exception("Only IAIOnnxRuntimeModel models supported")
orig_embeddings = None
try:
ti_tokenizer = copy.deepcopy(tokenizer)
ti_manager = TextualInversionManager(ti_tokenizer)
def _get_trigger(ti_name, index):
trigger = ti_name
if index > 0:
trigger += f"-!pad-{i}"
return f"<{trigger}>"
# modify tokenizer
new_tokens_added = 0
for ti_name, ti in ti_list:
for i in range(ti.embedding.shape[0]):
new_tokens_added += ti_tokenizer.add_tokens(_get_trigger(ti_name, i))
# modify text_encoder
orig_embeddings = text_encoder.tensors["text_model.embeddings.token_embedding.weight"]
embeddings = np.concatenate(
(np.copy(orig_embeddings), np.zeros((new_tokens_added, orig_embeddings.shape[1]))),
axis=0,
)
for ti_name, ti in ti_list:
ti_tokens = []
for i in range(ti.embedding.shape[0]):
embedding = ti.embedding[i].detach().numpy()
trigger = _get_trigger(ti_name, i)
token_id = ti_tokenizer.convert_tokens_to_ids(trigger)
if token_id == ti_tokenizer.unk_token_id:
raise RuntimeError(f"Unable to find token id for token '{trigger}'")
if embeddings[token_id].shape != embedding.shape:
raise ValueError(
f"Cannot load embedding for {trigger}. It was trained on a model with token dimension {embedding.shape[0]}, but the current model has token dimension {embeddings[token_id].shape[0]}."
)
embeddings[token_id] = embedding
ti_tokens.append(token_id)
if len(ti_tokens) > 1:
ti_manager.pad_tokens[ti_tokens[0]] = ti_tokens[1:]
text_encoder.tensors["text_model.embeddings.token_embedding.weight"] = embeddings.astype(
orig_embeddings.dtype
)
yield ti_tokenizer, ti_manager
finally:
# restore
if orig_embeddings is not None:
text_encoder.tensors["text_model.embeddings.token_embedding.weight"] = orig_embeddings

View File

@ -28,8 +28,6 @@ import torch
import logging
import invokeai.backend.util.logging as logger
from invokeai.app.services.config import get_invokeai_config
from .lora import LoRAModel, TextualInversionModel
from .models import BaseModelType, ModelType, SubModelType, ModelBase
# Maximum size of the cache, in gigs
@ -187,7 +185,9 @@ class ModelCache(object):
# TODO: lock for no copies on simultaneous calls?
cache_entry = self._cached_models.get(key, None)
if cache_entry is None:
self.logger.info(f"Loading model {model_path}, type {base_model}:{model_type}:{submodel}")
self.logger.info(
f"Loading model {model_path}, type {base_model.value}:{model_type.value}{':'+submodel.value if submodel else ''}"
)
# this will remove older cached models until
# there is sufficient room to load the requested model
@ -358,7 +358,8 @@ class ModelCache(object):
# 2 refs:
# 1 from cache_entry
# 1 from getrefcount function
if not cache_entry.locked and refs <= 2:
# 1 from onnx runtime object
if not cache_entry.locked and refs <= 3 if "onnx" in model_key else 2:
self.logger.debug(
f"Unloading model {model_key} to free {(model_size/GIG):.2f} GB (-{(cache_entry.size/GIG):.2f} GB)"
)

View File

@ -228,19 +228,19 @@ the root is the InvokeAI ROOTDIR.
"""
from __future__ import annotations
import os
import hashlib
import os
import textwrap
import yaml
import types
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Tuple, Union, Dict, Set, Callable, types
from shutil import rmtree, move
from typing import Optional, List, Literal, Tuple, Union, Dict, Set, Callable
import torch
import yaml
from omegaconf import OmegaConf
from omegaconf.dictconfig import DictConfig
from pydantic import BaseModel, Field
import invokeai.backend.util.logging as logger
@ -259,6 +259,7 @@ from .models import (
ModelNotFoundException,
InvalidModelException,
DuplicateModelException,
ModelBase,
)
# We are only starting to number the config file with release 3.
@ -276,7 +277,7 @@ class ModelInfo:
hash: str
location: Union[Path, str]
precision: torch.dtype
_cache: ModelCache = None
_cache: Optional[ModelCache] = None
def __enter__(self):
return self.context.__enter__()
@ -361,7 +362,7 @@ class ModelManager(object):
if model_key.startswith("_"):
continue
model_name, base_model, model_type = self.parse_key(model_key)
model_class = MODEL_CLASSES[base_model][model_type]
model_class = self._get_implementation(base_model, model_type)
# alias for config file
model_config["model_format"] = model_config.pop("format")
self.models[model_key] = model_class.create_config(**model_config)
@ -381,18 +382,24 @@ class ModelManager(object):
# causing otherwise unreferenced models to be removed from memory
self._read_models()
def model_exists(
self,
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
) -> bool:
def model_exists(self, model_name: str, base_model: BaseModelType, model_type: ModelType, *, rescan=False) -> bool:
"""
Given a model name, returns True if it is a valid
identifier.
Given a model name, returns True if it is a valid identifier.
:param model_name: symbolic name of the model in models.yaml
:param model_type: ModelType enum indicating the type of model to return
:param base_model: BaseModelType enum indicating the base model used by this model
:param rescan: if True, scan_models_directory
"""
model_key = self.create_key(model_name, base_model, model_type)
return model_key in self.models
exists = model_key in self.models
# if model not found try to find it (maybe file just pasted)
if rescan and not exists:
self.scan_models_directory(base_model=base_model, model_type=model_type)
exists = self.model_exists(model_name, base_model, model_type, rescan=False)
return exists
@classmethod
def create_key(
@ -423,7 +430,7 @@ class ModelManager(object):
return (model_name, base_model, model_type)
def _get_model_cache_path(self, model_path):
return self.app_config.models_path / ".cache" / hashlib.md5(str(model_path).encode()).hexdigest()
return self.resolve_model_path(Path(".cache") / hashlib.md5(str(model_path).encode()).hexdigest())
@classmethod
def initialize_model_config(cls, config_path: Path):
@ -443,39 +450,32 @@ class ModelManager(object):
:param model_name: symbolic name of the model in models.yaml
:param model_type: ModelType enum indicating the type of model to return
:param base_model: BaseModelType enum indicating the base model used by this model
:param submode_typel: an ModelType enum indicating the portion of
:param submodel_type: an ModelType enum indicating the portion of
the model to retrieve (e.g. ModelType.Vae)
"""
model_class = MODEL_CLASSES[base_model][model_type]
model_key = self.create_key(model_name, base_model, model_type)
# if model not found try to find it (maybe file just pasted)
if model_key not in self.models:
self.scan_models_directory(base_model=base_model, model_type=model_type)
if model_key not in self.models:
raise ModelNotFoundException(f"Model not found - {model_key}")
if not self.model_exists(model_name, base_model, model_type, rescan=True):
raise ModelNotFoundException(f"Model not found - {model_key}")
model_config = self.models[model_key]
model_path = self.app_config.root_path / model_config.path
model_config = self._get_model_config(base_model, model_name, model_type)
model_path, is_submodel_override = self._get_model_path(model_config, submodel_type)
if is_submodel_override:
model_type = submodel_type
submodel_type = None
model_class = self._get_implementation(base_model, model_type)
if not model_path.exists():
if model_class.save_to_config:
self.models[model_key].error = ModelError.NotFound
raise Exception(f'Files for model "{model_key}" not found')
raise Exception(f'Files for model "{model_key}" not found at {model_path}')
else:
self.models.pop(model_key, None)
raise ModelNotFoundException(f"Model not found - {model_key}")
# vae/movq override
# TODO:
if submodel_type is not None and hasattr(model_config, submodel_type):
override_path = getattr(model_config, submodel_type)
if override_path:
model_path = self.app_config.root_path / override_path
model_type = submodel_type
submodel_type = None
model_class = MODEL_CLASSES[base_model][model_type]
raise ModelNotFoundException(f'Files for model "{model_key}" not found at {model_path}')
# TODO: path
# TODO: is it accurate to use path as id
@ -513,12 +513,61 @@ class ModelManager(object):
_cache=self.cache,
)
def _get_model_path(
self, model_config: ModelConfigBase, submodel_type: Optional[SubModelType] = None
) -> (Path, bool):
"""Extract a model's filesystem path from its config.
:return: The fully qualified Path of the module (or submodule).
"""
model_path = model_config.path
is_submodel_override = False
# Does the config explicitly override the submodel?
if submodel_type is not None and hasattr(model_config, submodel_type):
submodel_path = getattr(model_config, submodel_type)
if submodel_path is not None and len(submodel_path) > 0:
model_path = getattr(model_config, submodel_type)
is_submodel_override = True
model_path = self.resolve_model_path(model_path)
return model_path, is_submodel_override
def _get_model_config(self, base_model: BaseModelType, model_name: str, model_type: ModelType) -> ModelConfigBase:
"""Get a model's config object."""
model_key = self.create_key(model_name, base_model, model_type)
try:
model_config = self.models[model_key]
except KeyError:
raise ModelNotFoundException(f"Model not found - {model_key}")
return model_config
def _get_implementation(self, base_model: BaseModelType, model_type: ModelType) -> type[ModelBase]:
"""Get the concrete implementation class for a specific model type."""
model_class = MODEL_CLASSES[base_model][model_type]
return model_class
def _instantiate(
self,
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
submodel_type: Optional[SubModelType] = None,
) -> ModelBase:
"""Make a new instance of this model, without loading it."""
model_config = self._get_model_config(base_model, model_name, model_type)
model_path, is_submodel_override = self._get_model_path(model_config, submodel_type)
# FIXME: do non-overriden submodels get the right class?
constructor = self._get_implementation(base_model, model_type)
instance = constructor(model_path, base_model, model_type)
return instance
def model_info(
self,
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
) -> dict:
) -> Union[dict, None]:
"""
Given a model name returns the OmegaConf (dict-like) object describing it.
"""
@ -540,13 +589,16 @@ class ModelManager(object):
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
) -> dict:
) -> Union[dict, None]:
"""
Returns a dict describing one installed model, using
the combined format of the list_models() method.
"""
models = self.list_models(base_model, model_type, model_name)
return models[0] if models else None
if len(models) >= 1:
return models[0]
else:
return None
def list_models(
self,
@ -560,7 +612,7 @@ class ModelManager(object):
model_keys = (
[self.create_key(model_name, base_model, model_type)]
if model_name
if model_name and base_model and model_type
else sorted(self.models, key=str.casefold)
)
models = []
@ -586,7 +638,7 @@ class ModelManager(object):
# expose paths as absolute to help web UI
if path := model_dict.get("path"):
model_dict["path"] = str(self.app_config.root_path / path)
model_dict["path"] = str(self.resolve_model_path(path))
models.append(model_dict)
return models
@ -596,7 +648,7 @@ class ModelManager(object):
Print a table of models and their descriptions. This needs to be redone
"""
# TODO: redo
for model_type, model_dict in self.list_models().items():
for model_dict in self.list_models():
for model_name, model_info in model_dict.items():
line = f'{model_info["name"]:25s} {model_info["type"]:10s} {model_info["description"]}'
print(line)
@ -623,7 +675,7 @@ class ModelManager(object):
self.cache.uncache_model(cache_id)
# if model inside invoke models folder - delete files
model_path = self.app_config.root_path / model_cfg.path
model_path = self.resolve_model_path(model_cfg.path)
cache_path = self._get_model_cache_path(model_path)
if cache_path.exists():
rmtree(str(cache_path))
@ -654,12 +706,11 @@ class ModelManager(object):
The returned dict has the same format as the dict returned by
model_info().
"""
# relativize paths as they go in - this makes it easier to move the root directory around
# relativize paths as they go in - this makes it easier to move the models directory around
if path := model_attributes.get("path"):
if Path(path).is_relative_to(self.app_config.root_path):
model_attributes["path"] = str(Path(path).relative_to(self.app_config.root_path))
model_attributes["path"] = str(self.relative_model_path(Path(path)))
model_class = MODEL_CLASSES[base_model][model_type]
model_class = self._get_implementation(base_model, model_type)
model_config = model_class.create_config(**model_attributes)
model_key = self.create_key(model_name, base_model, model_type)
@ -671,7 +722,7 @@ class ModelManager(object):
# TODO: if path changed and old_model.path inside models folder should we delete this too?
# remove conversion cache as config changed
old_model_path = self.app_config.root_path / old_model.path
old_model_path = self.resolve_model_path(old_model.path)
old_model_cache = self._get_model_cache_path(old_model_path)
if old_model_cache.exists():
if old_model_cache.is_dir():
@ -700,8 +751,8 @@ class ModelManager(object):
model_name: str,
base_model: BaseModelType,
model_type: ModelType,
new_name: str = None,
new_base: BaseModelType = None,
new_name: Optional[str] = None,
new_base: Optional[BaseModelType] = None,
):
"""
Rename or rebase a model.
@ -715,7 +766,7 @@ class ModelManager(object):
if not model_cfg:
raise ModelNotFoundException(f"Unknown model: {model_key}")
old_path = self.app_config.root_path / model_cfg.path
old_path = self.resolve_model_path(model_cfg.path)
new_name = new_name or model_name
new_base = new_base or base_model
new_key = self.create_key(new_name, new_base, model_type)
@ -724,15 +775,15 @@ class ModelManager(object):
# if this is a model file/directory that we manage ourselves, we need to move it
if old_path.is_relative_to(self.app_config.models_path):
new_path = (
self.app_config.root_path
/ "models"
/ BaseModelType(new_base).value
/ ModelType(model_type).value
/ new_name
new_path = self.resolve_model_path(
Path(
BaseModelType(new_base).value,
ModelType(model_type).value,
new_name,
)
)
move(old_path, new_path)
model_cfg.path = str(new_path.relative_to(self.app_config.root_path))
model_cfg.path = str(new_path.relative_to(self.app_config.models_path))
# clean up caches
old_model_cache = self._get_model_cache_path(old_path)
@ -754,7 +805,7 @@ class ModelManager(object):
self,
model_name: str,
base_model: BaseModelType,
model_type: Union[ModelType.Main, ModelType.Vae],
model_type: Literal[ModelType.Main, ModelType.Vae],
dest_directory: Optional[Path] = None,
) -> AddModelResult:
"""
@ -768,6 +819,10 @@ class ModelManager(object):
This will raise a ValueError unless the model is a checkpoint.
"""
info = self.model_info(model_name, base_model, model_type)
if info is None:
raise FileNotFoundError(f"model not found: {model_name}")
if info["model_format"] != "checkpoint":
raise ValueError(f"not a checkpoint format model: {model_name}")
@ -781,8 +836,8 @@ class ModelManager(object):
model_type,
**submodel,
)
checkpoint_path = self.app_config.root_path / info["path"]
old_diffusers_path = self.app_config.models_path / model.location
checkpoint_path = self.resolve_model_path(info["path"])
old_diffusers_path = self.resolve_model_path(model.location)
new_diffusers_path = (
dest_directory or self.app_config.models_path / base_model.value / model_type.value
) / model_name
@ -795,7 +850,7 @@ class ModelManager(object):
info["path"] = (
str(new_diffusers_path)
if dest_directory
else str(new_diffusers_path.relative_to(self.app_config.root_path))
else str(new_diffusers_path.relative_to(self.app_config.models_path))
)
info.pop("config")
@ -810,6 +865,15 @@ class ModelManager(object):
return result
def resolve_model_path(self, path: Union[Path, str]) -> Path:
"""return relative paths based on configured models_path"""
return self.app_config.models_path / path
def relative_model_path(self, model_path: Path) -> Path:
if model_path.is_relative_to(self.app_config.models_path):
model_path = model_path.relative_to(self.app_config.models_path)
return model_path
def search_models(self, search_folder):
self.logger.info(f"Finding Models In: {search_folder}")
models_folder_ckpt = Path(search_folder).glob("**/*.ckpt")
@ -828,7 +892,7 @@ class ModelManager(object):
return search_folder, found_models
def commit(self, conf_file: Path = None) -> None:
def commit(self, conf_file: Optional[Path] = None) -> None:
"""
Write current configuration out to the indicated file.
"""
@ -837,7 +901,7 @@ class ModelManager(object):
for model_key, model_config in self.models.items():
model_name, base_model, model_type = self.parse_key(model_key)
model_class = MODEL_CLASSES[base_model][model_type]
model_class = self._get_implementation(base_model, model_type)
if model_class.save_to_config:
# TODO: or exclude_unset better fits here?
data_to_save[model_key] = model_config.dict(exclude_defaults=True, exclude={"error"})
@ -883,12 +947,19 @@ class ModelManager(object):
new_models_found = False
self.logger.info(f"Scanning {self.app_config.models_path} for new models")
with Chdir(self.app_config.root_path):
with Chdir(self.app_config.models_path):
for model_key, model_config in list(self.models.items()):
model_name, cur_base_model, cur_model_type = self.parse_key(model_key)
model_path = self.app_config.root_path.absolute() / model_config.path
# Patch for relative path bug in older models.yaml - paths should not
# be starting with a hard-coded 'models'. This will also fix up
# models.yaml when committed.
if model_config.path.startswith("models"):
model_config.path = str(Path(*Path(model_config.path).parts[1:]))
model_path = self.resolve_model_path(model_config.path).absolute()
if not model_path.exists():
model_class = MODEL_CLASSES[cur_base_model][cur_model_type]
model_class = self._get_implementation(cur_base_model, cur_model_type)
if model_class.save_to_config:
model_config.error = ModelError.NotFound
self.models.pop(model_key, None)
@ -904,8 +975,8 @@ class ModelManager(object):
for cur_model_type in ModelType:
if model_type is not None and cur_model_type != model_type:
continue
model_class = MODEL_CLASSES[cur_base_model][cur_model_type]
models_dir = self.app_config.models_path / cur_base_model.value / cur_model_type.value
model_class = self._get_implementation(cur_base_model, cur_model_type)
models_dir = self.resolve_model_path(Path(cur_base_model.value, cur_model_type.value))
if not models_dir.exists():
continue # TODO: or create all folders?
@ -919,10 +990,10 @@ class ModelManager(object):
if model_key in self.models:
raise DuplicateModelException(f"Model with key {model_key} added twice")
if model_path.is_relative_to(self.app_config.root_path):
model_path = model_path.relative_to(self.app_config.root_path)
model_config: ModelConfigBase = model_class.probe_config(str(model_path))
model_path = self.relative_model_path(model_path)
model_config: ModelConfigBase = model_class.probe_config(
str(model_path), model_base=cur_base_model
)
self.models[model_key] = model_config
new_models_found = True
except DuplicateModelException as e:
@ -932,12 +1003,11 @@ class ModelManager(object):
except NotImplementedError as e:
self.logger.warning(e)
imported_models = self.autoimport()
imported_models = self.scan_autoimport_directory()
if (new_models_found or imported_models) and self.config_path:
self.commit()
def autoimport(self) -> Dict[str, AddModelResult]:
def scan_autoimport_directory(self) -> Dict[str, AddModelResult]:
"""
Scan the autoimport directory (if defined) and import new models, delete defunct models.
"""
@ -971,7 +1041,7 @@ class ModelManager(object):
# LS: hacky
# Patch in the SD VAE from core so that it is available for use by the UI
try:
self.heuristic_import({config.root_path / "models/core/convert/sd-vae-ft-mse"})
self.heuristic_import({str(self.resolve_model_path("core/convert/sd-vae-ft-mse"))})
except:
pass
@ -980,7 +1050,7 @@ class ModelManager(object):
model_manager=self,
prediction_type_helper=ask_user_for_prediction_type,
)
known_paths = {config.root_path / x["path"] for x in self.list_models()}
known_paths = {self.resolve_model_path(x["path"]) for x in self.list_models()}
directories = {
config.root_path / x
for x in [
@ -999,7 +1069,7 @@ class ModelManager(object):
def heuristic_import(
self,
items_to_import: Set[str],
prediction_type_helper: Callable[[Path], SchedulerPredictionType] = None,
prediction_type_helper: Optional[Callable[[Path], SchedulerPredictionType]] = None,
) -> Dict[str, AddModelResult]:
"""Import a list of paths, repo_ids or URLs. Returns the set of
successfully imported items.

View File

@ -33,7 +33,7 @@ class ModelMerger(object):
self,
model_paths: List[Path],
alpha: float = 0.5,
interp: MergeInterpolationMethod = None,
interp: Optional[MergeInterpolationMethod] = None,
force: bool = False,
**kwargs,
) -> DiffusionPipeline:
@ -73,7 +73,7 @@ class ModelMerger(object):
base_model: Union[BaseModelType, str],
merged_model_name: str,
alpha: float = 0.5,
interp: MergeInterpolationMethod = None,
interp: Optional[MergeInterpolationMethod] = None,
force: bool = False,
merge_dest_directory: Optional[Path] = None,
**kwargs,
@ -122,7 +122,7 @@ class ModelMerger(object):
dump_path.mkdir(parents=True, exist_ok=True)
dump_path = dump_path / merged_model_name
merged_pipe.save_pretrained(dump_path, safe_serialization=1)
merged_pipe.save_pretrained(dump_path, safe_serialization=True)
attributes = dict(
path=str(dump_path),
description=f"Merge of models {', '.join(model_names)}",

View File

@ -17,6 +17,7 @@ from .models import (
SilenceWarnings,
InvalidModelException,
)
from .util import lora_token_vector_length
from .models.base import read_checkpoint_meta
@ -27,7 +28,7 @@ class ModelProbeInfo(object):
variant_type: ModelVariantType
prediction_type: SchedulerPredictionType
upcast_attention: bool
format: Literal["diffusers", "checkpoint", "lycoris"]
format: Literal["diffusers", "checkpoint", "lycoris", "olive", "onnx"]
image_size: int
@ -41,6 +42,7 @@ class ModelProbe(object):
PROBES = {
"diffusers": {},
"checkpoint": {},
"onnx": {},
}
CLASS2TYPE = {
@ -53,7 +55,9 @@ class ModelProbe(object):
}
@classmethod
def register_probe(cls, format: Literal["diffusers", "checkpoint"], model_type: ModelType, probe_class: ProbeBase):
def register_probe(
cls, format: Literal["diffusers", "checkpoint", "onnx"], model_type: ModelType, probe_class: ProbeBase
):
cls.PROBES[format][model_type] = probe_class
@classmethod
@ -95,6 +99,7 @@ class ModelProbe(object):
if format_type == "diffusers"
else cls.get_model_type_from_checkpoint(model_path, model)
)
format_type = "onnx" if model_type == ModelType.ONNX else format_type
probe_class = cls.PROBES[format_type].get(model_type)
if not probe_class:
return None
@ -168,6 +173,8 @@ class ModelProbe(object):
if model:
class_name = model.__class__.__name__
else:
if (folder_path / "unet/model.onnx").exists():
return ModelType.ONNX
if (folder_path / "learned_embeds.bin").exists():
return ModelType.TextualInversion
@ -309,21 +316,16 @@ class LoRACheckpointProbe(CheckpointProbeBase):
def get_base_type(self) -> BaseModelType:
checkpoint = self.checkpoint
key1 = "lora_te_text_model_encoder_layers_0_mlp_fc1.lora_down.weight"
key2 = "lora_te_text_model_encoder_layers_0_self_attn_k_proj.hada_w1_a"
lora_token_vector_length = (
checkpoint[key1].shape[1]
if key1 in checkpoint
else checkpoint[key2].shape[0]
if key2 in checkpoint
else 768
)
if lora_token_vector_length == 768:
token_vector_length = lora_token_vector_length(checkpoint)
if token_vector_length == 768:
return BaseModelType.StableDiffusion1
elif lora_token_vector_length == 1024:
elif token_vector_length == 1024:
return BaseModelType.StableDiffusion2
elif token_vector_length == 2048:
return BaseModelType.StableDiffusionXL
else:
return None
raise InvalidModelException(f"Unknown LoRA type: {self.checkpoint_path}")
class TextualInversionCheckpointProbe(CheckpointProbeBase):
@ -460,6 +462,17 @@ class TextualInversionFolderProbe(FolderProbeBase):
return TextualInversionCheckpointProbe(None, checkpoint=checkpoint).get_base_type()
class ONNXFolderProbe(FolderProbeBase):
def get_format(self) -> str:
return "onnx"
def get_base_type(self) -> BaseModelType:
return BaseModelType.StableDiffusion1
def get_variant_type(self) -> ModelVariantType:
return ModelVariantType.Normal
class ControlNetFolderProbe(FolderProbeBase):
def get_base_type(self) -> BaseModelType:
config_file = self.folder_path / "config.json"
@ -497,3 +510,4 @@ ModelProbe.register_probe("checkpoint", ModelType.Vae, VaeCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.Lora, LoRACheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.TextualInversion, TextualInversionCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.ControlNet, ControlNetCheckpointProbe)
ModelProbe.register_probe("onnx", ModelType.ONNX, ONNXFolderProbe)

View File

@ -23,8 +23,11 @@ from .lora import LoRAModel
from .controlnet import ControlNetModel # TODO:
from .textual_inversion import TextualInversionModel
from .stable_diffusion_onnx import ONNXStableDiffusion1Model, ONNXStableDiffusion2Model
MODEL_CLASSES = {
BaseModelType.StableDiffusion1: {
ModelType.ONNX: ONNXStableDiffusion1Model,
ModelType.Main: StableDiffusion1Model,
ModelType.Vae: VaeModel,
ModelType.Lora: LoRAModel,
@ -32,6 +35,7 @@ MODEL_CLASSES = {
ModelType.TextualInversion: TextualInversionModel,
},
BaseModelType.StableDiffusion2: {
ModelType.ONNX: ONNXStableDiffusion2Model,
ModelType.Main: StableDiffusion2Model,
ModelType.Vae: VaeModel,
ModelType.Lora: LoRAModel,
@ -45,6 +49,7 @@ MODEL_CLASSES = {
ModelType.Lora: LoRAModel,
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.ONNX: ONNXStableDiffusion2Model,
},
BaseModelType.StableDiffusionXLRefiner: {
ModelType.Main: StableDiffusionXLModel,
@ -53,6 +58,7 @@ MODEL_CLASSES = {
ModelType.Lora: LoRAModel,
ModelType.ControlNet: ControlNetModel,
ModelType.TextualInversion: TextualInversionModel,
ModelType.ONNX: ONNXStableDiffusion2Model,
},
# BaseModelType.Kandinsky2_1: {
# ModelType.Main: Kandinsky2_1Model,

View File

@ -8,13 +8,23 @@ from abc import ABCMeta, abstractmethod
from pathlib import Path
from picklescan.scanner import scan_file_path
import torch
import numpy as np
import safetensors.torch
from diffusers import DiffusionPipeline, ConfigMixin
from pathlib import Path
from diffusers import DiffusionPipeline, ConfigMixin, OnnxRuntimeModel
from contextlib import suppress
from pydantic import BaseModel, Field
from typing import List, Dict, Optional, Type, Literal, TypeVar, Generic, Callable, Any, Union
import onnx
from onnx import numpy_helper
from onnxruntime import (
InferenceSession,
SessionOptions,
get_available_providers,
)
class DuplicateModelException(Exception):
pass
@ -37,6 +47,7 @@ class BaseModelType(str, Enum):
class ModelType(str, Enum):
ONNX = "onnx"
Main = "main"
Vae = "vae"
Lora = "lora"
@ -51,6 +62,8 @@ class SubModelType(str, Enum):
Tokenizer = "tokenizer"
Tokenizer2 = "tokenizer_2"
Vae = "vae"
VaeDecoder = "vae_decoder"
VaeEncoder = "vae_encoder"
Scheduler = "scheduler"
SafetyChecker = "safety_checker"
# MoVQ = "movq"
@ -279,8 +292,9 @@ class DiffusersModel(ModelBase):
)
break
except Exception as e:
# print("====ERR LOAD====")
# print(f"{variant}: {e}")
if not str(e).startswith("Error no file"):
print("====ERR LOAD====")
print(f"{variant}: {e}")
pass
else:
raise Exception(f"Failed to load {self.base_model}:{self.model_type}:{child_type} model")
@ -362,6 +376,8 @@ def calc_model_size_by_data(model) -> int:
return _calc_pipeline_by_data(model)
elif isinstance(model, torch.nn.Module):
return _calc_model_by_data(model)
elif isinstance(model, IAIOnnxRuntimeModel):
return _calc_onnx_model_by_data(model)
else:
return 0
@ -382,6 +398,12 @@ def _calc_model_by_data(model) -> int:
return mem
def _calc_onnx_model_by_data(model) -> int:
tensor_size = model.tensors.size() * 2 # The session doubles this
mem = tensor_size # in bytes
return mem
def _fast_safetensors_reader(path: str):
checkpoint = dict()
device = torch.device("meta")
@ -449,3 +471,208 @@ class SilenceWarnings(object):
transformers_logging.set_verbosity(self.transformers_verbosity)
diffusers_logging.set_verbosity(self.diffusers_verbosity)
warnings.simplefilter("default")
ONNX_WEIGHTS_NAME = "model.onnx"
class IAIOnnxRuntimeModel:
class _tensor_access:
def __init__(self, model):
self.model = model
self.indexes = dict()
for idx, obj in enumerate(self.model.proto.graph.initializer):
self.indexes[obj.name] = idx
def __getitem__(self, key: str):
value = self.model.proto.graph.initializer[self.indexes[key]]
return numpy_helper.to_array(value)
def __setitem__(self, key: str, value: np.ndarray):
new_node = numpy_helper.from_array(value)
# set_external_data(new_node, location="in-memory-location")
new_node.name = key
# new_node.ClearField("raw_data")
del self.model.proto.graph.initializer[self.indexes[key]]
self.model.proto.graph.initializer.insert(self.indexes[key], new_node)
# self.model.data[key] = OrtValue.ortvalue_from_numpy(value)
# __delitem__
def __contains__(self, key: str):
return self.indexes[key] in self.model.proto.graph.initializer
def items(self):
raise NotImplementedError("tensor.items")
# return [(obj.name, obj) for obj in self.raw_proto]
def keys(self):
return self.indexes.keys()
def values(self):
raise NotImplementedError("tensor.values")
# return [obj for obj in self.raw_proto]
def size(self):
bytesSum = 0
for node in self.model.proto.graph.initializer:
bytesSum += sys.getsizeof(node.raw_data)
return bytesSum
class _access_helper:
def __init__(self, raw_proto):
self.indexes = dict()
self.raw_proto = raw_proto
for idx, obj in enumerate(raw_proto):
self.indexes[obj.name] = idx
def __getitem__(self, key: str):
return self.raw_proto[self.indexes[key]]
def __setitem__(self, key: str, value):
index = self.indexes[key]
del self.raw_proto[index]
self.raw_proto.insert(index, value)
# __delitem__
def __contains__(self, key: str):
return key in self.indexes
def items(self):
return [(obj.name, obj) for obj in self.raw_proto]
def keys(self):
return self.indexes.keys()
def values(self):
return [obj for obj in self.raw_proto]
def __init__(self, model_path: str, provider: Optional[str]):
self.path = model_path
self.session = None
self.provider = provider
"""
self.data_path = self.path + "_data"
if not os.path.exists(self.data_path):
print(f"Moving model tensors to separate file: {self.data_path}")
tmp_proto = onnx.load(model_path, load_external_data=True)
onnx.save_model(tmp_proto, self.path, save_as_external_data=True, all_tensors_to_one_file=True, location=os.path.basename(self.data_path), size_threshold=1024, convert_attribute=False)
del tmp_proto
gc.collect()
self.proto = onnx.load(model_path, load_external_data=False)
"""
self.proto = onnx.load(model_path, load_external_data=True)
# self.data = dict()
# for tensor in self.proto.graph.initializer:
# name = tensor.name
# if tensor.HasField("raw_data"):
# npt = numpy_helper.to_array(tensor)
# orv = OrtValue.ortvalue_from_numpy(npt)
# # self.data[name] = orv
# # set_external_data(tensor, location="in-memory-location")
# tensor.name = name
# # tensor.ClearField("raw_data")
self.nodes = self._access_helper(self.proto.graph.node)
# self.initializers = self._access_helper(self.proto.graph.initializer)
# print(self.proto.graph.input)
# print(self.proto.graph.initializer)
self.tensors = self._tensor_access(self)
# TODO: integrate with model manager/cache
def create_session(self, height=None, width=None):
if self.session is None or self.session_width != width or self.session_height != height:
# onnx.save(self.proto, "tmp.onnx")
# onnx.save_model(self.proto, "tmp.onnx", save_as_external_data=True, all_tensors_to_one_file=True, location="tmp.onnx_data", size_threshold=1024, convert_attribute=False)
# TODO: something to be able to get weight when they already moved outside of model proto
# (trimmed_model, external_data) = buffer_external_data_tensors(self.proto)
sess = SessionOptions()
# self._external_data.update(**external_data)
# sess.add_external_initializers(list(self.data.keys()), list(self.data.values()))
# sess.enable_profiling = True
# sess.intra_op_num_threads = 1
# sess.inter_op_num_threads = 1
# sess.execution_mode = ExecutionMode.ORT_SEQUENTIAL
# sess.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
# sess.enable_cpu_mem_arena = True
# sess.enable_mem_pattern = True
# sess.add_session_config_entry("session.intra_op.use_xnnpack_threadpool", "1") ########### It's the key code
self.session_height = height
self.session_width = width
if height and width:
sess.add_free_dimension_override_by_name("unet_sample_batch", 2)
sess.add_free_dimension_override_by_name("unet_sample_channels", 4)
sess.add_free_dimension_override_by_name("unet_hidden_batch", 2)
sess.add_free_dimension_override_by_name("unet_hidden_sequence", 77)
sess.add_free_dimension_override_by_name("unet_sample_height", self.session_height)
sess.add_free_dimension_override_by_name("unet_sample_width", self.session_width)
sess.add_free_dimension_override_by_name("unet_time_batch", 1)
providers = []
if self.provider:
providers.append(self.provider)
else:
providers = get_available_providers()
if "TensorrtExecutionProvider" in providers:
providers.remove("TensorrtExecutionProvider")
try:
self.session = InferenceSession(self.proto.SerializeToString(), providers=providers, sess_options=sess)
except Exception as e:
raise e
# self.session = InferenceSession("tmp.onnx", providers=[self.provider], sess_options=self.sess_options)
# self.io_binding = self.session.io_binding()
def release_session(self):
self.session = None
import gc
gc.collect()
return
def __call__(self, **kwargs):
if self.session is None:
raise Exception("You should call create_session before running model")
inputs = {k: np.array(v) for k, v in kwargs.items()}
output_names = self.session.get_outputs()
# for k in inputs:
# self.io_binding.bind_cpu_input(k, inputs[k])
# for name in output_names:
# self.io_binding.bind_output(name.name)
# self.session.run_with_iobinding(self.io_binding, None)
# return self.io_binding.copy_outputs_to_cpu()
return self.session.run(None, inputs)
# compatability with diffusers load code
@classmethod
def from_pretrained(
cls,
model_id: Union[str, Path],
subfolder: Union[str, Path] = None,
file_name: Optional[str] = None,
provider: Optional[str] = None,
sess_options: Optional["SessionOptions"] = None,
**kwargs,
):
file_name = file_name or ONNX_WEIGHTS_NAME
if os.path.isdir(model_id):
model_path = model_id
if subfolder is not None:
model_path = os.path.join(model_path, subfolder)
model_path = os.path.join(model_path, file_name)
else:
model_path = model_id
# load model from local directory
if not os.path.isfile(model_path):
raise Exception(f"Model not found: {model_path}")
# TODO: session options
return cls(model_path, provider=provider)

View File

@ -17,6 +17,7 @@ from .base import (
ModelNotFoundException,
)
from invokeai.app.services.config import InvokeAIAppConfig
import invokeai.backend.util.logging as logger
class ControlNetModelFormat(str, Enum):
@ -66,7 +67,7 @@ class ControlNetModel(ModelBase):
child_type: Optional[SubModelType] = None,
):
if child_type is not None:
raise Exception("There is no child models in controlnet model")
raise Exception("There are no child models in controlnet model")
model = None
for variant in ["fp16", None]:
@ -124,9 +125,7 @@ class ControlNetModel(ModelBase):
return model_path
@classmethod
def _convert_controlnet_ckpt_and_cache(
cls,
model_path: str,
output_path: str,
base_model: BaseModelType,
@ -141,6 +140,7 @@ def _convert_controlnet_ckpt_and_cache(
weights = app_config.root_path / model_path
output_path = Path(output_path)
logger.info(f"Converting {weights} to diffusers format")
# return cached version if it exists
if output_path.exists():
return output_path

View File

@ -1,21 +1,23 @@
import bisect
import os
import torch
from enum import Enum
from typing import Optional, Union, Literal
from pathlib import Path
from typing import Dict, Optional, Union
import torch
from safetensors.torch import load_file
from .base import (
BaseModelType,
InvalidModelException,
ModelBase,
ModelConfigBase,
BaseModelType,
ModelNotFoundException,
ModelType,
SubModelType,
classproperty,
InvalidModelException,
ModelNotFoundException,
)
# TODO: naming
from ..lora import LoRAModel as LoRAModelRaw
class LoRAModelFormat(str, Enum):
LyCORIS = "lycoris"
@ -50,6 +52,7 @@ class LoRAModel(ModelBase):
model = LoRAModelRaw.from_checkpoint(
file_path=self.model_path,
dtype=torch_dtype,
base_model=self.base_model,
)
self.model_size = model.calc_size()
@ -57,7 +60,7 @@ class LoRAModel(ModelBase):
@classproperty
def save_to_config(cls) -> bool:
return False
return True
@classmethod
def detect_format(cls, path: str):
@ -87,3 +90,622 @@ class LoRAModel(ModelBase):
raise NotImplementedError("Diffusers lora not supported")
else:
return model_path
class LoRALayerBase:
# rank: Optional[int]
# alpha: Optional[float]
# bias: Optional[torch.Tensor]
# layer_key: str
# @property
# def scale(self):
# return self.alpha / self.rank if (self.alpha and self.rank) else 1.0
def __init__(
self,
layer_key: str,
values: dict,
):
if "alpha" in values:
self.alpha = values["alpha"].item()
else:
self.alpha = None
if "bias_indices" in values and "bias_values" in values and "bias_size" in values:
self.bias = torch.sparse_coo_tensor(
values["bias_indices"],
values["bias_values"],
tuple(values["bias_size"]),
)
else:
self.bias = None
self.rank = None # set in layer implementation
self.layer_key = layer_key
def get_weight(self, orig_weight: torch.Tensor):
raise NotImplementedError()
def calc_size(self) -> int:
model_size = 0
for val in [self.bias]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
if self.bias is not None:
self.bias = self.bias.to(device=device, dtype=dtype)
# TODO: find and debug lora/locon with bias
class LoRALayer(LoRALayerBase):
# up: torch.Tensor
# mid: Optional[torch.Tensor]
# down: torch.Tensor
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.up = values["lora_up.weight"]
self.down = values["lora_down.weight"]
if "lora_mid.weight" in values:
self.mid = values["lora_mid.weight"]
else:
self.mid = None
self.rank = self.down.shape[0]
def get_weight(self, orig_weight: torch.Tensor):
if self.mid is not None:
up = self.up.reshape(self.up.shape[0], self.up.shape[1])
down = self.down.reshape(self.down.shape[0], self.down.shape[1])
weight = torch.einsum("m n w h, i m, n j -> i j w h", self.mid, up, down)
else:
weight = self.up.reshape(self.up.shape[0], -1) @ self.down.reshape(self.down.shape[0], -1)
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.up, self.mid, self.down]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.up = self.up.to(device=device, dtype=dtype)
self.down = self.down.to(device=device, dtype=dtype)
if self.mid is not None:
self.mid = self.mid.to(device=device, dtype=dtype)
class LoHALayer(LoRALayerBase):
# w1_a: torch.Tensor
# w1_b: torch.Tensor
# w2_a: torch.Tensor
# w2_b: torch.Tensor
# t1: Optional[torch.Tensor] = None
# t2: Optional[torch.Tensor] = None
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.w1_a = values["hada_w1_a"]
self.w1_b = values["hada_w1_b"]
self.w2_a = values["hada_w2_a"]
self.w2_b = values["hada_w2_b"]
if "hada_t1" in values:
self.t1 = values["hada_t1"]
else:
self.t1 = None
if "hada_t2" in values:
self.t2 = values["hada_t2"]
else:
self.t2 = None
self.rank = self.w1_b.shape[0]
def get_weight(self, orig_weight: torch.Tensor):
if self.t1 is None:
weight = (self.w1_a @ self.w1_b) * (self.w2_a @ self.w2_b)
else:
rebuild1 = torch.einsum("i j k l, j r, i p -> p r k l", self.t1, self.w1_b, self.w1_a)
rebuild2 = torch.einsum("i j k l, j r, i p -> p r k l", self.t2, self.w2_b, self.w2_a)
weight = rebuild1 * rebuild2
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.w1_a, self.w1_b, self.w2_a, self.w2_b, self.t1, self.t2]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.w1_a = self.w1_a.to(device=device, dtype=dtype)
self.w1_b = self.w1_b.to(device=device, dtype=dtype)
if self.t1 is not None:
self.t1 = self.t1.to(device=device, dtype=dtype)
self.w2_a = self.w2_a.to(device=device, dtype=dtype)
self.w2_b = self.w2_b.to(device=device, dtype=dtype)
if self.t2 is not None:
self.t2 = self.t2.to(device=device, dtype=dtype)
class LoKRLayer(LoRALayerBase):
# w1: Optional[torch.Tensor] = None
# w1_a: Optional[torch.Tensor] = None
# w1_b: Optional[torch.Tensor] = None
# w2: Optional[torch.Tensor] = None
# w2_a: Optional[torch.Tensor] = None
# w2_b: Optional[torch.Tensor] = None
# t2: Optional[torch.Tensor] = None
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
if "lokr_w1" in values:
self.w1 = values["lokr_w1"]
self.w1_a = None
self.w1_b = None
else:
self.w1 = None
self.w1_a = values["lokr_w1_a"]
self.w1_b = values["lokr_w1_b"]
if "lokr_w2" in values:
self.w2 = values["lokr_w2"]
self.w2_a = None
self.w2_b = None
else:
self.w2 = None
self.w2_a = values["lokr_w2_a"]
self.w2_b = values["lokr_w2_b"]
if "lokr_t2" in values:
self.t2 = values["lokr_t2"]
else:
self.t2 = None
if "lokr_w1_b" in values:
self.rank = values["lokr_w1_b"].shape[0]
elif "lokr_w2_b" in values:
self.rank = values["lokr_w2_b"].shape[0]
else:
self.rank = None # unscaled
def get_weight(self, orig_weight: torch.Tensor):
w1 = self.w1
if w1 is None:
w1 = self.w1_a @ self.w1_b
w2 = self.w2
if w2 is None:
if self.t2 is None:
w2 = self.w2_a @ self.w2_b
else:
w2 = torch.einsum("i j k l, i p, j r -> p r k l", self.t2, self.w2_a, self.w2_b)
if len(w2.shape) == 4:
w1 = w1.unsqueeze(2).unsqueeze(2)
w2 = w2.contiguous()
weight = torch.kron(w1, w2)
return weight
def calc_size(self) -> int:
model_size = super().calc_size()
for val in [self.w1, self.w1_a, self.w1_b, self.w2, self.w2_a, self.w2_b, self.t2]:
if val is not None:
model_size += val.nelement() * val.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
if self.w1 is not None:
self.w1 = self.w1.to(device=device, dtype=dtype)
else:
self.w1_a = self.w1_a.to(device=device, dtype=dtype)
self.w1_b = self.w1_b.to(device=device, dtype=dtype)
if self.w2 is not None:
self.w2 = self.w2.to(device=device, dtype=dtype)
else:
self.w2_a = self.w2_a.to(device=device, dtype=dtype)
self.w2_b = self.w2_b.to(device=device, dtype=dtype)
if self.t2 is not None:
self.t2 = self.t2.to(device=device, dtype=dtype)
class FullLayer(LoRALayerBase):
# weight: torch.Tensor
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.weight = values["diff"]
if len(values.keys()) > 1:
_keys = list(values.keys())
_keys.remove("diff")
raise NotImplementedError(f"Unexpected keys in lora diff layer: {_keys}")
self.rank = None # unscaled
def get_weight(self, orig_weight: torch.Tensor):
return self.weight
def calc_size(self) -> int:
model_size = super().calc_size()
model_size += self.weight.nelement() * self.weight.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.weight = self.weight.to(device=device, dtype=dtype)
class IA3Layer(LoRALayerBase):
# weight: torch.Tensor
# on_input: torch.Tensor
def __init__(
self,
layer_key: str,
values: dict,
):
super().__init__(layer_key, values)
self.weight = values["weight"]
self.on_input = values["on_input"]
self.rank = None # unscaled
def get_weight(self, orig_weight: torch.Tensor):
weight = self.weight
if not self.on_input:
weight = weight.reshape(-1, 1)
return orig_weight * weight
def calc_size(self) -> int:
model_size = super().calc_size()
model_size += self.weight.nelement() * self.weight.element_size()
model_size += self.on_input.nelement() * self.on_input.element_size()
return model_size
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
super().to(device=device, dtype=dtype)
self.weight = self.weight.to(device=device, dtype=dtype)
self.on_input = self.on_input.to(device=device, dtype=dtype)
# TODO: rename all methods used in model logic with Info postfix and remove here Raw postfix
class LoRAModelRaw: # (torch.nn.Module):
_name: str
layers: Dict[str, LoRALayer]
_device: torch.device
_dtype: torch.dtype
def __init__(
self,
name: str,
layers: Dict[str, LoRALayer],
device: torch.device,
dtype: torch.dtype,
):
self._name = name
self._device = device or torch.cpu
self._dtype = dtype or torch.float32
self.layers = layers
@property
def name(self):
return self._name
@property
def device(self):
return self._device
@property
def dtype(self):
return self._dtype
def to(
self,
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
):
# TODO: try revert if exception?
for key, layer in self.layers.items():
layer.to(device=device, dtype=dtype)
self._device = device
self._dtype = dtype
def calc_size(self) -> int:
model_size = 0
for _, layer in self.layers.items():
model_size += layer.calc_size()
return model_size
@classmethod
def _convert_sdxl_keys_to_diffusers_format(cls, state_dict):
"""Convert the keys of an SDXL LoRA state_dict to diffusers format.
The input state_dict can be in either Stability AI format or diffusers format. If the state_dict is already in
diffusers format, then this function will have no effect.
This function is adapted from:
https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L385-L409
Args:
state_dict (Dict[str, Tensor]): The SDXL LoRA state_dict.
Raises:
ValueError: If state_dict contains an unrecognized key, or not all keys could be converted.
Returns:
Dict[str, Tensor]: The diffusers-format state_dict.
"""
converted_count = 0 # The number of Stability AI keys converted to diffusers format.
not_converted_count = 0 # The number of keys that were not converted.
# Get a sorted list of Stability AI UNet keys so that we can efficiently search for keys with matching prefixes.
# For example, we want to efficiently find `input_blocks_4_1` in the list when searching for
# `input_blocks_4_1_proj_in`.
stability_unet_keys = list(SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP)
stability_unet_keys.sort()
new_state_dict = dict()
for full_key, value in state_dict.items():
if full_key.startswith("lora_unet_"):
search_key = full_key.replace("lora_unet_", "")
# Use bisect to find the key in stability_unet_keys that *may* match the search_key's prefix.
position = bisect.bisect_right(stability_unet_keys, search_key)
map_key = stability_unet_keys[position - 1]
# Now, check if the map_key *actually* matches the search_key.
if search_key.startswith(map_key):
new_key = full_key.replace(map_key, SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP[map_key])
new_state_dict[new_key] = value
converted_count += 1
else:
new_state_dict[full_key] = value
not_converted_count += 1
elif full_key.startswith("lora_te1_") or full_key.startswith("lora_te2_"):
# The CLIP text encoders have the same keys in both Stability AI and diffusers formats.
new_state_dict[full_key] = value
continue
else:
raise ValueError(f"Unrecognized SDXL LoRA key prefix: '{full_key}'.")
if converted_count > 0 and not_converted_count > 0:
raise ValueError(
f"The SDXL LoRA could only be partially converted to diffusers format. converted={converted_count},"
f" not_converted={not_converted_count}"
)
return new_state_dict
@classmethod
def from_checkpoint(
cls,
file_path: Union[str, Path],
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
base_model: Optional[BaseModelType] = None,
):
device = device or torch.device("cpu")
dtype = dtype or torch.float32
if isinstance(file_path, str):
file_path = Path(file_path)
model = cls(
device=device,
dtype=dtype,
name=file_path.stem, # TODO:
layers=dict(),
)
if file_path.suffix == ".safetensors":
state_dict = load_file(file_path.absolute().as_posix(), device="cpu")
else:
state_dict = torch.load(file_path, map_location="cpu")
state_dict = cls._group_state(state_dict)
if base_model == BaseModelType.StableDiffusionXL:
state_dict = cls._convert_sdxl_keys_to_diffusers_format(state_dict)
for layer_key, values in state_dict.items():
# lora and locon
if "lora_down.weight" in values:
layer = LoRALayer(layer_key, values)
# loha
elif "hada_w1_b" in values:
layer = LoHALayer(layer_key, values)
# lokr
elif "lokr_w1_b" in values or "lokr_w1" in values:
layer = LoKRLayer(layer_key, values)
# diff
elif "diff" in values:
layer = FullLayer(layer_key, values)
# ia3
elif "weight" in values and "on_input" in values:
layer = IA3Layer(layer_key, values)
else:
print(f">> Encountered unknown lora layer module in {model.name}: {layer_key} - {list(values.keys())}")
raise Exception("Unknown lora format!")
# lower memory consumption by removing already parsed layer values
state_dict[layer_key].clear()
layer.to(device=device, dtype=dtype)
model.layers[layer_key] = layer
return model
@staticmethod
def _group_state(state_dict: dict):
state_dict_groupped = dict()
for key, value in state_dict.items():
stem, leaf = key.split(".", 1)
if stem not in state_dict_groupped:
state_dict_groupped[stem] = dict()
state_dict_groupped[stem][leaf] = value
return state_dict_groupped
# code from
# https://github.com/bmaltais/kohya_ss/blob/2accb1305979ba62f5077a23aabac23b4c37e935/networks/lora_diffusers.py#L15C1-L97C32
def make_sdxl_unet_conversion_map():
"""Create a dict mapping state_dict keys from Stability AI SDXL format to diffusers SDXL format."""
unet_conversion_map_layer = []
for i in range(3): # num_blocks is 3 in sdxl
# loop over downblocks/upblocks
for j in range(2):
# loop over resnets/attentions for downblocks
hf_down_res_prefix = f"down_blocks.{i}.resnets.{j}."
sd_down_res_prefix = f"input_blocks.{3*i + j + 1}.0."
unet_conversion_map_layer.append((sd_down_res_prefix, hf_down_res_prefix))
if i < 3:
# no attention layers in down_blocks.3
hf_down_atn_prefix = f"down_blocks.{i}.attentions.{j}."
sd_down_atn_prefix = f"input_blocks.{3*i + j + 1}.1."
unet_conversion_map_layer.append((sd_down_atn_prefix, hf_down_atn_prefix))
for j in range(3):
# loop over resnets/attentions for upblocks
hf_up_res_prefix = f"up_blocks.{i}.resnets.{j}."
sd_up_res_prefix = f"output_blocks.{3*i + j}.0."
unet_conversion_map_layer.append((sd_up_res_prefix, hf_up_res_prefix))
# if i > 0: commentout for sdxl
# no attention layers in up_blocks.0
hf_up_atn_prefix = f"up_blocks.{i}.attentions.{j}."
sd_up_atn_prefix = f"output_blocks.{3*i + j}.1."
unet_conversion_map_layer.append((sd_up_atn_prefix, hf_up_atn_prefix))
if i < 3:
# no downsample in down_blocks.3
hf_downsample_prefix = f"down_blocks.{i}.downsamplers.0.conv."
sd_downsample_prefix = f"input_blocks.{3*(i+1)}.0.op."
unet_conversion_map_layer.append((sd_downsample_prefix, hf_downsample_prefix))
# no upsample in up_blocks.3
hf_upsample_prefix = f"up_blocks.{i}.upsamplers.0."
sd_upsample_prefix = f"output_blocks.{3*i + 2}.{2}." # change for sdxl
unet_conversion_map_layer.append((sd_upsample_prefix, hf_upsample_prefix))
hf_mid_atn_prefix = "mid_block.attentions.0."
sd_mid_atn_prefix = "middle_block.1."
unet_conversion_map_layer.append((sd_mid_atn_prefix, hf_mid_atn_prefix))
for j in range(2):
hf_mid_res_prefix = f"mid_block.resnets.{j}."
sd_mid_res_prefix = f"middle_block.{2*j}."
unet_conversion_map_layer.append((sd_mid_res_prefix, hf_mid_res_prefix))
unet_conversion_map_resnet = [
# (stable-diffusion, HF Diffusers)
("in_layers.0.", "norm1."),
("in_layers.2.", "conv1."),
("out_layers.0.", "norm2."),
("out_layers.3.", "conv2."),
("emb_layers.1.", "time_emb_proj."),
("skip_connection.", "conv_shortcut."),
]
unet_conversion_map = []
for sd, hf in unet_conversion_map_layer:
if "resnets" in hf:
for sd_res, hf_res in unet_conversion_map_resnet:
unet_conversion_map.append((sd + sd_res, hf + hf_res))
else:
unet_conversion_map.append((sd, hf))
for j in range(2):
hf_time_embed_prefix = f"time_embedding.linear_{j+1}."
sd_time_embed_prefix = f"time_embed.{j*2}."
unet_conversion_map.append((sd_time_embed_prefix, hf_time_embed_prefix))
for j in range(2):
hf_label_embed_prefix = f"add_embedding.linear_{j+1}."
sd_label_embed_prefix = f"label_emb.0.{j*2}."
unet_conversion_map.append((sd_label_embed_prefix, hf_label_embed_prefix))
unet_conversion_map.append(("input_blocks.0.0.", "conv_in."))
unet_conversion_map.append(("out.0.", "conv_norm_out."))
unet_conversion_map.append(("out.2.", "conv_out."))
return unet_conversion_map
SDXL_UNET_STABILITY_TO_DIFFUSERS_MAP = {
sd.rstrip(".").replace(".", "_"): hf.rstrip(".").replace(".", "_") for sd, hf in make_sdxl_unet_conversion_map()
}

View File

@ -1,6 +1,5 @@
import os
import json
import invokeai.backend.util.logging as logger
from enum import Enum
from pydantic import Field
from typing import Literal, Optional
@ -12,6 +11,7 @@ from .base import (
DiffusersModel,
read_checkpoint_meta,
classproperty,
InvalidModelException,
)
from omegaconf import OmegaConf
@ -65,7 +65,7 @@ class StableDiffusionXLModel(DiffusersModel):
in_channels = unet_config["in_channels"]
else:
raise Exception("Not supported stable diffusion diffusers format(possibly onnx?)")
raise InvalidModelException(f"{path} is not a recognized Stable Diffusion diffusers model")
else:
raise NotImplementedError(f"Unknown stable diffusion 2.* format: {model_format}")
@ -80,8 +80,10 @@ class StableDiffusionXLModel(DiffusersModel):
raise Exception("Unkown stable diffusion 2.* model format")
if ckpt_config_path is None:
# TO DO: implement picking
pass
# avoid circular import
from .stable_diffusion import _select_ckpt_config
ckpt_config_path = _select_ckpt_config(kwargs.get("model_base", BaseModelType.StableDiffusionXL), variant)
return cls.create_config(
path=path,

View File

@ -4,6 +4,7 @@ from enum import Enum
from pydantic import Field
from pathlib import Path
from typing import Literal, Optional, Union
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionPipeline
from .base import (
ModelConfigBase,
BaseModelType,
@ -123,6 +124,7 @@ class StableDiffusion1Model(DiffusersModel):
return _convert_ckpt_and_cache(
version=BaseModelType.StableDiffusion1,
model_config=config,
load_safety_checker=False,
output_path=output_path,
)
else:
@ -259,9 +261,11 @@ def _convert_ckpt_and_cache(
"""
app_config = InvokeAIAppConfig.get_config()
weights = app_config.root_path / model_config.path
weights = app_config.models_path / model_config.path
config_file = app_config.root_path / model_config.config
output_path = Path(output_path)
variant = model_config.variant
pipeline_class = StableDiffusionInpaintPipeline if variant == "inpaint" else StableDiffusionPipeline
# return cached version if it exists
if output_path.exists():
@ -288,6 +292,7 @@ def _convert_ckpt_and_cache(
original_config_file=config_file,
extract_ema=True,
scan_needed=True,
pipeline_class=pipeline_class,
from_safetensors=weights.suffix == ".safetensors",
precision=torch_dtype(choose_torch_device()),
**kwargs,

View File

@ -0,0 +1,157 @@
import os
import json
from enum import Enum
from pydantic import Field
from pathlib import Path
from typing import Literal, Optional, Union
from .base import (
ModelBase,
ModelConfigBase,
BaseModelType,
ModelType,
SubModelType,
ModelVariantType,
DiffusersModel,
SchedulerPredictionType,
SilenceWarnings,
read_checkpoint_meta,
classproperty,
OnnxRuntimeModel,
IAIOnnxRuntimeModel,
)
from invokeai.app.services.config import InvokeAIAppConfig
class StableDiffusionOnnxModelFormat(str, Enum):
Olive = "olive"
Onnx = "onnx"
class ONNXStableDiffusion1Model(DiffusersModel):
class Config(ModelConfigBase):
model_format: Literal[StableDiffusionOnnxModelFormat.Onnx]
variant: ModelVariantType
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
assert base_model == BaseModelType.StableDiffusion1
assert model_type == ModelType.ONNX
super().__init__(
model_path=model_path,
base_model=BaseModelType.StableDiffusion1,
model_type=ModelType.ONNX,
)
for child_name, child_type in self.child_types.items():
if child_type is OnnxRuntimeModel:
self.child_types[child_name] = IAIOnnxRuntimeModel
# TODO: check that no optimum models provided
@classmethod
def probe_config(cls, path: str, **kwargs):
model_format = cls.detect_format(path)
in_channels = 4 # TODO:
if in_channels == 9:
variant = ModelVariantType.Inpaint
elif in_channels == 4:
variant = ModelVariantType.Normal
else:
raise Exception("Unkown stable diffusion 1.* model format")
return cls.create_config(
path=path,
model_format=model_format,
variant=variant,
)
@classproperty
def save_to_config(cls) -> bool:
return True
@classmethod
def detect_format(cls, model_path: str):
# TODO: Detect onnx vs olive
return StableDiffusionOnnxModelFormat.Onnx
@classmethod
def convert_if_required(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
return model_path
class ONNXStableDiffusion2Model(DiffusersModel):
# TODO: check that configs overwriten properly
class Config(ModelConfigBase):
model_format: Literal[StableDiffusionOnnxModelFormat.Onnx]
variant: ModelVariantType
prediction_type: SchedulerPredictionType
upcast_attention: bool
def __init__(self, model_path: str, base_model: BaseModelType, model_type: ModelType):
assert base_model == BaseModelType.StableDiffusion2
assert model_type == ModelType.ONNX
super().__init__(
model_path=model_path,
base_model=BaseModelType.StableDiffusion2,
model_type=ModelType.ONNX,
)
for child_name, child_type in self.child_types.items():
if child_type is OnnxRuntimeModel:
self.child_types[child_name] = IAIOnnxRuntimeModel
# TODO: check that no optimum models provided
@classmethod
def probe_config(cls, path: str, **kwargs):
model_format = cls.detect_format(path)
in_channels = 4 # TODO:
if in_channels == 9:
variant = ModelVariantType.Inpaint
elif in_channels == 5:
variant = ModelVariantType.Depth
elif in_channels == 4:
variant = ModelVariantType.Normal
else:
raise Exception("Unkown stable diffusion 2.* model format")
if variant == ModelVariantType.Normal:
prediction_type = SchedulerPredictionType.VPrediction
upcast_attention = True
else:
prediction_type = SchedulerPredictionType.Epsilon
upcast_attention = False
return cls.create_config(
path=path,
model_format=model_format,
variant=variant,
prediction_type=prediction_type,
upcast_attention=upcast_attention,
)
@classproperty
def save_to_config(cls) -> bool:
return True
@classmethod
def detect_format(cls, model_path: str):
# TODO: Detect onnx vs olive
return StableDiffusionOnnxModelFormat.Onnx
@classmethod
def convert_if_required(
cls,
model_path: str,
output_path: str,
config: ModelConfigBase,
base_model: BaseModelType,
) -> str:
return model_path

View File

@ -1,9 +1,14 @@
import os
import torch
import safetensors
from enum import Enum
from pathlib import Path
from typing import Optional, Union, Literal
from typing import Optional
import safetensors
import torch
from diffusers.utils import is_safetensors_available
from omegaconf import OmegaConf
from invokeai.app.services.config import InvokeAIAppConfig
from .base import (
ModelBase,
ModelConfigBase,
@ -18,9 +23,6 @@ from .base import (
InvalidModelException,
ModelNotFoundException,
)
from invokeai.app.services.config import InvokeAIAppConfig
from diffusers.utils import is_safetensors_available
from omegaconf import OmegaConf
class VaeModelFormat(str, Enum):
@ -80,7 +82,7 @@ class VaeModel(ModelBase):
@classmethod
def detect_format(cls, path: str):
if not os.path.exists(path):
raise ModelNotFoundException()
raise ModelNotFoundException(f"Does not exist as local file: {path}")
if os.path.isdir(path):
if os.path.exists(os.path.join(path, "config.json")):

View File

@ -0,0 +1,75 @@
# Copyright (c) 2023 The InvokeAI Development Team
"""Utilities used by the Model Manager"""
def lora_token_vector_length(checkpoint: dict) -> int:
"""
Given a checkpoint in memory, return the lora token vector length
:param checkpoint: The checkpoint
"""
def _get_shape_1(key, tensor, checkpoint):
lora_token_vector_length = None
if "." not in key:
return lora_token_vector_length # wrong key format
model_key, lora_key = key.split(".", 1)
# check lora/locon
if lora_key == "lora_down.weight":
lora_token_vector_length = tensor.shape[1]
# check loha (don't worry about hada_t1/hada_t2 as it used only in 4d shapes)
elif lora_key in ["hada_w1_b", "hada_w2_b"]:
lora_token_vector_length = tensor.shape[1]
# check lokr (don't worry about lokr_t2 as it used only in 4d shapes)
elif "lokr_" in lora_key:
if model_key + ".lokr_w1" in checkpoint:
_lokr_w1 = checkpoint[model_key + ".lokr_w1"]
elif model_key + "lokr_w1_b" in checkpoint:
_lokr_w1 = checkpoint[model_key + ".lokr_w1_b"]
else:
return lora_token_vector_length # unknown format
if model_key + ".lokr_w2" in checkpoint:
_lokr_w2 = checkpoint[model_key + ".lokr_w2"]
elif model_key + "lokr_w2_b" in checkpoint:
_lokr_w2 = checkpoint[model_key + ".lokr_w2_b"]
else:
return lora_token_vector_length # unknown format
lora_token_vector_length = _lokr_w1.shape[1] * _lokr_w2.shape[1]
elif lora_key == "diff":
lora_token_vector_length = tensor.shape[1]
# ia3 can be detected only by shape[0] in text encoder
elif lora_key == "weight" and "lora_unet_" not in model_key:
lora_token_vector_length = tensor.shape[0]
return lora_token_vector_length
lora_token_vector_length = None
lora_te1_length = None
lora_te2_length = None
for key, tensor in checkpoint.items():
if key.startswith("lora_unet_") and ("_attn2_to_k." in key or "_attn2_to_v." in key):
lora_token_vector_length = _get_shape_1(key, tensor, checkpoint)
elif key.startswith("lora_te") and "_self_attn_" in key:
tmp_length = _get_shape_1(key, tensor, checkpoint)
if key.startswith("lora_te_"):
lora_token_vector_length = tmp_length
elif key.startswith("lora_te1_"):
lora_te1_length = tmp_length
elif key.startswith("lora_te2_"):
lora_te2_length = tmp_length
if lora_te1_length is not None and lora_te2_length is not None:
lora_token_vector_length = lora_te1_length + lora_te2_length
if lora_token_vector_length is not None:
break
return lora_token_vector_length

View File

@ -8,4 +8,4 @@ from .diffusers_pipeline import (
)
from .diffusion import InvokeAIDiffuserComponent
from .diffusion.cross_attention_map_saving import AttentionMapSaver
from .diffusion.shared_invokeai_diffusion import PostprocessingSettings
from .diffusion.shared_invokeai_diffusion import PostprocessingSettings, BasicConditioningInfo, SDXLConditioningInfo

View File

@ -4,15 +4,11 @@ import dataclasses
import inspect
import math
import secrets
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import Any, Callable, Generic, List, Optional, Type, TypeVar, Union
from pydantic import Field
from typing import Any, Callable, Generic, List, Optional, Type, Union
import einops
import PIL.Image
import numpy as np
from accelerate.utils import set_seed
import einops
import psutil
import torch
import torchvision.transforms as T
@ -22,36 +18,31 @@ from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
StableDiffusionPipeline,
)
from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import (
StableDiffusionImg2ImgPipeline,
)
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)
from diffusers.schedulers import KarrasDiffusionSchedulers
from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput
from diffusers.utils import PIL_INTERPOLATION
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.outputs import BaseOutput
from torchvision.transforms.functional import resize as tv_resize
from pydantic import Field
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from typing_extensions import ParamSpec
from invokeai.app.services.config import InvokeAIAppConfig
from ..util import CPU_DEVICE, normalize_device
from .diffusion import (
AttentionMapSaver,
InvokeAIDiffuserComponent,
PostprocessingSettings,
BasicConditioningInfo,
)
from .offloading import FullyLoadedModelGroup, ModelGroup
from ..util import normalize_device
@dataclass
class PipelineIntermediateState:
run_id: str
step: int
order: int
total_steps: int
timestep: int
latents: torch.Tensor
predicted_original: Optional[torch.Tensor] = None
@ -102,7 +93,6 @@ class AddsMaskGuidance:
mask_latents: torch.FloatTensor
scheduler: SchedulerMixin
noise: torch.Tensor
_debug: Optional[Callable] = None
def __call__(self, step_output: Union[BaseOutput, SchedulerOutput], t: torch.Tensor, conditioning) -> BaseOutput:
output_class = step_output.__class__ # We'll create a new one with masked data.
@ -139,8 +129,6 @@ class AddsMaskGuidance:
# mask_latents = self.scheduler.scale_model_input(mask_latents, t)
mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype))
if self._debug:
self._debug(masked_input, f"t={t} lerped")
return masked_input
@ -172,33 +160,6 @@ def is_inpainting_model(unet: UNet2DConditionModel):
return unet.conv_in.in_channels == 9
CallbackType = TypeVar("CallbackType")
ReturnType = TypeVar("ReturnType")
ParamType = ParamSpec("ParamType")
@dataclass(frozen=True)
class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]):
"""Convert a generator to a function with a callback and a return value."""
generator_method: Callable[ParamType, ReturnType]
callback_arg_type: Type[CallbackType]
def __call__(
self,
*args: ParamType.args,
callback: Callable[[CallbackType], Any] = None,
**kwargs: ParamType.kwargs,
) -> ReturnType:
result = None
for result in self.generator_method(*args, **kwargs):
if callback is not None and isinstance(result, self.callback_arg_type):
callback(result)
if result is None:
raise AssertionError("why was that an empty generator?")
return result
@dataclass
class ControlNetData:
model: ControlNetModel = Field(default=None)
@ -212,8 +173,8 @@ class ControlNetData:
@dataclass
class ConditioningData:
unconditioned_embeddings: torch.Tensor
text_embeddings: torch.Tensor
unconditioned_embeddings: BasicConditioningInfo
text_embeddings: BasicConditioningInfo
guidance_scale: Union[float, List[float]]
"""
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
@ -289,9 +250,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
feature_extractor ([`CLIPFeatureExtractor`]):
Model that extracts features from generated images to be used as inputs for the `safety_checker`.
"""
_model_group: ModelGroup
ID_LENGTH = 8
def __init__(
self,
@ -303,9 +261,7 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
safety_checker: Optional[StableDiffusionSafetyChecker],
feature_extractor: Optional[CLIPFeatureExtractor],
requires_safety_checker: bool = False,
precision: str = "float32",
control_model: ControlNetModel = None,
execution_device: Optional[torch.device] = None,
):
super().__init__(
vae,
@ -330,9 +286,6 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# control_model=control_model,
)
self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward)
self._model_group = FullyLoadedModelGroup(execution_device or self.unet.device)
self._model_group.install(*self._submodels)
self.control_model = control_model
def _adjust_memory_efficient_attention(self, latents: torch.Tensor):
@ -340,99 +293,41 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
if xformers is available, use it, otherwise use sliced attention.
"""
config = InvokeAIAppConfig.get_config()
if torch.cuda.is_available() and is_xformers_available() and not config.disable_xformers:
self.enable_xformers_memory_efficient_attention()
if self.unet.device.type == "cuda":
if is_xformers_available() and not config.disable_xformers:
self.enable_xformers_memory_efficient_attention()
return
elif hasattr(torch.nn.functional, "scaled_dot_product_attention"):
# diffusers enable sdp automatically
return
if self.unet.device.type == "cpu" or self.unet.device.type == "mps":
mem_free = psutil.virtual_memory().free
elif self.unet.device.type == "cuda":
mem_free, _ = torch.cuda.mem_get_info(normalize_device(self.unet.device))
else:
if self.device.type == "cpu" or self.device.type == "mps":
mem_free = psutil.virtual_memory().free
elif self.device.type == "cuda":
mem_free, _ = torch.cuda.mem_get_info(normalize_device(self.device))
else:
raise ValueError(f"unrecognized device {self.device}")
# input tensor of [1, 4, h/8, w/8]
# output tensor of [16, (h/8 * w/8), (h/8 * w/8)]
bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
max_size_required_for_baddbmm = (
16
* latents.size(dim=2)
* latents.size(dim=3)
* latents.size(dim=2)
* latents.size(dim=3)
* bytes_per_element_needed_for_baddbmm_duplication
)
if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): # 3.3 / 4.0 is from old Invoke code
self.enable_attention_slicing(slice_size="max")
elif torch.backends.mps.is_available():
# diffusers recommends always enabling for mps
self.enable_attention_slicing(slice_size="max")
else:
self.disable_attention_slicing()
raise ValueError(f"unrecognized device {self.unet.device}")
# input tensor of [1, 4, h/8, w/8]
# output tensor of [16, (h/8 * w/8), (h/8 * w/8)]
bytes_per_element_needed_for_baddbmm_duplication = latents.element_size() + 4
max_size_required_for_baddbmm = (
16
* latents.size(dim=2)
* latents.size(dim=3)
* latents.size(dim=2)
* latents.size(dim=3)
* bytes_per_element_needed_for_baddbmm_duplication
)
if max_size_required_for_baddbmm > (mem_free * 3.0 / 4.0): # 3.3 / 4.0 is from old Invoke code
self.enable_attention_slicing(slice_size="max")
elif torch.backends.mps.is_available():
# diffusers recommends always enabling for mps
self.enable_attention_slicing(slice_size="max")
else:
self.disable_attention_slicing()
def to(self, torch_device: Optional[Union[str, torch.device]] = None, silence_dtype_warnings=False):
# overridden method; types match the superclass.
if torch_device is None:
return self
self._model_group.set_device(torch.device(torch_device))
self._model_group.ready()
@property
def device(self) -> torch.device:
return self._model_group.execution_device
@property
def _submodels(self) -> Sequence[torch.nn.Module]:
module_names, _, _ = self.extract_init_dict(dict(self.config))
submodels = []
for name in module_names.keys():
if hasattr(self, name):
value = getattr(self, name)
else:
value = getattr(self.config, name)
if isinstance(value, torch.nn.Module):
submodels.append(value)
return submodels
def image_from_embeddings(
self,
latents: torch.Tensor,
num_inference_steps: int,
conditioning_data: ConditioningData,
*,
noise: torch.Tensor,
callback: Callable[[PipelineIntermediateState], None] = None,
run_id=None,
) -> InvokeAIStableDiffusionPipelineOutput:
r"""
Function invoked when calling the pipeline for generation.
:param conditioning_data:
:param latents: Pre-generated un-noised latents, to be used as inputs for
image generation. Can be used to tweak the same generation with different prompts.
:param num_inference_steps: The number of denoising steps. More denoising steps usually lead to a higher quality
image at the expense of slower inference.
:param noise: Noise to add to the latents, sampled from a Gaussian distribution.
:param callback:
:param run_id:
"""
result_latents, result_attention_map_saver = self.latents_from_embeddings(
latents,
num_inference_steps,
conditioning_data,
noise=noise,
run_id=run_id,
callback=callback,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
torch.cuda.empty_cache()
with torch.inference_mode():
image = self.decode_latents(result_latents)
output = InvokeAIStableDiffusionPipelineOutput(
images=image,
nsfw_content_detected=[],
attention_map_saver=result_attention_map_saver,
)
return self.check_for_safety(output, dtype=conditioning_data.dtype)
raise Exception("Should not be called")
def latents_from_embeddings(
self,
@ -440,35 +335,72 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
num_inference_steps: int,
conditioning_data: ConditioningData,
*,
noise: torch.Tensor,
timesteps=None,
noise: Optional[torch.Tensor],
timesteps: torch.Tensor,
init_timestep: torch.Tensor,
additional_guidance: List[Callable] = None,
run_id=None,
callback: Callable[[PipelineIntermediateState], None] = None,
control_data: List[ControlNetData] = None,
mask: Optional[torch.Tensor] = None,
seed: Optional[int] = None,
) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]:
if self.scheduler.config.get("cpu_only", False):
scheduler_device = torch.device("cpu")
else:
scheduler_device = self._model_group.device_for(self.unet)
if init_timestep.shape[0] == 0:
return latents, None
if timesteps is None:
self.scheduler.set_timesteps(num_inference_steps, device=scheduler_device)
timesteps = self.scheduler.timesteps
infer_latents_from_embeddings = GeneratorToCallbackinator(
self.generate_latents_from_embeddings, PipelineIntermediateState
)
result: PipelineIntermediateState = infer_latents_from_embeddings(
latents,
timesteps,
conditioning_data,
noise=noise,
run_id=run_id,
additional_guidance=additional_guidance,
control_data=control_data,
callback=callback,
)
return result.latents, result.attention_map_saver
if additional_guidance is None:
additional_guidance = []
orig_latents = latents.clone()
batch_size = latents.shape[0]
batched_t = init_timestep.expand(batch_size)
if noise is not None:
# latents = noise * self.scheduler.init_noise_sigma # it's like in t2l according to diffusers
latents = self.scheduler.add_noise(latents, noise, batched_t)
if mask is not None:
if is_inpainting_model(self.unet):
# You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
# (that's why there's a mask!) but it seems to really want that blanked out.
# masked_latents = latents * torch.where(mask < 0.5, 1, 0) TODO: inpaint/outpaint/infill
# TODO: we should probably pass this in so we don't have to try/finally around setting it.
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(self._unet_forward, mask, orig_latents)
else:
# if no noise provided, noisify unmasked area based on seed(or 0 as fallback)
if noise is None:
noise = torch.randn(
orig_latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(seed or 0),
).to(device=orig_latents.device, dtype=orig_latents.dtype)
latents = self.scheduler.add_noise(latents, noise, batched_t)
latents = torch.lerp(
orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype)
)
additional_guidance.append(AddsMaskGuidance(mask, orig_latents, self.scheduler, noise))
try:
latents, attention_map_saver = self.generate_latents_from_embeddings(
latents,
timesteps,
conditioning_data,
additional_guidance=additional_guidance,
control_data=control_data,
callback=callback,
)
finally:
self.invokeai_diffuser.model_forward_callback = self._unet_forward
# restore unmasked part
if mask is not None:
latents = torch.lerp(orig_latents, latents.to(dtype=orig_latents.dtype), mask.to(dtype=orig_latents.dtype))
return latents, attention_map_saver
def generate_latents_from_embeddings(
self,
@ -476,42 +408,40 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
timesteps,
conditioning_data: ConditioningData,
*,
noise: torch.Tensor,
run_id: str = None,
additional_guidance: List[Callable] = None,
control_data: List[ControlNetData] = None,
callback: Callable[[PipelineIntermediateState], None] = None,
):
self._adjust_memory_efficient_attention(latents)
if run_id is None:
run_id = secrets.token_urlsafe(self.ID_LENGTH)
if additional_guidance is None:
additional_guidance = []
batch_size = latents.shape[0]
attention_map_saver: Optional[AttentionMapSaver] = None
if timesteps.shape[0] == 0:
return latents, attention_map_saver
extra_conditioning_info = conditioning_data.extra
with self.invokeai_diffuser.custom_attention_context(
self.invokeai_diffuser.model,
extra_conditioning_info=extra_conditioning_info,
step_count=len(self.scheduler.timesteps),
):
yield PipelineIntermediateState(
run_id=run_id,
step=-1,
timestep=self.scheduler.config.num_train_timesteps,
latents=latents,
)
if callback is not None:
callback(
PipelineIntermediateState(
step=-1,
order=self.scheduler.order,
total_steps=len(timesteps),
timestep=self.scheduler.config.num_train_timesteps,
latents=latents,
)
)
batch_size = latents.shape[0]
batched_t = torch.full(
(batch_size,),
timesteps[0],
dtype=timesteps.dtype,
device=self._model_group.device_for(self.unet),
)
latents = self.scheduler.add_noise(latents, noise, batched_t)
attention_map_saver: Optional[AttentionMapSaver] = None
# print("timesteps:", timesteps)
for i, t in enumerate(self.progress_bar(timesteps)):
batched_t.fill_(t)
batched_t = t.expand(batch_size)
step_output = self.step(
batched_t,
latents,
@ -540,14 +470,18 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# attention_map_saver = AttentionMapSaver(token_ids=attention_map_token_ids, latents_shape=latents.shape[-2:])
# self.invokeai_diffuser.setup_attention_map_saving(attention_map_saver)
yield PipelineIntermediateState(
run_id=run_id,
step=i,
timestep=int(t),
latents=latents,
predicted_original=predicted_original,
attention_map_saver=attention_map_saver,
)
if callback is not None:
callback(
PipelineIntermediateState(
step=i,
order=self.scheduler.order,
total_steps=len(timesteps),
timestep=int(t),
latents=latents,
predicted_original=predicted_original,
attention_map_saver=attention_map_saver,
)
)
return latents, attention_map_saver
@ -569,95 +503,39 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
# TODO: should this scaling happen here or inside self._unet_forward?
# i.e. before or after passing it to InvokeAIDiffuserComponent
unet_latent_input = self.scheduler.scale_model_input(latents, timestep)
latent_model_input = self.scheduler.scale_model_input(latents, timestep)
# default is no controlnet, so set controlnet processing output to None
down_block_res_samples, mid_block_res_sample = None, None
controlnet_down_block_samples, controlnet_mid_block_sample = None, None
if control_data is not None:
# control_data should be type List[ControlNetData]
# this loop covers both ControlNet (one ControlNetData in list)
# and MultiControlNet (multiple ControlNetData in list)
for i, control_datum in enumerate(control_data):
control_mode = control_datum.control_mode
# soft_injection and cfg_injection are the two ControlNet control_mode booleans
# that are combined at higher level to make control_mode enum
# soft_injection determines whether to do per-layer re-weighting adjustment (if True)
# or default weighting (if False)
soft_injection = control_mode == "more_prompt" or control_mode == "more_control"
# cfg_injection = determines whether to apply ControlNet to only the conditional (if True)
# or the default both conditional and unconditional (if False)
cfg_injection = control_mode == "more_control" or control_mode == "unbalanced"
controlnet_down_block_samples, controlnet_mid_block_sample = self.invokeai_diffuser.do_controlnet_step(
control_data=control_data,
sample=latent_model_input,
timestep=timestep,
step_index=step_index,
total_step_count=total_step_count,
conditioning_data=conditioning_data,
)
first_control_step = math.floor(control_datum.begin_step_percent * total_step_count)
last_control_step = math.ceil(control_datum.end_step_percent * total_step_count)
# only apply controlnet if current step is within the controlnet's begin/end step range
if step_index >= first_control_step and step_index <= last_control_step:
if cfg_injection:
control_latent_input = unet_latent_input
else:
# expand the latents input to control model if doing classifier free guidance
# (which I think for now is always true, there is conditional elsewhere that stops execution if
# classifier_free_guidance is <= 1.0 ?)
control_latent_input = torch.cat([unet_latent_input] * 2)
if cfg_injection: # only applying ControlNet to conditional instead of in unconditioned
encoder_hidden_states = conditioning_data.text_embeddings
encoder_attention_mask = None
else:
(
encoder_hidden_states,
encoder_attention_mask,
) = self.invokeai_diffuser._concat_conditionings_for_batch(
conditioning_data.unconditioned_embeddings,
conditioning_data.text_embeddings,
)
if isinstance(control_datum.weight, list):
# if controlnet has multiple weights, use the weight for the current step
controlnet_weight = control_datum.weight[step_index]
else:
# if controlnet has a single weight, use it for all steps
controlnet_weight = control_datum.weight
# controlnet(s) inference
down_samples, mid_sample = control_datum.model(
sample=control_latent_input,
timestep=timestep,
encoder_hidden_states=encoder_hidden_states,
controlnet_cond=control_datum.image_tensor,
conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale
encoder_attention_mask=encoder_attention_mask,
guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel
return_dict=False,
)
if cfg_injection:
# Inferred ControlNet only for the conditional batch.
# To apply the output of ControlNet to both the unconditional and conditional batches,
# prepend zeros for unconditional batch
down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples]
mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample])
if down_block_res_samples is None and mid_block_res_sample is None:
down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
else:
# add controlnet outputs together if have multiple controlnets
down_block_res_samples = [
samples_prev + samples_curr
for samples_prev, samples_curr in zip(down_block_res_samples, down_samples)
]
mid_block_res_sample += mid_sample
# predict the noise residual
noise_pred = self.invokeai_diffuser.do_diffusion_step(
x=unet_latent_input,
sigma=t,
unconditioning=conditioning_data.unconditioned_embeddings,
conditioning=conditioning_data.text_embeddings,
unconditional_guidance_scale=conditioning_data.guidance_scale,
uc_noise_pred, c_noise_pred = self.invokeai_diffuser.do_unet_step(
sample=latent_model_input,
timestep=t, # TODO: debug how handled batched and non batched timesteps
step_index=step_index,
total_step_count=total_step_count,
down_block_additional_residuals=down_block_res_samples, # from controlnet(s)
mid_block_additional_residual=mid_block_res_sample, # from controlnet(s)
conditioning_data=conditioning_data,
# extra:
down_block_additional_residuals=controlnet_down_block_samples, # from controlnet(s)
mid_block_additional_residual=controlnet_mid_block_sample, # from controlnet(s)
)
guidance_scale = conditioning_data.guidance_scale
if isinstance(guidance_scale, list):
guidance_scale = guidance_scale[step_index]
noise_pred = self.invokeai_diffuser._combine(
uc_noise_pred,
c_noise_pred,
guidance_scale,
)
# compute the previous noisy sample x_t -> x_t-1
@ -699,224 +577,3 @@ class StableDiffusionGeneratorPipeline(StableDiffusionPipeline):
cross_attention_kwargs=cross_attention_kwargs,
**kwargs,
).sample
def img2img_from_embeddings(
self,
init_image: Union[torch.FloatTensor, PIL.Image.Image],
strength: float,
num_inference_steps: int,
conditioning_data: ConditioningData,
*,
callback: Callable[[PipelineIntermediateState], None] = None,
run_id=None,
noise_func=None,
seed=None,
) -> InvokeAIStableDiffusionPipelineOutput:
if isinstance(init_image, PIL.Image.Image):
init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB"))
if init_image.dim() == 3:
init_image = einops.rearrange(init_image, "c h w -> 1 c h w")
# 6. Prepare latent variables
initial_latents = self.non_noised_latents_from_image(
init_image,
device=self._model_group.device_for(self.unet),
dtype=self.unet.dtype,
)
if seed is not None:
set_seed(seed)
noise = noise_func(initial_latents)
return self.img2img_from_latents_and_embeddings(
initial_latents,
num_inference_steps,
conditioning_data,
strength,
noise,
run_id,
callback,
)
def img2img_from_latents_and_embeddings(
self,
initial_latents,
num_inference_steps,
conditioning_data: ConditioningData,
strength,
noise: torch.Tensor,
run_id=None,
callback=None,
) -> InvokeAIStableDiffusionPipelineOutput:
timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
result_latents, result_attention_maps = self.latents_from_embeddings(
latents=initial_latents
if strength < 1.0
else torch.zeros_like(initial_latents, device=initial_latents.device, dtype=initial_latents.dtype),
num_inference_steps=num_inference_steps,
conditioning_data=conditioning_data,
timesteps=timesteps,
noise=noise,
run_id=run_id,
callback=callback,
)
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
torch.cuda.empty_cache()
with torch.inference_mode():
image = self.decode_latents(result_latents)
output = InvokeAIStableDiffusionPipelineOutput(
images=image,
nsfw_content_detected=[],
attention_map_saver=result_attention_maps,
)
return self.check_for_safety(output, dtype=conditioning_data.dtype)
def get_img2img_timesteps(self, num_inference_steps: int, strength: float, device=None) -> (torch.Tensor, int):
img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components)
assert img2img_pipeline.scheduler is self.scheduler
if self.scheduler.config.get("cpu_only", False):
scheduler_device = torch.device("cpu")
else:
scheduler_device = self._model_group.device_for(self.unet)
img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=scheduler_device)
timesteps, adjusted_steps = img2img_pipeline.get_timesteps(
num_inference_steps, strength, device=scheduler_device
)
# Workaround for low strength resulting in zero timesteps.
# TODO: submit upstream fix for zero-step img2img
if timesteps.numel() == 0:
timesteps = self.scheduler.timesteps[-1:]
adjusted_steps = timesteps.numel()
return timesteps, adjusted_steps
def inpaint_from_embeddings(
self,
init_image: torch.FloatTensor,
mask: torch.FloatTensor,
strength: float,
num_inference_steps: int,
conditioning_data: ConditioningData,
*,
callback: Callable[[PipelineIntermediateState], None] = None,
run_id=None,
noise_func=None,
seed=None,
) -> InvokeAIStableDiffusionPipelineOutput:
device = self._model_group.device_for(self.unet)
latents_dtype = self.unet.dtype
if isinstance(init_image, PIL.Image.Image):
init_image = image_resized_to_grid_as_tensor(init_image.convert("RGB"))
init_image = init_image.to(device=device, dtype=latents_dtype)
mask = mask.to(device=device, dtype=latents_dtype)
if init_image.dim() == 3:
init_image = init_image.unsqueeze(0)
timesteps, _ = self.get_img2img_timesteps(num_inference_steps, strength)
# 6. Prepare latent variables
# can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents
# because we have our own noise function
init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype)
if seed is not None:
set_seed(seed)
noise = noise_func(init_image_latents)
if mask.dim() == 3:
mask = mask.unsqueeze(0)
latent_mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR).to(
device=device, dtype=latents_dtype
)
guidance: List[Callable] = []
if is_inpainting_model(self.unet):
# You'd think the inpainting model wouldn't be paying attention to the area it is going to repaint
# (that's why there's a mask!) but it seems to really want that blanked out.
masked_init_image = init_image * torch.where(mask < 0.5, 1, 0)
masked_latents = self.non_noised_latents_from_image(masked_init_image, device=device, dtype=latents_dtype)
# TODO: we should probably pass this in so we don't have to try/finally around setting it.
self.invokeai_diffuser.model_forward_callback = AddsMaskLatents(
self._unet_forward, latent_mask, masked_latents
)
else:
guidance.append(AddsMaskGuidance(latent_mask, init_image_latents, self.scheduler, noise))
try:
result_latents, result_attention_maps = self.latents_from_embeddings(
latents=init_image_latents
if strength < 1.0
else torch.zeros_like(
init_image_latents, device=init_image_latents.device, dtype=init_image_latents.dtype
),
num_inference_steps=num_inference_steps,
conditioning_data=conditioning_data,
noise=noise,
timesteps=timesteps,
additional_guidance=guidance,
run_id=run_id,
callback=callback,
)
finally:
self.invokeai_diffuser.model_forward_callback = self._unet_forward
# https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699
torch.cuda.empty_cache()
with torch.inference_mode():
image = self.decode_latents(result_latents)
output = InvokeAIStableDiffusionPipelineOutput(
images=image,
nsfw_content_detected=[],
attention_map_saver=result_attention_maps,
)
return self.check_for_safety(output, dtype=conditioning_data.dtype)
def non_noised_latents_from_image(self, init_image, *, device: torch.device, dtype):
init_image = init_image.to(device=device, dtype=dtype)
with torch.inference_mode():
self._model_group.load(self.vae)
init_latent_dist = self.vae.encode(init_image).latent_dist
init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible!
init_latents = 0.18215 * init_latents
return init_latents
def check_for_safety(self, output, dtype):
with torch.inference_mode():
screened_images, has_nsfw_concept = self.run_safety_checker(output.images, dtype=dtype)
screened_attention_map_saver = None
if has_nsfw_concept is None or not has_nsfw_concept:
screened_attention_map_saver = output.attention_map_saver
return InvokeAIStableDiffusionPipelineOutput(
screened_images,
has_nsfw_concept,
# block the attention maps if NSFW content is detected
attention_map_saver=screened_attention_map_saver,
)
def run_safety_checker(self, image, device=None, dtype=None):
# overriding to use the model group for device info instead of requiring the caller to know.
if self.safety_checker is not None:
device = self._model_group.device_for(self.safety_checker)
return super().run_safety_checker(image, device, dtype)
def decode_latents(self, latents):
# Explicit call to get the vae loaded, since `decode` isn't the forward method.
self._model_group.load(self.vae)
return super().decode_latents(latents)
def debug_latents(self, latents, msg):
from invokeai.backend.image_util import debug_image
with torch.inference_mode():
decoded = self.numpy_to_pil(self.decode_latents(latents))
for i, img in enumerate(decoded):
debug_image(img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True)

View File

@ -3,4 +3,9 @@ Initialization file for invokeai.models.diffusion
"""
from .cross_attention_control import InvokeAICrossAttentionMixin
from .cross_attention_map_saving import AttentionMapSaver
from .shared_invokeai_diffusion import InvokeAIDiffuserComponent, PostprocessingSettings
from .shared_invokeai_diffusion import (
InvokeAIDiffuserComponent,
PostprocessingSettings,
BasicConditioningInfo,
SDXLConditioningInfo,
)

View File

@ -1,6 +1,8 @@
from __future__ import annotations
from contextlib import contextmanager
from dataclasses import dataclass
from math import ceil
import math
from typing import Any, Callable, Dict, Optional, Union, List
import numpy as np
@ -32,6 +34,29 @@ ModelForwardCallback: TypeAlias = Union[
]
@dataclass
class BasicConditioningInfo:
embeds: torch.Tensor
extra_conditioning: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo]
# weight: float
# mode: ConditioningAlgo
def to(self, device, dtype=None):
self.embeds = self.embeds.to(device=device, dtype=dtype)
return self
@dataclass
class SDXLConditioningInfo(BasicConditioningInfo):
pooled_embeds: torch.Tensor
add_time_ids: torch.Tensor
def to(self, device, dtype=None):
self.pooled_embeds = self.pooled_embeds.to(device=device, dtype=dtype)
self.add_time_ids = self.add_time_ids.to(device=device, dtype=dtype)
return super().to(device=device, dtype=dtype)
@dataclass(frozen=True)
class PostprocessingSettings:
threshold: float
@ -78,10 +103,9 @@ class InvokeAIDiffuserComponent:
self.cross_attention_control_context = None
self.sequential_guidance = config.sequential_guidance
@classmethod
@contextmanager
def custom_attention_context(
cls,
self,
unet: UNet2DConditionModel, # note: also may futz with the text encoder depending on requested LoRAs
extra_conditioning_info: Optional[ExtraConditioningInfo],
step_count: int,
@ -91,18 +115,19 @@ class InvokeAIDiffuserComponent:
old_attn_processors = unet.attn_processors
# Load lora conditions into the model
if extra_conditioning_info.wants_cross_attention_control:
cross_attention_control_context = Context(
self.cross_attention_control_context = Context(
arguments=extra_conditioning_info.cross_attention_control_args,
step_count=step_count,
)
setup_cross_attention_control_attention_processors(
unet,
cross_attention_control_context,
self.cross_attention_control_context,
)
try:
yield None
finally:
self.cross_attention_control_context = None
if old_attn_processors is not None:
unet.set_attn_processor(old_attn_processors)
# TODO resuscitate attention map saving
@ -127,33 +152,125 @@ class InvokeAIDiffuserComponent:
for _, module in tokens_cross_attention_modules:
module.set_attention_slice_calculated_callback(None)
def do_diffusion_step(
def do_controlnet_step(
self,
x: torch.Tensor,
sigma: torch.Tensor,
unconditioning: Union[torch.Tensor, dict],
conditioning: Union[torch.Tensor, dict],
# unconditional_guidance_scale: float,
unconditional_guidance_scale: Union[float, List[float]],
step_index: Optional[int] = None,
total_step_count: Optional[int] = None,
control_data,
sample: torch.Tensor,
timestep: torch.Tensor,
step_index: int,
total_step_count: int,
conditioning_data,
):
down_block_res_samples, mid_block_res_sample = None, None
# control_data should be type List[ControlNetData]
# this loop covers both ControlNet (one ControlNetData in list)
# and MultiControlNet (multiple ControlNetData in list)
for i, control_datum in enumerate(control_data):
control_mode = control_datum.control_mode
# soft_injection and cfg_injection are the two ControlNet control_mode booleans
# that are combined at higher level to make control_mode enum
# soft_injection determines whether to do per-layer re-weighting adjustment (if True)
# or default weighting (if False)
soft_injection = control_mode == "more_prompt" or control_mode == "more_control"
# cfg_injection = determines whether to apply ControlNet to only the conditional (if True)
# or the default both conditional and unconditional (if False)
cfg_injection = control_mode == "more_control" or control_mode == "unbalanced"
first_control_step = math.floor(control_datum.begin_step_percent * total_step_count)
last_control_step = math.ceil(control_datum.end_step_percent * total_step_count)
# only apply controlnet if current step is within the controlnet's begin/end step range
if step_index >= first_control_step and step_index <= last_control_step:
if cfg_injection:
sample_model_input = sample
else:
# expand the latents input to control model if doing classifier free guidance
# (which I think for now is always true, there is conditional elsewhere that stops execution if
# classifier_free_guidance is <= 1.0 ?)
sample_model_input = torch.cat([sample] * 2)
added_cond_kwargs = None
if cfg_injection: # only applying ControlNet to conditional instead of in unconditioned
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
"time_ids": conditioning_data.text_embeddings.add_time_ids,
}
encoder_hidden_states = conditioning_data.text_embeddings.embeds
encoder_attention_mask = None
else:
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
added_cond_kwargs = {
"text_embeds": torch.cat(
[
# TODO: how to pad? just by zeros? or even truncate?
conditioning_data.unconditioned_embeddings.pooled_embeds,
conditioning_data.text_embeddings.pooled_embeds,
],
dim=0,
),
"time_ids": torch.cat(
[
conditioning_data.unconditioned_embeddings.add_time_ids,
conditioning_data.text_embeddings.add_time_ids,
],
dim=0,
),
}
(
encoder_hidden_states,
encoder_attention_mask,
) = self._concat_conditionings_for_batch(
conditioning_data.unconditioned_embeddings.embeds,
conditioning_data.text_embeddings.embeds,
)
if isinstance(control_datum.weight, list):
# if controlnet has multiple weights, use the weight for the current step
controlnet_weight = control_datum.weight[step_index]
else:
# if controlnet has a single weight, use it for all steps
controlnet_weight = control_datum.weight
# controlnet(s) inference
down_samples, mid_sample = control_datum.model(
sample=sample_model_input,
timestep=timestep,
encoder_hidden_states=encoder_hidden_states,
controlnet_cond=control_datum.image_tensor,
conditioning_scale=controlnet_weight, # controlnet specific, NOT the guidance scale
encoder_attention_mask=encoder_attention_mask,
guess_mode=soft_injection, # this is still called guess_mode in diffusers ControlNetModel
return_dict=False,
)
if cfg_injection:
# Inferred ControlNet only for the conditional batch.
# To apply the output of ControlNet to both the unconditional and conditional batches,
# prepend zeros for unconditional batch
down_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_samples]
mid_sample = torch.cat([torch.zeros_like(mid_sample), mid_sample])
if down_block_res_samples is None and mid_block_res_sample is None:
down_block_res_samples, mid_block_res_sample = down_samples, mid_sample
else:
# add controlnet outputs together if have multiple controlnets
down_block_res_samples = [
samples_prev + samples_curr
for samples_prev, samples_curr in zip(down_block_res_samples, down_samples)
]
mid_block_res_sample += mid_sample
return down_block_res_samples, mid_block_res_sample
def do_unet_step(
self,
sample: torch.Tensor,
timestep: torch.Tensor,
conditioning_data, # TODO: type
step_index: int,
total_step_count: int,
**kwargs,
):
"""
:param x: current latents
:param sigma: aka t, passed to the internal model to control how much denoising will occur
:param unconditioning: embeddings for unconditioned output. for hybrid conditioning this is a dict of tensors [B x 77 x 768], otherwise a single tensor [B x 77 x 768]
:param conditioning: embeddings for conditioned output. for hybrid conditioning this is a dict of tensors [B x 77 x 768], otherwise a single tensor [B x 77 x 768]
:param unconditional_guidance_scale: aka CFG scale, controls how much effect the conditioning tensor has
:param step_index: counts upwards from 0 to (step_count-1) (as passed to setup_cross_attention_control, if using). May be called multiple times for a single step, therefore do not assume that its value will monotically increase. If None, will be estimated by comparing sigma against self.model.sigmas .
:return: the new latents after applying the model to x using unscaled unconditioning and CFG-scaled conditioning.
"""
if isinstance(unconditional_guidance_scale, list):
guidance_scale = unconditional_guidance_scale[step_index]
else:
guidance_scale = unconditional_guidance_scale
cross_attention_control_types_to_do = []
context: Context = self.cross_attention_control_context
if self.cross_attention_control_context is not None:
@ -163,25 +280,15 @@ class InvokeAIDiffuserComponent:
)
wants_cross_attention_control = len(cross_attention_control_types_to_do) > 0
wants_hybrid_conditioning = isinstance(conditioning, dict)
if wants_hybrid_conditioning:
unconditioned_next_x, conditioned_next_x = self._apply_hybrid_conditioning(
x,
sigma,
unconditioning,
conditioning,
**kwargs,
)
elif wants_cross_attention_control:
if wants_cross_attention_control:
(
unconditioned_next_x,
conditioned_next_x,
) = self._apply_cross_attention_controlled_conditioning(
x,
sigma,
unconditioning,
conditioning,
sample,
timestep,
conditioning_data,
cross_attention_control_types_to_do,
**kwargs,
)
@ -190,10 +297,9 @@ class InvokeAIDiffuserComponent:
unconditioned_next_x,
conditioned_next_x,
) = self._apply_standard_conditioning_sequentially(
x,
sigma,
unconditioning,
conditioning,
sample,
timestep,
conditioning_data,
**kwargs,
)
@ -202,21 +308,13 @@ class InvokeAIDiffuserComponent:
unconditioned_next_x,
conditioned_next_x,
) = self._apply_standard_conditioning(
x,
sigma,
unconditioning,
conditioning,
sample,
timestep,
conditioning_data,
**kwargs,
)
combined_next_x = self._combine(
# unconditioned_next_x, conditioned_next_x, unconditional_guidance_scale
unconditioned_next_x,
conditioned_next_x,
guidance_scale,
)
return combined_next_x
return unconditioned_next_x, conditioned_next_x
def do_latent_postprocessing(
self,
@ -228,7 +326,6 @@ class InvokeAIDiffuserComponent:
) -> torch.Tensor:
if postprocessing_settings is not None:
percent_through = step_index / total_step_count
latents = self.apply_threshold(postprocessing_settings, latents, percent_through)
latents = self.apply_symmetry(postprocessing_settings, latents, percent_through)
return latents
@ -281,17 +378,40 @@ class InvokeAIDiffuserComponent:
# methods below are called from do_diffusion_step and should be considered private to this class.
def _apply_standard_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs):
def _apply_standard_conditioning(self, x, sigma, conditioning_data, **kwargs):
# fast batched path
x_twice = torch.cat([x] * 2)
sigma_twice = torch.cat([sigma] * 2)
both_conditionings, encoder_attention_mask = self._concat_conditionings_for_batch(unconditioning, conditioning)
added_cond_kwargs = None
if type(conditioning_data.text_embeddings) is SDXLConditioningInfo:
added_cond_kwargs = {
"text_embeds": torch.cat(
[
# TODO: how to pad? just by zeros? or even truncate?
conditioning_data.unconditioned_embeddings.pooled_embeds,
conditioning_data.text_embeddings.pooled_embeds,
],
dim=0,
),
"time_ids": torch.cat(
[
conditioning_data.unconditioned_embeddings.add_time_ids,
conditioning_data.text_embeddings.add_time_ids,
],
dim=0,
),
}
both_conditionings, encoder_attention_mask = self._concat_conditionings_for_batch(
conditioning_data.unconditioned_embeddings.embeds, conditioning_data.text_embeddings.embeds
)
both_results = self.model_forward_callback(
x_twice,
sigma_twice,
both_conditionings,
encoder_attention_mask=encoder_attention_mask,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
unconditioned_next_x, conditioned_next_x = both_results.chunk(2)
@ -301,8 +421,7 @@ class InvokeAIDiffuserComponent:
self,
x: torch.Tensor,
sigma,
unconditioning: torch.Tensor,
conditioning: torch.Tensor,
conditioning_data,
**kwargs,
):
# low-memory sequential path
@ -320,52 +439,46 @@ class InvokeAIDiffuserComponent:
if mid_block_additional_residual is not None:
uncond_mid_block, cond_mid_block = mid_block_additional_residual.chunk(2)
added_cond_kwargs = None
is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo
if is_sdxl:
added_cond_kwargs = {
"text_embeds": conditioning_data.unconditioned_embeddings.pooled_embeds,
"time_ids": conditioning_data.unconditioned_embeddings.add_time_ids,
}
unconditioned_next_x = self.model_forward_callback(
x,
sigma,
unconditioning,
conditioning_data.unconditioned_embeddings.embeds,
down_block_additional_residuals=uncond_down_block,
mid_block_additional_residual=uncond_mid_block,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
if is_sdxl:
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
"time_ids": conditioning_data.text_embeddings.add_time_ids,
}
conditioned_next_x = self.model_forward_callback(
x,
sigma,
conditioning,
conditioning_data.text_embeddings.embeds,
down_block_additional_residuals=cond_down_block,
mid_block_additional_residual=cond_mid_block,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
return unconditioned_next_x, conditioned_next_x
# TODO: looks unused
def _apply_hybrid_conditioning(self, x, sigma, unconditioning, conditioning, **kwargs):
assert isinstance(conditioning, dict)
assert isinstance(unconditioning, dict)
x_twice = torch.cat([x] * 2)
sigma_twice = torch.cat([sigma] * 2)
both_conditionings = dict()
for k in conditioning:
if isinstance(conditioning[k], list):
both_conditionings[k] = [
torch.cat([unconditioning[k][i], conditioning[k][i]]) for i in range(len(conditioning[k]))
]
else:
both_conditionings[k] = torch.cat([unconditioning[k], conditioning[k]])
unconditioned_next_x, conditioned_next_x = self.model_forward_callback(
x_twice,
sigma_twice,
both_conditionings,
**kwargs,
).chunk(2)
return unconditioned_next_x, conditioned_next_x
def _apply_cross_attention_controlled_conditioning(
self,
x: torch.Tensor,
sigma,
unconditioning,
conditioning,
conditioning_data,
cross_attention_control_types_to_do,
**kwargs,
):
@ -391,26 +504,43 @@ class InvokeAIDiffuserComponent:
mask=context.cross_attention_mask,
cross_attention_types_to_do=[],
)
added_cond_kwargs = None
is_sdxl = type(conditioning_data.text_embeddings) is SDXLConditioningInfo
if is_sdxl:
added_cond_kwargs = {
"text_embeds": conditioning_data.unconditioned_embeddings.pooled_embeds,
"time_ids": conditioning_data.unconditioned_embeddings.add_time_ids,
}
# no cross attention for unconditioning (negative prompt)
unconditioned_next_x = self.model_forward_callback(
x,
sigma,
unconditioning,
conditioning_data.unconditioned_embeddings.embeds,
{"swap_cross_attn_context": cross_attn_processor_context},
down_block_additional_residuals=uncond_down_block,
mid_block_additional_residual=uncond_mid_block,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
if is_sdxl:
added_cond_kwargs = {
"text_embeds": conditioning_data.text_embeddings.pooled_embeds,
"time_ids": conditioning_data.text_embeddings.add_time_ids,
}
# do requested cross attention types for conditioning (positive prompt)
cross_attn_processor_context.cross_attention_types_to_do = cross_attention_control_types_to_do
conditioned_next_x = self.model_forward_callback(
x,
sigma,
conditioning,
conditioning_data.text_embeddings.embeds,
{"swap_cross_attn_context": cross_attn_processor_context},
down_block_additional_residuals=cond_down_block,
mid_block_additional_residual=cond_mid_block,
added_cond_kwargs=added_cond_kwargs,
**kwargs,
)
return unconditioned_next_x, conditioned_next_x
@ -421,63 +551,6 @@ class InvokeAIDiffuserComponent:
combined_next_x = unconditioned_next_x + scaled_delta
return combined_next_x
def apply_threshold(
self,
postprocessing_settings: PostprocessingSettings,
latents: torch.Tensor,
percent_through: float,
) -> torch.Tensor:
if postprocessing_settings.threshold is None or postprocessing_settings.threshold == 0.0:
return latents
threshold = postprocessing_settings.threshold
warmup = postprocessing_settings.warmup
if percent_through < warmup:
current_threshold = threshold + threshold * 5 * (1 - (percent_through / warmup))
else:
current_threshold = threshold
if current_threshold <= 0:
return latents
maxval = latents.max().item()
minval = latents.min().item()
scale = 0.7 # default value from #395
if self.debug_thresholding:
std, mean = [i.item() for i in torch.std_mean(latents)]
outside = torch.count_nonzero((latents < -current_threshold) | (latents > current_threshold))
logger.info(f"Threshold: %={percent_through} threshold={current_threshold:.3f} (of {threshold:.3f})")
logger.debug(f"min, mean, max = {minval:.3f}, {mean:.3f}, {maxval:.3f}\tstd={std}")
logger.debug(f"{outside / latents.numel() * 100:.2f}% values outside threshold")
if maxval < current_threshold and minval > -current_threshold:
return latents
num_altered = 0
# MPS torch.rand_like is fine because torch.rand_like is wrapped in generate.py!
if maxval > current_threshold:
latents = torch.clone(latents)
maxval = np.clip(maxval * scale, 1, current_threshold)
num_altered += torch.count_nonzero(latents > maxval)
latents[latents > maxval] = torch.rand_like(latents[latents > maxval]) * maxval
if minval < -current_threshold:
latents = torch.clone(latents)
minval = np.clip(minval * scale, -current_threshold, -1)
num_altered += torch.count_nonzero(latents < minval)
latents[latents < minval] = torch.rand_like(latents[latents < minval]) * minval
if self.debug_thresholding:
logger.debug(f"min, , max = {minval:.3f}, , {maxval:.3f}\t(scaled by {scale})")
logger.debug(f"{num_altered / latents.numel() * 100:.2f}% values altered")
return latents
def apply_symmetry(
self,
postprocessing_settings: PostprocessingSettings,
@ -539,18 +612,6 @@ class InvokeAIDiffuserComponent:
self.last_percent_through = percent_through
return latents.to(device=dev)
def estimate_percent_through(self, step_index, sigma):
if step_index is not None and self.cross_attention_control_context is not None:
# percent_through will never reach 1.0 (but this is intended)
return float(step_index) / float(self.cross_attention_control_context.step_count)
# find the best possible index of the current sigma in the sigma sequence
smaller_sigmas = torch.nonzero(self.model.sigmas <= sigma)
sigma_index = smaller_sigmas[-1].item() if smaller_sigmas.shape[0] > 0 else 0
# flip because sigmas[0] is for the fully denoised image
# percent_through must be <1
return 1.0 - float(sigma_index + 1) / float(self.model.sigmas.shape[0])
# print('estimated percent_through', percent_through, 'from sigma', sigma.item())
# todo: make this work
@classmethod
def apply_conjunction(cls, x, t, forward_func, uc, c_or_weighted_c_list, global_guidance_scale):
@ -564,7 +625,7 @@ class InvokeAIDiffuserComponent:
# below is fugly omg
conditionings = [uc] + [c for c, weight in weighted_cond_list]
weights = [1] + [weight for c, weight in weighted_cond_list]
chunk_count = ceil(len(conditionings) / 2)
chunk_count = math.ceil(len(conditionings) / 2)
deltas = None
for chunk_index in range(chunk_count):
offset = chunk_index * 2

View File

@ -1,253 +0,0 @@
from __future__ import annotations
import warnings
import weakref
from abc import ABCMeta, abstractmethod
from collections.abc import MutableMapping
from typing import Callable, Union
import torch
from accelerate.utils import send_to_device
from torch.utils.hooks import RemovableHandle
OFFLOAD_DEVICE = torch.device("cpu")
class _NoModel:
"""Symbol that indicates no model is loaded.
(We can't weakref.ref(None), so this was my best idea at the time to come up with something
type-checkable.)
"""
def __bool__(self):
return False
def to(self, device: torch.device):
pass
def __repr__(self):
return "<NO MODEL>"
NO_MODEL = _NoModel()
class ModelGroup(metaclass=ABCMeta):
"""
A group of models.
The use case I had in mind when writing this is the sub-models used by a DiffusionPipeline,
e.g. its text encoder, U-net, VAE, etc.
Those models are :py:class:`diffusers.ModelMixin`, but "model" is interchangeable with
:py:class:`torch.nn.Module` here.
"""
def __init__(self, execution_device: torch.device):
self.execution_device = execution_device
@abstractmethod
def install(self, *models: torch.nn.Module):
"""Add models to this group."""
pass
@abstractmethod
def uninstall(self, models: torch.nn.Module):
"""Remove models from this group."""
pass
@abstractmethod
def uninstall_all(self):
"""Remove all models from this group."""
@abstractmethod
def load(self, model: torch.nn.Module):
"""Load this model to the execution device."""
pass
@abstractmethod
def offload_current(self):
"""Offload the current model(s) from the execution device."""
pass
@abstractmethod
def ready(self):
"""Ready this group for use."""
pass
@abstractmethod
def set_device(self, device: torch.device):
"""Change which device models from this group will execute on."""
pass
@abstractmethod
def device_for(self, model) -> torch.device:
"""Get the device the given model will execute on.
The model should already be a member of this group.
"""
pass
@abstractmethod
def __contains__(self, model):
"""Check if the model is a member of this group."""
pass
def __repr__(self) -> str:
return f"<{self.__class__.__name__} object at {id(self):x}: " f"device={self.execution_device} >"
class LazilyLoadedModelGroup(ModelGroup):
"""
Only one model from this group is loaded on the GPU at a time.
Running the forward method of a model will displace the previously-loaded model,
offloading it to CPU.
If you call other methods on the model, e.g. ``model.encode(x)`` instead of ``model(x)``,
you will need to explicitly load it with :py:method:`.load(model)`.
This implementation relies on pytorch forward-pre-hooks, and it will copy forward arguments
to the appropriate execution device, as long as they are positional arguments and not keyword
arguments. (I didn't make the rules; that's the way the pytorch 1.13 API works for hooks.)
"""
_hooks: MutableMapping[torch.nn.Module, RemovableHandle]
_current_model_ref: Callable[[], Union[torch.nn.Module, _NoModel]]
def __init__(self, execution_device: torch.device):
super().__init__(execution_device)
self._hooks = weakref.WeakKeyDictionary()
self._current_model_ref = weakref.ref(NO_MODEL)
def install(self, *models: torch.nn.Module):
for model in models:
self._hooks[model] = model.register_forward_pre_hook(self._pre_hook)
def uninstall(self, *models: torch.nn.Module):
for model in models:
hook = self._hooks.pop(model)
hook.remove()
if self.is_current_model(model):
# no longer hooked by this object, so don't claim to manage it
self.clear_current_model()
def uninstall_all(self):
self.uninstall(*self._hooks.keys())
def _pre_hook(self, module: torch.nn.Module, forward_input):
self.load(module)
if len(forward_input) == 0:
warnings.warn(
f"Hook for {module.__class__.__name__} got no input. " f"Inputs must be positional, not keywords.",
stacklevel=3,
)
return send_to_device(forward_input, self.execution_device)
def load(self, module):
if not self.is_current_model(module):
self.offload_current()
self._load(module)
def offload_current(self):
module = self._current_model_ref()
if module is not NO_MODEL:
module.to(OFFLOAD_DEVICE)
self.clear_current_model()
def _load(self, module: torch.nn.Module) -> torch.nn.Module:
assert self.is_empty(), f"A model is already loaded: {self._current_model_ref()}"
module = module.to(self.execution_device)
self.set_current_model(module)
return module
def is_current_model(self, model: torch.nn.Module) -> bool:
"""Is the given model the one currently loaded on the execution device?"""
return self._current_model_ref() is model
def is_empty(self):
"""Are none of this group's models loaded on the execution device?"""
return self._current_model_ref() is NO_MODEL
def set_current_model(self, value):
self._current_model_ref = weakref.ref(value)
def clear_current_model(self):
self._current_model_ref = weakref.ref(NO_MODEL)
def set_device(self, device: torch.device):
if device == self.execution_device:
return
self.execution_device = device
current = self._current_model_ref()
if current is not NO_MODEL:
current.to(device)
def device_for(self, model):
if model not in self:
raise KeyError(f"This does not manage this model {type(model).__name__}", model)
return self.execution_device # this implementation only dispatches to one device
def ready(self):
pass # always ready to load on-demand
def __contains__(self, model):
return model in self._hooks
def __repr__(self) -> str:
return (
f"<{self.__class__.__name__} object at {id(self):x}: "
f"current_model={type(self._current_model_ref()).__name__} >"
)
class FullyLoadedModelGroup(ModelGroup):
"""
A group of models without any implicit loading or unloading.
:py:meth:`.ready` loads _all_ the models to the execution device at once.
"""
_models: weakref.WeakSet
def __init__(self, execution_device: torch.device):
super().__init__(execution_device)
self._models = weakref.WeakSet()
def install(self, *models: torch.nn.Module):
for model in models:
self._models.add(model)
model.to(self.execution_device)
def uninstall(self, *models: torch.nn.Module):
for model in models:
self._models.remove(model)
def uninstall_all(self):
self.uninstall(*self._models)
def load(self, model):
model.to(self.execution_device)
def offload_current(self):
for model in self._models:
model.to(OFFLOAD_DEVICE)
def ready(self):
for model in self._models:
self.load(model)
def set_device(self, device: torch.device):
self.execution_device = device
for model in self._models:
if model.device != OFFLOAD_DEVICE:
model.to(device)
def device_for(self, model):
if model not in self:
raise KeyError("This does not manage this model f{type(model).__name__}", model)
return self.execution_device # this implementation only dispatches to one device
def __contains__(self, model):
return model in self._models

View File

@ -1,6 +1,8 @@
from __future__ import annotations
from contextlib import nullcontext
from packaging import version
import platform
import torch
from torch import autocast
@ -30,7 +32,7 @@ def choose_precision(device: torch.device) -> str:
device_name = torch.cuda.get_device_name(device)
if not ("GeForce GTX 1660" in device_name or "GeForce GTX 1650" in device_name):
return "float16"
elif device.type == "mps":
elif device.type == "mps" and version.parse(platform.mac_ver()[0]) < version.parse("14.0.0"):
return "float16"
return "float32"

View File

@ -1,6 +1,3 @@
"""
Initialization file for invokeai.frontend.config
"""
from .invokeai_configure import main as invokeai_configure
from .invokeai_update import main as invokeai_update
from .model_install import main as invokeai_model_install

View File

@ -0,0 +1,795 @@
# Copyright (c) 2023 - The InvokeAI Team
# Primary Author: David Lovell (github @f412design, discord @techjedi)
# co-author, minor tweaks - Lincoln Stein
# pylint: disable=line-too-long
# pylint: disable=broad-exception-caught
"""Script to import images into the new database system for 3.0.0"""
import os
import datetime
import shutil
import locale
import sqlite3
import json
import glob
import re
import uuid
import yaml
import PIL
import PIL.ImageOps
import PIL.PngImagePlugin
from pathlib import Path
from prompt_toolkit import prompt
from prompt_toolkit.shortcuts import message_dialog
from prompt_toolkit.completion import PathCompleter
from prompt_toolkit.key_binding import KeyBindings
from invokeai.app.services.config import InvokeAIAppConfig
app_config = InvokeAIAppConfig.get_config()
bindings = KeyBindings()
@bindings.add("c-c")
def _(event):
raise KeyboardInterrupt
# release notes
# "Use All" with size dimensions not selectable in the UI will not load dimensions
class Config:
"""Configuration loader."""
def __init__(self):
pass
TIMESTAMP_STRING = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
INVOKE_DIRNAME = "invokeai"
YAML_FILENAME = "invokeai.yaml"
DATABASE_FILENAME = "invokeai.db"
database_path = None
database_backup_dir = None
outputs_path = None
thumbnail_path = None
def find_and_load(self):
"""find the yaml config file and load"""
root = app_config.root_path
if not self.confirm_and_load(os.path.abspath(root)):
print("\r\nSpecify custom database and outputs paths:")
self.confirm_and_load_from_user()
self.database_backup_dir = os.path.join(os.path.dirname(self.database_path), "backup")
self.thumbnail_path = os.path.join(self.outputs_path, "thumbnails")
def confirm_and_load(self, invoke_root):
"""Validates a yaml path exists, confirms the user wants to use it and loads config."""
yaml_path = os.path.join(invoke_root, self.YAML_FILENAME)
if os.path.exists(yaml_path):
db_dir, outdir = self.load_paths_from_yaml(yaml_path)
if os.path.isabs(db_dir):
database_path = os.path.join(db_dir, self.DATABASE_FILENAME)
else:
database_path = os.path.join(invoke_root, db_dir, self.DATABASE_FILENAME)
if os.path.isabs(outdir):
outputs_path = os.path.join(outdir, "images")
else:
outputs_path = os.path.join(invoke_root, outdir, "images")
db_exists = os.path.exists(database_path)
outdir_exists = os.path.exists(outputs_path)
text = f"Found {self.YAML_FILENAME} file at {yaml_path}:"
text += f"\n Database : {database_path}"
text += f"\n Outputs : {outputs_path}"
text += "\n\nUse these paths for import (yes) or choose different ones (no) [Yn]: "
if db_exists and outdir_exists:
if (prompt(text).strip() or "Y").upper().startswith("Y"):
self.database_path = database_path
self.outputs_path = outputs_path
return True
else:
return False
else:
print(" Invalid: One or more paths in this config did not exist and cannot be used.")
else:
message_dialog(
title="Path not found",
text=f"Auto-discovery of configuration failed! Could not find ({yaml_path}), Custom paths can be specified.",
).run()
return False
def confirm_and_load_from_user(self):
default = ""
while True:
database_path = os.path.expanduser(
prompt(
"Database: Specify absolute path to the database to import into: ",
completer=PathCompleter(
expanduser=True, file_filter=lambda x: Path(x).is_dir() or x.endswith((".db"))
),
default=default,
)
)
if database_path.endswith(".db") and os.path.isabs(database_path) and os.path.exists(database_path):
break
default = database_path + "/" if Path(database_path).is_dir() else database_path
default = ""
while True:
outputs_path = os.path.expanduser(
prompt(
"Outputs: Specify absolute path to outputs/images directory to import into: ",
completer=PathCompleter(expanduser=True, only_directories=True),
default=default,
)
)
if outputs_path.endswith("images") and os.path.isabs(outputs_path) and os.path.exists(outputs_path):
break
default = outputs_path + "/" if Path(outputs_path).is_dir() else outputs_path
self.database_path = database_path
self.outputs_path = outputs_path
return
def load_paths_from_yaml(self, yaml_path):
"""Load an Invoke AI yaml file and get the database and outputs paths."""
try:
with open(yaml_path, "rt", encoding=locale.getpreferredencoding()) as file:
yamlinfo = yaml.safe_load(file)
db_dir = yamlinfo.get("InvokeAI", {}).get("Paths", {}).get("db_dir", None)
outdir = yamlinfo.get("InvokeAI", {}).get("Paths", {}).get("outdir", None)
return db_dir, outdir
except Exception:
print(f"Failed to load paths from yaml file! {yaml_path}!")
return None, None
class ImportStats:
"""DTO for tracking work progress."""
def __init__(self):
pass
time_start = datetime.datetime.utcnow()
count_source_files = 0
count_skipped_file_exists = 0
count_skipped_db_exists = 0
count_imported = 0
count_imported_by_version = {}
count_file_errors = 0
@staticmethod
def get_elapsed_time_string():
"""Get a friendly time string for the time elapsed since processing start."""
time_now = datetime.datetime.utcnow()
total_seconds = (time_now - ImportStats.time_start).total_seconds()
hours = int((total_seconds) / 3600)
minutes = int(((total_seconds) % 3600) / 60)
seconds = total_seconds % 60
out_str = f"{hours} hour(s) -" if hours > 0 else ""
out_str += f"{minutes} minute(s) -" if minutes > 0 else ""
out_str += f"{seconds:.2f} second(s)"
return out_str
class InvokeAIMetadata:
"""DTO for core Invoke AI generation properties parsed from metadata."""
def __init__(self):
pass
def __str__(self):
formatted_str = f"{self.generation_mode}~{self.steps}~{self.cfg_scale}~{self.model_name}~{self.scheduler}~{self.seed}~{self.width}~{self.height}~{self.rand_device}~{self.strength}~{self.init_image}"
formatted_str += f"\r\npositive_prompt: {self.positive_prompt}"
formatted_str += f"\r\nnegative_prompt: {self.negative_prompt}"
return formatted_str
generation_mode = None
steps = None
cfg_scale = None
model_name = None
scheduler = None
seed = None
width = None
height = None
rand_device = None
strength = None
init_image = None
positive_prompt = None
negative_prompt = None
imported_app_version = None
def to_json(self):
"""Convert the active instance to json format."""
prop_dict = {}
prop_dict["generation_mode"] = self.generation_mode
# dont render prompt nodes if neither are set to avoid the ui thinking it can set them
# if at least one exists, render them both, but use empty string instead of None if one of them is empty
# this allows the field that is empty to actually be cleared byt he UI instead of leaving the previous value
if self.positive_prompt or self.negative_prompt:
prop_dict["positive_prompt"] = "" if self.positive_prompt is None else self.positive_prompt
prop_dict["negative_prompt"] = "" if self.negative_prompt is None else self.negative_prompt
prop_dict["width"] = self.width
prop_dict["height"] = self.height
# only render seed if it has a value to avoid ui thinking it can set this and then error
if self.seed:
prop_dict["seed"] = self.seed
prop_dict["rand_device"] = self.rand_device
prop_dict["cfg_scale"] = self.cfg_scale
prop_dict["steps"] = self.steps
prop_dict["scheduler"] = self.scheduler
prop_dict["clip_skip"] = 0
prop_dict["model"] = {}
prop_dict["model"]["model_name"] = self.model_name
prop_dict["model"]["base_model"] = None
prop_dict["controlnets"] = []
prop_dict["loras"] = []
prop_dict["vae"] = None
prop_dict["strength"] = self.strength
prop_dict["init_image"] = self.init_image
prop_dict["positive_style_prompt"] = None
prop_dict["negative_style_prompt"] = None
prop_dict["refiner_model"] = None
prop_dict["refiner_cfg_scale"] = None
prop_dict["refiner_steps"] = None
prop_dict["refiner_scheduler"] = None
prop_dict["refiner_aesthetic_store"] = None
prop_dict["refiner_start"] = None
prop_dict["imported_app_version"] = self.imported_app_version
return json.dumps(prop_dict)
class InvokeAIMetadataParser:
"""Parses strings with json data to find Invoke AI core metadata properties."""
def __init__(self):
pass
def parse_meta_tag_dream(self, dream_string):
"""Take as input an png metadata json node for the 'dream' field variant from prior to 1.15"""
props = InvokeAIMetadata()
props.imported_app_version = "pre1.15"
seed_match = re.search("-S\\s*(\\d+)", dream_string)
if seed_match is not None:
try:
props.seed = int(seed_match[1])
except ValueError:
props.seed = None
raw_prompt = re.sub("(-S\\s*\\d+)", "", dream_string)
else:
raw_prompt = dream_string
pos_prompt, neg_prompt = self.split_prompt(raw_prompt)
props.positive_prompt = pos_prompt
props.negative_prompt = neg_prompt
return props
def parse_meta_tag_sd_metadata(self, tag_value):
"""Take as input an png metadata json node for the 'sd-metadata' field variant from 1.15 through 2.3.5 post 2"""
props = InvokeAIMetadata()
props.imported_app_version = tag_value.get("app_version")
props.model_name = tag_value.get("model_weights")
img_node = tag_value.get("image")
if img_node is not None:
props.generation_mode = img_node.get("type")
props.width = img_node.get("width")
props.height = img_node.get("height")
props.seed = img_node.get("seed")
props.rand_device = "cuda" # hardcoded since all generations pre 3.0 used cuda random noise instead of cpu
props.cfg_scale = img_node.get("cfg_scale")
props.steps = img_node.get("steps")
props.scheduler = self.map_scheduler(img_node.get("sampler"))
props.strength = img_node.get("strength")
if props.strength is None:
props.strength = img_node.get("strength_steps") # try second name for this property
props.init_image = img_node.get("init_image_path")
if props.init_image is None: # try second name for this property
props.init_image = img_node.get("init_img")
# remove the path info from init_image so if we move the init image, it will be correctly relative in the new location
if props.init_image is not None:
props.init_image = os.path.basename(props.init_image)
raw_prompt = img_node.get("prompt")
if isinstance(raw_prompt, list):
raw_prompt = raw_prompt[0].get("prompt")
props.positive_prompt, props.negative_prompt = self.split_prompt(raw_prompt)
return props
def parse_meta_tag_invokeai(self, tag_value):
"""Take as input an png metadata json node for the 'invokeai' field variant from 3.0.0 beta 1 through 5"""
props = InvokeAIMetadata()
props.imported_app_version = "3.0.0 or later"
props.generation_mode = tag_value.get("type")
if props.generation_mode is not None:
props.generation_mode = props.generation_mode.replace("t2l", "txt2img").replace("l2l", "img2img")
props.width = tag_value.get("width")
props.height = tag_value.get("height")
props.seed = tag_value.get("seed")
props.cfg_scale = tag_value.get("cfg_scale")
props.steps = tag_value.get("steps")
props.scheduler = tag_value.get("scheduler")
props.strength = tag_value.get("strength")
props.positive_prompt = tag_value.get("positive_conditioning")
props.negative_prompt = tag_value.get("negative_conditioning")
return props
def map_scheduler(self, old_scheduler):
"""Convert the legacy sampler names to matching 3.0 schedulers"""
if old_scheduler is None:
return None
match (old_scheduler):
case "ddim":
return "ddim"
case "plms":
return "pnmd"
case "k_lms":
return "lms"
case "k_dpm_2":
return "kdpm_2"
case "k_dpm_2_a":
return "kdpm_2_a"
case "dpmpp_2":
return "dpmpp_2s"
case "k_dpmpp_2":
return "dpmpp_2m"
case "k_dpmpp_2_a":
return None # invalid, in 2.3.x, selecting this sample would just fallback to last run or plms if new session
case "k_euler":
return "euler"
case "k_euler_a":
return "euler_a"
case "k_heun":
return "heun"
return None
def split_prompt(self, raw_prompt: str):
"""Split the unified prompt strings by extracting all negative prompt blocks out into the negative prompt."""
if raw_prompt is None:
return "", ""
raw_prompt_search = raw_prompt.replace("\r", "").replace("\n", "")
matches = re.findall(r"\[(.+?)\]", raw_prompt_search)
if len(matches) > 0:
negative_prompt = ""
if len(matches) == 1:
negative_prompt = matches[0].strip().strip(",")
else:
for match in matches:
negative_prompt += f"({match.strip().strip(',')})"
positive_prompt = re.sub(r"(\[.+?\])", "", raw_prompt_search).strip()
else:
positive_prompt = raw_prompt_search.strip()
negative_prompt = ""
return positive_prompt, negative_prompt
class DatabaseMapper:
"""Class to abstract database functionality."""
def __init__(self, database_path, database_backup_dir):
self.database_path = database_path
self.database_backup_dir = database_backup_dir
self.connection = None
self.cursor = None
def connect(self):
"""Open connection to the database."""
self.connection = sqlite3.connect(self.database_path)
self.cursor = self.connection.cursor()
def get_board_names(self):
"""Get a list of the current board names from the database."""
sql_get_board_name = "SELECT board_name FROM boards"
self.cursor.execute(sql_get_board_name)
rows = self.cursor.fetchall()
return [row[0] for row in rows]
def does_image_exist(self, image_name):
"""Check database if a image name already exists and return a boolean."""
sql_get_image_by_name = f"SELECT image_name FROM images WHERE image_name='{image_name}'"
self.cursor.execute(sql_get_image_by_name)
rows = self.cursor.fetchall()
return True if len(rows) > 0 else False
def add_new_image_to_database(self, filename, width, height, metadata, modified_date_string):
"""Add an image to the database."""
sql_add_image = f"""INSERT INTO images (image_name, image_origin, image_category, width, height, session_id, node_id, metadata, is_intermediate, created_at, updated_at)
VALUES ('{filename}', 'internal', 'general', {width}, {height}, null, null, '{metadata}', 0, '{modified_date_string}', '{modified_date_string}')"""
self.cursor.execute(sql_add_image)
self.connection.commit()
def get_board_id_with_create(self, board_name):
"""Get the board id for supplied name, and create the board if one does not exist."""
sql_find_board = f"SELECT board_id FROM boards WHERE board_name='{board_name}' COLLATE NOCASE"
self.cursor.execute(sql_find_board)
rows = self.cursor.fetchall()
if len(rows) > 0:
return rows[0][0]
else:
board_date_string = datetime.datetime.utcnow().date().isoformat()
new_board_id = str(uuid.uuid4())
sql_insert_board = f"INSERT INTO boards (board_id, board_name, created_at, updated_at) VALUES ('{new_board_id}', '{board_name}', '{board_date_string}', '{board_date_string}')"
self.cursor.execute(sql_insert_board)
self.connection.commit()
return new_board_id
def add_image_to_board(self, filename, board_id):
"""Add an image mapping to a board."""
add_datetime_str = datetime.datetime.utcnow().isoformat()
sql_add_image_to_board = f"""INSERT INTO board_images (board_id, image_name, created_at, updated_at)
VALUES ('{board_id}', '{filename}', '{add_datetime_str}', '{add_datetime_str}')"""
self.cursor.execute(sql_add_image_to_board)
self.connection.commit()
def disconnect(self):
"""Disconnect from the db, cleaning up connections and cursors."""
if self.cursor is not None:
self.cursor.close()
if self.connection is not None:
self.connection.close()
def backup(self, timestamp_string):
"""Take a backup of the database."""
if not os.path.exists(self.database_backup_dir):
print(f"Database backup directory {self.database_backup_dir} does not exist -> creating...", end="")
os.makedirs(self.database_backup_dir)
print("Done!")
database_backup_path = os.path.join(self.database_backup_dir, f"backup-{timestamp_string}-invokeai.db")
print(f"Making DB Backup at {database_backup_path}...", end="")
shutil.copy2(self.database_path, database_backup_path)
print("Done!")
class MediaImportProcessor:
"""Containing class for script functionality."""
def __init__(self):
pass
board_name_id_map = {}
def get_import_file_list(self):
"""Ask the user for the import folder and scan for the list of files to return."""
while True:
default = ""
while True:
import_dir = os.path.expanduser(
prompt(
"Inputs: Specify absolute path containing InvokeAI .png images to import: ",
completer=PathCompleter(expanduser=True, only_directories=True),
default=default,
)
)
if len(import_dir) > 0 and Path(import_dir).is_dir():
break
default = import_dir
recurse_directories = (
(prompt("Include files from subfolders recursively [yN]? ").strip() or "N").upper().startswith("N")
)
if recurse_directories:
is_recurse = False
matching_file_list = glob.glob(import_dir + "/*.png", recursive=False)
else:
is_recurse = True
matching_file_list = glob.glob(import_dir + "/**/*.png", recursive=True)
if len(matching_file_list) > 0:
return import_dir, is_recurse, matching_file_list
else:
print(f"The specific path {import_dir} exists, but does not contain .png files!")
def get_file_details(self, filepath):
"""Retrieve the embedded metedata fields and dimensions from an image file."""
with PIL.Image.open(filepath) as img:
img.load()
png_width, png_height = img.size
img_info = img.info
return img_info, png_width, png_height
def select_board_option(self, board_names, timestamp_string):
"""Allow the user to choose how a board is selected for imported files."""
while True:
print("\r\nOptions for board selection for imported images:")
print(f"1) Select an existing board name. (found {len(board_names)})")
print("2) Specify a board name to create/add to.")
print("3) Create/add to board named 'IMPORT'.")
print(
f"4) Create/add to board named 'IMPORT' with the current datetime string appended (.e.g IMPORT_{timestamp_string})."
)
print(
"5) Create/add to board named 'IMPORT' with a the original file app_version appended (.e.g IMPORT_2.2.5)."
)
input_option = input("Specify desired board option: ")
match (input_option):
case "1":
if len(board_names) < 1:
print("\r\nThere are no existing board names to choose from. Select another option!")
continue
board_name = self.select_item_from_list(
board_names, "board name", True, "Cancel, go back and choose a different board option."
)
if board_name is not None:
return board_name
case "2":
while True:
board_name = input("Specify new/existing board name: ")
if board_name:
return board_name
case "3":
return "IMPORT"
case "4":
return f"IMPORT_{timestamp_string}"
case "5":
return "IMPORT_APPVERSION"
def select_item_from_list(self, items, entity_name, allow_cancel, cancel_string):
"""A general function to render a list of items to select in the console, prompt the user for a selection and ensure a valid entry is selected."""
print(f"Select a {entity_name.lower()} from the following list:")
index = 1
for item in items:
print(f"{index}) {item}")
index += 1
if allow_cancel:
print(f"{index}) {cancel_string}")
while True:
try:
option_number = int(input("Specify number of selection: "))
except ValueError:
continue
if allow_cancel and option_number == index:
return None
if option_number >= 1 and option_number <= len(items):
return items[option_number - 1]
def import_image(self, filepath: str, board_name_option: str, db_mapper: DatabaseMapper, config: Config):
"""Import a single file by its path"""
parser = InvokeAIMetadataParser()
file_name = os.path.basename(filepath)
file_destination_path = os.path.join(config.outputs_path, file_name)
print("===============================================================================")
print(f"Importing {filepath}")
# check destination to see if the file was previously imported
if os.path.exists(file_destination_path):
print("File already exists in the destination, skipping!")
ImportStats.count_skipped_file_exists += 1
return
# check if file name is already referenced in the database
if db_mapper.does_image_exist(file_name):
print("A reference to a file with this name already exists in the database, skipping!")
ImportStats.count_skipped_db_exists += 1
return
# load image info and dimensions
img_info, png_width, png_height = self.get_file_details(filepath)
# parse metadata
destination_needs_meta_update = True
log_version_note = "(Unknown)"
if "invokeai_metadata" in img_info:
# for the latest, we will just re-emit the same json, no need to parse/modify
converted_field = None
latest_json_string = img_info.get("invokeai_metadata")
log_version_note = "3.0.0+"
destination_needs_meta_update = False
else:
if "sd-metadata" in img_info:
converted_field = parser.parse_meta_tag_sd_metadata(json.loads(img_info.get("sd-metadata")))
elif "invokeai" in img_info:
converted_field = parser.parse_meta_tag_invokeai(json.loads(img_info.get("invokeai")))
elif "dream" in img_info:
converted_field = parser.parse_meta_tag_dream(img_info.get("dream"))
elif "Dream" in img_info:
converted_field = parser.parse_meta_tag_dream(img_info.get("Dream"))
else:
converted_field = InvokeAIMetadata()
destination_needs_meta_update = False
print("File does not have metadata from known Invoke AI versions, add only, no update!")
# use the loaded img dimensions if the metadata didnt have them
if converted_field.width is None:
converted_field.width = png_width
if converted_field.height is None:
converted_field.height = png_height
log_version_note = converted_field.imported_app_version if converted_field else "NoVersion"
log_version_note = log_version_note or "NoVersion"
latest_json_string = converted_field.to_json()
print(f"From Invoke AI Version {log_version_note} with dimensions {png_width} x {png_height}.")
# if metadata needs update, then update metdata and copy in one shot
if destination_needs_meta_update:
print("Updating metadata while copying...", end="")
self.update_file_metadata_while_copying(
filepath, file_destination_path, "invokeai_metadata", latest_json_string
)
print("Done!")
else:
print("No metadata update necessary, copying only...", end="")
shutil.copy2(filepath, file_destination_path)
print("Done!")
# create thumbnail
print("Creating thumbnail...", end="")
thumbnail_path = os.path.join(config.thumbnail_path, os.path.splitext(file_name)[0]) + ".webp"
thumbnail_size = 256, 256
with PIL.Image.open(filepath) as source_image:
source_image.thumbnail(thumbnail_size)
source_image.save(thumbnail_path, "webp")
print("Done!")
# finalize the dynamic board name if there is an APPVERSION token in it.
if converted_field is not None:
board_name = board_name_option.replace("APPVERSION", converted_field.imported_app_version or "NoVersion")
else:
board_name = board_name_option.replace("APPVERSION", "Latest")
# maintain a map of alrady created/looked up ids to avoid DB queries
print("Finding/Creating board...", end="")
if board_name in self.board_name_id_map:
board_id = self.board_name_id_map[board_name]
else:
board_id = db_mapper.get_board_id_with_create(board_name)
self.board_name_id_map[board_name] = board_id
print("Done!")
# add image to db
print("Adding image to database......", end="")
modified_time = datetime.datetime.utcfromtimestamp(os.path.getmtime(filepath))
db_mapper.add_new_image_to_database(file_name, png_width, png_height, latest_json_string, modified_time)
print("Done!")
# add image to board
print("Adding image to board......", end="")
db_mapper.add_image_to_board(file_name, board_id)
print("Done!")
ImportStats.count_imported += 1
if log_version_note in ImportStats.count_imported_by_version:
ImportStats.count_imported_by_version[log_version_note] += 1
else:
ImportStats.count_imported_by_version[log_version_note] = 1
def update_file_metadata_while_copying(self, filepath, file_destination_path, tag_name, tag_value):
"""Perform a metadata update with save to a new destination which accomplishes a copy while updating metadata."""
with PIL.Image.open(filepath) as target_image:
existing_img_info = target_image.info
metadata = PIL.PngImagePlugin.PngInfo()
# re-add any existing invoke ai tags unless they are the one we are trying to add
for key in existing_img_info:
if key != tag_name and key in ("dream", "Dream", "sd-metadata", "invokeai", "invokeai_metadata"):
metadata.add_text(key, existing_img_info[key])
metadata.add_text(tag_name, tag_value)
target_image.save(file_destination_path, pnginfo=metadata)
def process(self):
"""Begin main processing."""
print("===============================================================================")
print("This script will import images generated by earlier versions of")
print("InvokeAI into the currently installed root directory:")
print(f" {app_config.root_path}")
print("If this is not what you want to do, type ctrl-C now to cancel.")
# load config
print("===============================================================================")
print("= Configuration & Settings")
config = Config()
config.find_and_load()
db_mapper = DatabaseMapper(config.database_path, config.database_backup_dir)
db_mapper.connect()
import_dir, is_recurse, import_file_list = self.get_import_file_list()
ImportStats.count_source_files = len(import_file_list)
board_names = db_mapper.get_board_names()
board_name_option = self.select_board_option(board_names, config.TIMESTAMP_STRING)
print("\r\n===============================================================================")
print("= Import Settings Confirmation")
print()
print(f"Database File Path : {config.database_path}")
print(f"Outputs/Images Directory : {config.outputs_path}")
print(f"Import Image Source Directory : {import_dir}")
print(f" Recurse Source SubDirectories : {'Yes' if is_recurse else 'No'}")
print(f"Count of .png file(s) found : {len(import_file_list)}")
print(f"Board name option specified : {board_name_option}")
print(f"Database backup will be taken at : {config.database_backup_dir}")
print("\r\nNotes about the import process:")
print("- Source image files will not be modified, only copied to the outputs directory.")
print("- If the same file name already exists in the destination, the file will be skipped.")
print("- If the same file name already has a record in the database, the file will be skipped.")
print("- Invoke AI metadata tags will be updated/written into the imported copy only.")
print(
"- On the imported copy, only Invoke AI known tags (latest and legacy) will be retained (dream, sd-metadata, invokeai, invokeai_metadata)"
)
print(
"- A property 'imported_app_version' will be added to metadata that can be viewed in the UI's metadata viewer."
)
print(
"- The new 3.x InvokeAI outputs folder structure is flat so recursively found source imges will all be placed into the single outputs/images folder."
)
while True:
should_continue = prompt("\nDo you wish to continue with the import [Yn] ? ").lower() or "y"
if should_continue == "n":
print("\r\nCancelling Import")
return
elif should_continue == "y":
print()
break
db_mapper.backup(config.TIMESTAMP_STRING)
print()
ImportStats.time_start = datetime.datetime.utcnow()
for filepath in import_file_list:
try:
self.import_image(filepath, board_name_option, db_mapper, config)
except sqlite3.Error as sql_ex:
print(f"A database related exception was found processing {filepath}, will continue to next file. ")
print("Exception detail:")
print(sql_ex)
ImportStats.count_file_errors += 1
except Exception as ex:
print(f"Exception processing {filepath}, will continue to next file. ")
print("Exception detail:")
print(ex)
ImportStats.count_file_errors += 1
print("\r\n===============================================================================")
print(f"= Import Complete - Elpased Time: {ImportStats.get_elapsed_time_string()}")
print()
print(f"Source File(s) : {ImportStats.count_source_files}")
print(f"Total Imported : {ImportStats.count_imported}")
print(f"Skipped b/c file already exists on disk : {ImportStats.count_skipped_file_exists}")
print(f"Skipped b/c file already exists in db : {ImportStats.count_skipped_db_exists}")
print(f"Errors during import : {ImportStats.count_file_errors}")
if ImportStats.count_imported > 0:
print("\r\nBreakdown of imported files by version:")
for key, version in ImportStats.count_imported_by_version.items():
print(f" {key:20} : {version}")
def main():
try:
processor = MediaImportProcessor()
processor.process()
except KeyboardInterrupt:
print("\r\n\r\nUser cancelled execution.")
if __name__ == "__main__":
main()

View File

@ -1,4 +1,4 @@
"""
Wrapper for invokeai.backend.configure.invokeai_configure
"""
from ...backend.install.invokeai_configure import main
from ...backend.install.invokeai_configure import main as invokeai_configure

View File

@ -112,7 +112,7 @@ def main():
extras = get_extras()
print(f":crossed_fingers: Upgrading to [yellow]{tag if tag else release}[/yellow]")
print(f":crossed_fingers: Upgrading to [yellow]{tag or release or branch}[/yellow]")
if release:
cmd = f'pip install "invokeai{extras} @ {INVOKE_AI_SRC}/{release}.zip" --use-pep517 --upgrade'
elif tag:

View File

@ -28,7 +28,6 @@ from npyscreen import widget
from invokeai.backend.util.logging import InvokeAILogger
from invokeai.backend.install.model_install_backend import (
ModelInstallList,
InstallSelections,
ModelInstall,
SchedulerPredictionType,
@ -41,12 +40,12 @@ from invokeai.frontend.install.widgets import (
SingleSelectColumns,
TextBox,
BufferBox,
FileBox,
set_min_terminal_size,
select_stable_diffusion_config_file,
CyclingForm,
MIN_COLS,
MIN_LINES,
WindowTooSmallException,
)
from invokeai.app.services.config import InvokeAIAppConfig
@ -58,6 +57,9 @@ logger = InvokeAILogger.getLogger()
# from https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
NOPRINT_TRANS_TABLE = {i: None for i in range(0, sys.maxunicode + 1) if not chr(i).isprintable()}
# maximum number of installed models we can display before overflowing vertically
MAX_OTHER_MODELS = 72
def make_printable(s: str) -> str:
"""Replace non-printable characters in a string"""
@ -102,7 +104,7 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
SingleSelectColumns,
values=[
"STARTER MODELS",
"MORE MODELS",
"MAIN MODELS",
"CONTROLNETS",
"LORA/LYCORIS",
"TEXTUAL INVERSION",
@ -153,7 +155,7 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
BufferBox,
name="Log Messages",
editable=False,
max_height=8,
max_height=6,
)
self.nextrely += 1
@ -253,6 +255,7 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
model_labels = [self.model_labels[x] for x in model_list]
show_recommended = len(self.installed_models) == 0
truncated = False
if len(model_list) > 0:
max_width = max([len(x) for x in model_labels])
columns = window_width // (max_width + 8) # 8 characters for "[x] " and padding
@ -271,6 +274,10 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
)
)
if len(model_labels) > MAX_OTHER_MODELS:
model_labels = model_labels[0:MAX_OTHER_MODELS]
truncated = True
widgets.update(
models_selected=self.add_widget_intelligent(
MultiSelectColumns,
@ -289,6 +296,16 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
models=model_list,
)
if truncated:
widgets.update(
warning_message=self.add_widget_intelligent(
npyscreen.FixedText,
value=f"Too many models to display (max={MAX_OTHER_MODELS}). Some are not displayed.",
editable=False,
color="CAUTION",
)
)
self.nextrely += 1
widgets.update(
download_ids=self.add_widget_intelligent(
@ -313,7 +330,7 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
widgets = self.add_model_widgets(
model_type=model_type,
window_width=window_width,
install_prompt=f"Additional {model_type.value.title()} models already installed.",
install_prompt=f"Installed {model_type.value.title()} models. Unchecked models in the InvokeAI root directory will be deleted. Enter URLs, paths or repo_ids to import.",
**kwargs,
)
@ -399,7 +416,7 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
self.ok_button.hidden = True
self.display()
# for communication with the subprocess
# TO DO: Spawn a worker thread, not a subprocess
parent_conn, child_conn = Pipe()
p = Process(
target=process_and_execute,
@ -414,7 +431,6 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
self.subprocess_connection = parent_conn
self.subprocess = p
app.install_selections = InstallSelections()
# process_and_execute(app.opt, app.install_selections)
def on_back(self):
self.parentApp.switchFormPrevious()
@ -489,8 +505,6 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
# rebuild the form, saving and restoring some of the fields that need to be preserved.
saved_messages = self.monitor.entry_widget.values
# autoload_dir = str(config.root_path / self.pipeline_models['autoload_directory'].value)
# autoscan = self.pipeline_models['autoscan_on_startup'].value
app.main_form = app.addForm(
"MAIN",
@ -544,12 +558,6 @@ class addModelsForm(CyclingForm, npyscreen.FormMultiPage):
if downloads := section.get("download_ids"):
selections.install_models.extend(downloads.value.split())
# load directory and whether to scan on startup
# if self.parentApp.autoload_pending:
# selections.scan_directory = str(config.root_path / self.pipeline_models['autoload_directory'].value)
# self.parentApp.autoload_pending = False
# selections.autoscan_on_startup = self.pipeline_models['autoscan_on_startup'].value
class AddModelApplication(npyscreen.NPSAppManaged):
def __init__(self, opt):
@ -639,6 +647,11 @@ def process_and_execute(
selections: InstallSelections,
conn_out: Connection = None,
):
# need to reinitialize config in subprocess
config = InvokeAIAppConfig.get_config()
args = ["--root", opt.root] if opt.root else []
config.parse_args(args)
# set up so that stderr is sent to conn_out
if conn_out:
translator = StderrToMessage(conn_out)
@ -656,38 +669,11 @@ def process_and_execute(
conn_out.close()
def do_listings(opt) -> bool:
"""List installed models of various sorts, and return
True if any were requested."""
model_manager = ModelManager(config.model_conf_path)
if opt.list_models == "diffusers":
print("Diffuser models:")
model_manager.print_models()
elif opt.list_models == "controlnets":
print("Installed Controlnet Models:")
cnm = model_manager.list_controlnet_models()
print(textwrap.indent("\n".join([x for x in cnm if cnm[x]]), prefix=" "))
elif opt.list_models == "loras":
print("Installed LoRA/LyCORIS Models:")
cnm = model_manager.list_lora_models()
print(textwrap.indent("\n".join([x for x in cnm if cnm[x]]), prefix=" "))
elif opt.list_models == "tis":
print("Installed Textual Inversion Embeddings:")
cnm = model_manager.list_ti_models()
print(textwrap.indent("\n".join([x for x in cnm if cnm[x]]), prefix=" "))
else:
return False
return True
# --------------------------------------------------------
def select_and_download_models(opt: Namespace):
precision = "float32" if opt.full_precision else choose_precision(torch.device(choose_torch_device()))
config.precision = precision
helper = lambda x: ask_user_for_prediction_type(x)
# if do_listings(opt):
# pass
installer = ModelInstall(config, prediction_type_helper=helper)
if opt.list_models:
installer.list_models(opt.list_models)
@ -706,9 +692,11 @@ def select_and_download_models(opt: Namespace):
# needed to support the probe() method running under a subprocess
torch.multiprocessing.set_start_method("spawn")
# the third argument is needed in the Windows 11 environment in
# order to launch and resize a console window running this program
set_min_terminal_size(MIN_COLS, MIN_LINES)
if not set_min_terminal_size(MIN_COLS, MIN_LINES):
raise WindowTooSmallException(
"Could not increase terminal size. Try running again with a larger window or smaller font size."
)
installApp = AddModelApplication(opt)
try:
installApp.run()
@ -787,7 +775,7 @@ def main():
if not config.model_conf_path.exists():
logger.info("Your InvokeAI root directory is not set up. Calling invokeai-configure.")
from invokeai.frontend.install import invokeai_configure
from invokeai.frontend.install.invokeai_configure import invokeai_configure
invokeai_configure()
sys.exit(0)
@ -802,6 +790,8 @@ def main():
curses.echo()
curses.endwin()
logger.info("Goodbye! Come back soon.")
except WindowTooSmallException as e:
logger.error(str(e))
except widget.NotEnoughSpaceForWidget as e:
if str(e).startswith("Height of 1 allocated"):
logger.error("Insufficient vertical space for the interface. Please make your window taller and try again")

View File

@ -21,31 +21,40 @@ MIN_COLS = 130
MIN_LINES = 38
class WindowTooSmallException(Exception):
pass
# -------------------------------------
def set_terminal_size(columns: int, lines: int):
ts = get_terminal_size()
width = max(columns, ts.columns)
height = max(lines, ts.lines)
def set_terminal_size(columns: int, lines: int) -> bool:
OS = platform.uname().system
if OS == "Windows":
pass
# not working reliably - ask user to adjust the window
# _set_terminal_size_powershell(width,height)
elif OS in ["Darwin", "Linux"]:
_set_terminal_size_unix(width, height)
screen_ok = False
while not screen_ok:
ts = get_terminal_size()
width = max(columns, ts.columns)
height = max(lines, ts.lines)
# check whether it worked....
ts = get_terminal_size()
pause = False
if ts.columns < columns:
print("\033[1mThis window is too narrow for the user interface.\033[0m")
pause = True
if ts.lines < lines:
print("\033[1mThis window is too short for the user interface.\033[0m")
pause = True
if pause:
input("Maximize the window then press any key to continue..")
if OS == "Windows":
pass
# not working reliably - ask user to adjust the window
# _set_terminal_size_powershell(width,height)
elif OS in ["Darwin", "Linux"]:
_set_terminal_size_unix(width, height)
# check whether it worked....
ts = get_terminal_size()
if ts.columns < columns or ts.lines < lines:
print(
f"\033[1mThis window is too small for the interface. InvokeAI requires {columns}x{lines} (w x h) characters, but window is {ts.columns}x{ts.lines}\033[0m"
)
resp = input(
"Maximize the window and/or decrease the font size then press any key to continue. Type [Q] to give up.."
)
if resp.upper().startswith("Q"):
break
else:
screen_ok = True
return screen_ok
def _set_terminal_size_powershell(width: int, height: int):
@ -80,14 +89,14 @@ def _set_terminal_size_unix(width: int, height: int):
sys.stdout.flush()
def set_min_terminal_size(min_cols: int, min_lines: int):
def set_min_terminal_size(min_cols: int, min_lines: int) -> bool:
# make sure there's enough room for the ui
term_cols, term_lines = get_terminal_size()
if term_cols >= min_cols and term_lines >= min_lines:
return
return True
cols = max(term_cols, min_cols)
lines = max(term_lines, min_lines)
set_terminal_size(cols, lines)
return set_terminal_size(cols, lines)
class IntSlider(npyscreen.Slider):
@ -164,7 +173,7 @@ class FloatSlider(npyscreen.Slider):
class FloatTitleSlider(npyscreen.TitleText):
_entry_type = FloatSlider
_entry_type = npyscreen.Slider
class SelectColumnBase:

View File

@ -320,7 +320,7 @@ class mergeModelsForm(npyscreen.FormMultiPageAction):
def get_model_names(self, base_model: BaseModelType = None) -> List[str]:
model_names = [
info["name"]
info["model_name"]
for info in self.model_manager.list_models(model_type=ModelType.Main, base_model=base_model)
if info["model_format"] == "diffusers"
]
@ -382,7 +382,8 @@ def run_cli(args: Namespace):
def main():
args = _parse_args()
config.parse_args(["--root", str(args.root_dir)])
if args.root_dir:
config.parse_args(["--root", str(args.root_dir)])
try:
if args.front_end:

View File

@ -21,7 +21,7 @@ export const packageConfig: UserConfig = {
fileName: (format) => `invoke-ai-ui.${format}.js`,
},
rollupOptions: {
external: ['react', 'react-dom', '@emotion/react'],
external: ['react', 'react-dom', '@emotion/react', '@chakra-ui/react'],
output: {
globals: {
react: 'React',

Some files were not shown because too many files have changed in this diff Show More