resolve doc conflicts during merge

This commit is contained in:
Lincoln Stein 2022-10-18 08:27:33 -04:00
commit 230de023ff
40 changed files with 1647 additions and 851 deletions

View File

@ -319,7 +319,7 @@ class InvokeAIWebServer:
elif postprocessing_parameters['type'] == 'gfpgan':
image = self.gfpgan.process(
image=image,
strength=postprocessing_parameters['gfpgan_strength'],
strength=postprocessing_parameters['facetool_strength'],
seed=seed,
)
else:
@ -625,7 +625,7 @@ class InvokeAIWebServer:
seed=seed,
)
postprocessing = True
all_parameters['gfpgan_strength'] = gfpgan_parameters[
all_parameters['facetool_strength'] = gfpgan_parameters[
'strength'
]
@ -723,6 +723,7 @@ class InvokeAIWebServer:
'height',
'extra',
'seamless',
'hires_fix',
]
rfc_dict = {}
@ -735,12 +736,12 @@ class InvokeAIWebServer:
postprocessing = []
# 'postprocessing' is either null or an
if 'gfpgan_strength' in parameters:
if 'facetool_strength' in parameters:
postprocessing.append(
{
'type': 'gfpgan',
'strength': float(parameters['gfpgan_strength']),
'strength': float(parameters['facetool_strength']),
}
)
@ -837,7 +838,7 @@ class InvokeAIWebServer:
elif parameters['type'] == 'gfpgan':
postprocessing_metadata['type'] = 'gfpgan'
postprocessing_metadata['strength'] = parameters[
'gfpgan_strength'
'facetool_strength'
]
else:
raise TypeError(f"Invalid type: {parameters['type']}")

View File

@ -36,6 +36,8 @@ def parameters_to_command(params):
switches.append(f'-A {params["sampler_name"]}')
if "seamless" in params and params["seamless"] == True:
switches.append(f"--seamless")
if "hires_fix" in params and params["hires_fix"] == True:
switches.append(f"--hires")
if "init_img" in params and len(params["init_img"]) > 0:
switches.append(f'-I {params["init_img"]}')
if "init_mask" in params and len(params["init_mask"]) > 0:
@ -46,8 +48,14 @@ def parameters_to_command(params):
switches.append(f'-f {params["strength"]}')
if "fit" in params and params["fit"] == True:
switches.append(f"--fit")
if "gfpgan_strength" in params and params["gfpgan_strength"]:
if "facetool" in params:
switches.append(f'-ft {params["facetool"]}')
if "facetool_strength" in params and params["facetool_strength"]:
switches.append(f'-G {params["facetool_strength"]}')
elif "gfpgan_strength" in params and params["gfpgan_strength"]:
switches.append(f'-G {params["gfpgan_strength"]}')
if "codeformer_fidelity" in params:
switches.append(f'-cf {params["codeformer_fidelity"]}')
if "upscale" in params and params["upscale"]:
switches.append(f'-U {params["upscale"][0]} {params["upscale"][1]}')
if "variation_amount" in params and params["variation_amount"] > 0:

View File

@ -349,7 +349,7 @@ def handle_run_gfpgan_event(original_image, gfpgan_parameters):
eventlet.sleep(0)
image = gfpgan.process(
image=image, strength=gfpgan_parameters["gfpgan_strength"], seed=seed
image=image, strength=gfpgan_parameters["facetool_strength"], seed=seed
)
progress["currentStatus"] = "Saving image"
@ -464,7 +464,7 @@ def parameters_to_post_processed_image_metadata(parameters, original_image_path,
image["strength"] = parameters["upscale"][1]
elif type == "gfpgan":
image["type"] = "gfpgan"
image["strength"] = parameters["gfpgan_strength"]
image["strength"] = parameters["facetool_strength"]
else:
raise TypeError(f"Invalid type: {type}")
@ -493,6 +493,7 @@ def parameters_to_generated_image_metadata(parameters):
"height",
"extra",
"seamless",
"hires_fix",
]
rfc_dict = {}
@ -505,10 +506,10 @@ def parameters_to_generated_image_metadata(parameters):
postprocessing = []
# 'postprocessing' is either null or an
if "gfpgan_strength" in parameters:
if "facetool_strength" in parameters:
postprocessing.append(
{"type": "gfpgan", "strength": float(parameters["gfpgan_strength"])}
{"type": "gfpgan", "strength": float(parameters["facetool_strength"])}
)
if "upscale" in parameters:
@ -751,7 +752,7 @@ def generate_images(generation_parameters, esrgan_parameters, gfpgan_parameters)
image=image, strength=gfpgan_parameters["strength"], seed=seed
)
postprocessing = True
all_parameters["gfpgan_strength"] = gfpgan_parameters["strength"]
all_parameters["facetool_strength"] = gfpgan_parameters["strength"]
progress["currentStatus"] = "Saving image"
socketio.emit("progressUpdate", progress)

View File

@ -9,10 +9,12 @@
laion400m:
config: configs/latent-diffusion/txt2img-1p4B-eval.yaml
weights: models/ldm/text2img-large/model.ckpt
description: Latent Diffusion LAION400M model
width: 256
height: 256
stable-diffusion-1.4:
config: configs/stable-diffusion/v1-inference.yaml
weights: models/ldm/stable-diffusion-v1/model.ckpt
description: Stable Diffusion inference model version 1.4
width: 512
height: 512

View File

@ -85,6 +85,7 @@ overridden on a per-prompt basis (see [List of prompt arguments](#list-of-prompt
| `--from_file <path>` | | `None` | Read list of prompts from a file. Use `-` to read from standard input |
| `--model <modelname>` | | `stable-diffusion-1.4` | Loads model specified in configs/models.yaml. Currently one of "stable-diffusion-1.4" or "laion400m" |
| `--full_precision` | `-F` | `False` | Run in slower full-precision mode. Needed for Macintosh M1/M2 hardware and some older video cards. |
| `--png_compression <0-9>` | `-z<0-9>` | 6 | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
| `--web` | | `False` | Start in web server mode |
| `--host <ip addr>` | | `localhost` | Which network interface web server should listen on. Set to 0.0.0.0 to listen on any. |
| `--port <port>` | | `9090` | Which port web server should listen for requests on. |
@ -142,46 +143,47 @@ Here are the invoke> command that apply to txt2img:
| Argument <img width="680" align="right"/> | Shortcut <img width="420" align="right"/> | Default <img width="480" align="right"/> | Description |
|--------------------|------------|---------------------|--------------|
| `"my prompt"` | | | Text prompt to use. The quotation marks are optional. |
| `--width <int>` | `-W<int>` | `512` | Width of generated image |
| `--height <int>` | `-H<int>` | `512` | Height of generated image |
| `--iterations <int>` | `-n<int>` | `1` | How many images to generate from this prompt |
| `--steps <int>` | `-s<int>` | `50` | How many steps of refinement to apply |
| `--cfg_scale <float>`| `-C<float>` | `7.5` | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
| `--seed <int>` | `-S<int>` | `None` | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
| `--sampler <sampler>`| `-A<sampler>`| `k_lms` | Sampler to use. Use -h to get list of available samplers. |
| `--hires_fix` | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
| `--grid` | `-g` | `False` | Turn on grid mode to return a single image combining all the images generated by this prompt |
| `--individual` | `-i` | `True` | Turn off grid mode (deprecated; leave off `--grid` instead) |
| `--outdir <path>` | `-o<path>` | `outputs/img_samples` | Temporarily change the location of these images |
| `--seamless` | | `False` | Activate seamless tiling for interesting effects |
| `--log_tokenization` | `-t` | `False` | Display a color-coded list of the parsed tokens derived from the prompt |
| `--skip_normalization`| `-x` | `False` | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| `--upscale <int> <float>` | `-U <int> <float>` | `-U 1 0.75`| Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| `--gfpgan_strength <float>` | `-G <float>` | `-G0` | Fix faces using the GFPGAN algorithm; argument indicates how hard the algorithm should try (0.0-1.0) |
| `--save_original` | `-save_orig`| `False` | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| `--variation <float>` |`-v<float>`| `0.0` | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with `-S<seed>` and `-n<int>` to generate a series a riffs on a starting image. See [Variations](./VARIATIONS.md). |
| `--with_variations <pattern>` | `-V<pattern>`| `None` | Combine two or more variations. See [Variations](./VARIATIONS.md) for now to use this. |
| "my prompt" | | | Text prompt to use. The quotation marks are optional. |
| --width <int> | -W<int> | 512 | Width of generated image |
| --height <int> | -H<int> | 512 | Height of generated image |
| --iterations <int> | -n<int> | 1 | How many images to generate from this prompt |
| --steps <int> | -s<int> | 50 | How many steps of refinement to apply |
| --cfg_scale <float>| -C<float> | 7.5 | How hard to try to match the prompt to the generated image; any number greater than 1.0 works, but the useful range is roughly 5.0 to 20.0 |
| --seed <int> | -S<int> | None | Set the random seed for the next series of images. This can be used to recreate an image generated previously.|
| --sampler <sampler>| -A<sampler>| k_lms | Sampler to use. Use -h to get list of available samplers. |
| --hires_fix | | | Larger images often have duplication artefacts. This option suppresses duplicates by generating the image at low res, and then using img2img to increase the resolution |
| `--png_compression <0-9>` | `-z<0-9>` | 6 | Select level of compression for output files, from 0 (no compression) to 9 (max compression) |
| --grid | -g | False | Turn on grid mode to return a single image combining all the images generated by this prompt |
| --individual | -i | True | Turn off grid mode (deprecated; leave off --grid instead) |
| --outdir <path> | -o<path> | outputs/img_samples | Temporarily change the location of these images |
| --seamless | | False | Activate seamless tiling for interesting effects |
| --log_tokenization | -t | False | Display a color-coded list of the parsed tokens derived from the prompt |
| --skip_normalization| -x | False | Weighted subprompts will not be normalized. See [Weighted Prompts](./OTHER.md#weighted-prompts) |
| --upscale <int> <float> | -U <int> <float> | -U 1 0.75| Upscale image by magnification factor (2, 4), and set strength of upscaling (0.0-1.0). If strength not set, will default to 0.75. |
| --facetool_strength <float> | -G <float> | -G0 | Fix faces (defaults to using the GFPGAN algorithm); argument indicates how hard the algorithm should try (0.0-1.0) |
| --facetool <name> | -ft <name> | -ft gfpgan | Select face restoration algorithm to use: gfpgan, codeformer |
| --codeformer_fidelity | -cf <float> | 0.75 | Used along with CodeFormer. Takes values between 0 and 1. 0 produces high quality but low accuracy. 1 produces high accuracy but low quality |
| --save_original | -save_orig| False | When upscaling or fixing faces, this will cause the original image to be saved rather than replaced. |
| --variation <float> |-v<float>| 0.0 | Add a bit of noise (0.0=none, 1.0=high) to the image in order to generate a series of variations. Usually used in combination with -S<seed> and -n<int> to generate a series of riffs on a starting image. See [Variations](./VARIATIONS.md). |
| --with_variations <pattern> | | None | Combine two or more variations. See [Variations](./VARIATIONS.md) for how to use this. |
| --save_intermediates <n> | | None | Save the image from every nth step into an "intermediates" folder inside the output directory |
!!! note
Note that the width and height of the image must be multiples of
64. You can provide different values, but they will be rounded down to
the nearest multiple of 64.
The width and height of the image must be multiples of
64. You can provide different values, but they will be rounded down to
the nearest multiple of 64.
### img2img
### This is an example of img2img:
!!! example ""
~~~~
invoke> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
~~~~
```bash
invoke> waterfall and rainbow -I./vacation-photo.png -W640 -H480 --fit
```
This will modify the indicated vacation photograph by making it more
like the prompt. Results will vary greatly depending on what is in the
image. We also ask to `--fit` the image into a box no bigger than
640x480. Otherwise the image size will be identical to the provided
photo and you may run out of memory if it is large.
This will modify the indicated vacation photograph by making it more
like the prompt. Results will vary greatly depending on what is in the
image. We also ask to --fit the image into a box no bigger than
640x480. Otherwise the image size will be identical to the provided
photo and you may run out of memory if it is large.
In addition to the command-line options recognized by txt2img, img2img
accepts additional options:
@ -214,10 +216,14 @@ well as the --mask (-M) argument:
|--------------------|------------|---------------------|--------------|
| `--init_mask <path>` | `-M<path>` | `None` |Path to an image the same size as the initial_image, with areas for inpainting made transparent.|
## Convenience commands
# Other Commands
In addition to the standard image generation arguments, there are a
series of convenience commands that begin with !:
The CLI offers a number of commands that begin with "!".
## Postprocessing images
To postprocess a file using face restoration or upscaling, use the
`!fix` command.
### `!fix`
@ -250,19 +256,161 @@ Some examples:
Outputs:
[1] outputs/img-samples/000017.4829112.gfpgan-00.png: !fix "outputs/img-samples/0000045.4829112.png" -s 50 -S -W 512 -H 512 -C 7.5 -A k_lms -G 0.8
# Model selection and importation
The CLI allows you to add new models on the fly, as well as to switch
among them rapidly without leaving the script.
## !models
This prints out a list of the models defined in `configs/models.yaml`.
The active model is bold-faced.
Example:
<pre>
laion400m not loaded <no description>
<b>stable-diffusion-1.4 active Stable Diffusion v1.4</b>
waifu-diffusion not loaded Waifu Diffusion v1.3
</pre>
## !switch <model>
This quickly switches from one model to another without leaving the
CLI script. `invoke.py` uses a memory caching system; once a model
has been loaded, switching back and forth is quick. The following
example shows this in action. Note how the second column of the
`!models` table changes to `cached` after a model is first loaded,
and that the long initialization step is not needed when loading
a cached model.
<pre>
invoke> !models
laion400m not loaded <no description>
<b>stable-diffusion-1.4 active Stable Diffusion v1.4</b>
waifu-diffusion not loaded Waifu Diffusion v1.3
invoke> !switch waifu-diffusion
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
| LatentDiffusion: Running in eps-prediction mode
| DiffusionWrapper has 859.52 M params.
| Making attention of type 'vanilla' with 512 in_channels
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
| Making attention of type 'vanilla' with 512 in_channels
| Using faster float16 precision
>> Model loaded in 18.24s
>> Max VRAM used to load the model: 2.17G
>> Current VRAM usage:2.17G
>> Setting Sampler to k_lms
invoke> !models
laion400m not loaded <no description>
stable-diffusion-1.4 cached Stable Diffusion v1.4
<b>waifu-diffusion active Waifu Diffusion v1.3</b>
invoke> !switch stable-diffusion-1.4
>> Caching model waifu-diffusion in system RAM
>> Retrieving model stable-diffusion-1.4 from system RAM cache
>> Setting Sampler to k_lms
invoke> !models
laion400m not loaded <no description>
<b>stable-diffusion-1.4 active Stable Diffusion v1.4</b>
waifu-diffusion cached Waifu Diffusion v1.3
</pre>
## !import_model <path/to/model/weights>
This command imports a new model weights file into InvokeAI, makes it
available for image generation within the script, and writes out the
configuration for the model into `config/models.yaml` for use in
subsequent sessions.
Provide `!import_model` with the path to a weights file ending in
`.ckpt`. If you type a partial path and press tab, the CLI will
autocomplete. Although it will also autocomplete to `.vae` files,
these are not currently supported (but will be soon).
When you hit return, the CLI will prompt you to fill in additional
information about the model, including the short name you wish to use
for it with the `!switch` command, a brief description of the model,
the default image width and height to use with this model, and the
model's configuration file. The latter three fields are automatically
filled with reasonable defaults. In the example below, the bold-faced
text shows what the user typed in with the exception of the width,
height and configuration file paths, which were filled in
automatically.
Example:
<pre>
invoke> <b>!import_model models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt</b>
>> Model import in process. Please enter the values needed to configure this model:
Name for this model: <b>waifu-diffusion</b>
Description of this model: <b>Waifu Diffusion v1.3</b>
Configuration file for this model: <b>configs/stable-diffusion/v1-inference.yaml</b>
Default image width: <b>512</b>
Default image height: <b>512</b>
>> New configuration:
waifu-diffusion:
config: configs/stable-diffusion/v1-inference.yaml
description: Waifu Diffusion v1.3
height: 512
weights: models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
width: 512
OK to import [n]? <b>y</b>
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch08-float16.ckpt
| LatentDiffusion: Running in eps-prediction mode
| DiffusionWrapper has 859.52 M params.
| Making attention of type 'vanilla' with 512 in_channels
| Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
| Making attention of type 'vanilla' with 512 in_channels
| Using faster float16 precision
invoke>
</pre>
## !edit_model <name_of_model>
The `!edit_model` command can be used to modify a model that is
already defined in `config/models.yaml`. Call it with the short
name of the model you wish to modify, and it will allow you to
modify the model's `description`, `weights` and other fields.
Example:
<pre>
invoke> <b>!edit_model waifu-diffusion</b>
>> Editing model waifu-diffusion from configuration file ./configs/models.yaml
description: <b>Waifu diffusion v1.4beta</b>
weights: models/ldm/stable-diffusion-v1/<b>model-epoch10-float16.ckpt</b>
config: configs/stable-diffusion/v1-inference.yaml
width: 512
height: 512
>> New configuration:
waifu-diffusion:
config: configs/stable-diffusion/v1-inference.yaml
description: Waifu diffusion v1.4beta
weights: models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
height: 512
width: 512
OK to import [n]? y
>> Caching model stable-diffusion-1.4 in system RAM
>> Loading waifu-diffusion from models/ldm/stable-diffusion-v1/model-epoch10-float16.ckpt
...
</pre>
=======
invoke> !fix 000017.4829112.gfpgan-00.png --embiggen 3
...lots of text...
Outputs:
[2] outputs/img-samples/000018.2273800735.embiggen-00.png: !fix "outputs/img-samples/000017.243781548.gfpgan-00.png" -s 50 -S 2273800735 -W 512 -H 512 -C 7.5 -A k_lms --embiggen 3.0 0.75 0.25
```
# History processing
### `!fetch`
This command retrieves the generation parameters from a previously
generated image and either loads them into the command line. You may
provide either the name of a file in the current output directory, or
a full file path.
The CLI provides a series of convenient commands for reviewing previous
actions, retrieving them, modifying them, and re-running them.
```bash
invoke> !fetch 0000015.8929913.png
# the script returns the next line, ready for editing and running:
@ -297,7 +445,23 @@ invoke> !20
invoke> watercolor of beautiful woman sitting under tree wearing broad hat and flowing garment -v0.2 -n6 -S2878767194
```
### `!search <search string>`
## !fetch
This command retrieves the generation parameters from a previously
generated image and loads them into the command line. You may
provide either the name of a file in the current output directory, or
a full file path.
~~~
invoke> !fetch 0000015.8929913.png
# the script returns the next line, ready for editing and running:
invoke> a fantastic alien landscape -W 576 -H 512 -s 60 -A plms -C 7.5
~~~
Note that this command may behave unexpectedly if given a PNG file that
was not generated by InvokeAI.
### !search <search string>
This is similar to !history but it only returns lines that contain
`search string`. For example:

View File

@ -58,16 +58,13 @@ information underneath the transparent needs to be preserved, not erased.
!!! warning
`img2img` does not work properly on initial images smaller than 512x512. Please scale your
image to at least 512x512 before using it. Larger images are not a problem, but may run out of VRAM on your
GPU card.
To fix this, use the `--fit` option, which downscales the initial image to fit within the box specified
by width x height:
```bash
invoke> "tree on a hill with a river, national geographic" -I./test-pictures/big-sketch.png -H512 -W512 --fit
```
**IMPORTANT ISSUE** `img2img` does not work properly on initial images smaller than 512x512. Please scale your
image to at least 512x512 before using it. Larger images are not a problem, but may run out of VRAM on your
GPU card. To fix this, use the --fit option, which downscales the initial image to fit within the box specified
by width x height:
~~~
tree on a hill with a river, national geographic -I./test-pictures/big-sketch.png -H512 -W512 --fit
~~~
## How does it actually work, though?
@ -77,7 +74,7 @@ gaussian noise and progressively refines it over the requested number of steps,
**Let's start** by thinking about vanilla `prompt2img`, just generating an image from a prompt. If the step count is 10, then the "latent space" (Stable Diffusion's internal representation of the image) for the prompt "fire" with seed `1592514025` develops something like this:
```bash
```commandline
invoke> "fire" -s10 -W384 -H384 -S1592514025
```
@ -112,9 +109,9 @@ With strength `0.4`, the steps look more like this:
Notice how much more fuzzy the starting image is for strength `0.7` compared to `0.4`, and notice also how much longer the sequence is with `0.7`:
| | strength = 0.7 | strength = 0.4 |
| -- | :--: | :--: |
| initial image that SD sees | ![step-0-32](../assets/img2img/000032.step-0.png) | ![step-0-30](../assets/img2img/000030.step-0.png) |
| steps argument to `dream>` | `-S10` | `-S10` |
| -- | -- | -- |
| initial image that SD sees | ![](../assets/img2img/000032.step-0.png) | ![](../assets/img2img/000030.step-0.png) |
| steps argument to `invoke>` | `-s10` | `-s10` |
| steps actually taken | 7 | 4 |
| latent space at each step | ![gravity32](../assets/img2img/000032.steps.gravity.png) | ![gravity30](../assets/img2img/000030.steps.gravity.png) |
| output | ![000032.1592514025](../assets/img2img/000032.1592514025.png) | ![000030.1592514025](../assets/img2img/000030.1592514025.png) |
@ -123,11 +120,13 @@ Both of the outputs look kind of like what I was thinking of. With the strength
If you want to try this out yourself, all of these are using a seed of `1592514025` with a width/height of `384`, step count `10`, the default sampler (`k_lms`), and the single-word prompt `"fire"`:
```bash
If you want to try this out yourself, all of these are using a seed of `1592514025` with a width/height of `384`, step count `10`, the default sampler (`k_lms`), and the single-word prompt `fire`:
```commandline
invoke> "fire" -s10 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png --strength 0.7
```
The code for rendering intermediates is on my (damian0815's) branch [document-img2img](https://github.com/damian0815/InvokeAI/tree/document-img2img) - run `invoke.py` and check your `outputs/img-samples/intermediates` folder while generating an image.
The code for rendering intermediates is on my (damian0815's) branch [document-img2img](https://github.com/damian0815/InvokeAI/tree/document-img2img) - run `invoke.py` and check your `outputs/img-samples/intermediates` folder while generating an image.
### Compensating for the reduced step count
@ -135,7 +134,7 @@ After putting this guide together I was curious to see how the difference would
Here's strength `0.4` (note step count `50`, which is `20 ÷ 0.4` to make sure SD does `20` steps from my image):
```bash
```commandline
invoke> "fire" -s50 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.4
```
@ -145,7 +144,7 @@ invoke> "fire" -s50 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.4
and here is strength `0.7` (note step count `30`, which is roughly `20 ÷ 0.7` to make sure SD does `20` steps from my image):
```bash
```commandline
invoke> "fire" -s30 -W384 -H384 -S1592514025 -I /tmp/fire-drawing.png -f 0.7
```

View File

@ -6,21 +6,29 @@ title: Inpainting
## **Creating Transparent Regions for Inpainting**
Inpainting is really cool. To do it, you start with an initial image and use a photoeditor to make
one or more regions transparent (i.e. they have a "hole" in them). You then provide the path to this
image at the invoke> command line using the `-I` switch. Stable Diffusion will only paint within the
transparent region.
Inpainting is really cool. To do it, you start with an initial image
and use a photoeditor to make one or more regions transparent
(i.e. they have a "hole" in them). You then provide the path to this
image at the invoke> command line using the `-I` switch. Stable
Diffusion will only paint within the transparent region.
There's a catch. In the current implementation, you have to prepare the initial image correctly so
that the underlying colors are preserved under the transparent area. Many imaging editing
applications will by default erase the color information under the transparent pixels and replace
them with white or black, which will lead to suboptimal inpainting. You also must take care to
export the PNG file in such a way that the color information is preserved.
There's a catch. In the current implementation, you have to prepare
the initial image correctly so that the underlying colors are
preserved under the transparent area. Many imaging editing
applications will by default erase the color information under the
transparent pixels and replace them with white or black, which will
lead to suboptimal inpainting. It often helps to apply incomplete
transparency, such as any value between 1% and 99%.
If your photoeditor is erasing the underlying color information, `invoke.py` will give you a big fat
warning. If you can't find a way to coax your photoeditor to retain color values under transparent
areas, then you can combine the `-I` and `-M` switches to provide both the original unedited image
and the masked (partially transparent) image:
You also must take care to export the PNG file in such a way that the
color information is preserved. There is often an option in the export
dialog that lets you specify this.
If your photoeditor is erasing the underlying color information,
`invoke.py` will give you a big fat warning. If you can't find a way to
coax your photoeditor to retain color values under transparent areas,
then you can combine the `-I` and `-M` switches to provide both the
original unedited image and the masked (partially transparent) image:
```bash
invoke> "man with cat on shoulder" -I./images/man.png -M./images/man-transparent.png
@ -28,6 +36,26 @@ invoke> "man with cat on shoulder" -I./images/man.png -M./images/man-transparent
We are hoping to get rid of the need for this workaround in an upcoming release.
### Inpainting is not changing the masked region enough!
One of the things to understand about how inpainting works is that it
is equivalent to running img2img on just the masked (transparent)
area. img2img builds on top of the existing image data, and therefore
will attempt to preserve colors, shapes and textures to the best of
its ability. Unfortunately this means that if you want to make a
dramatic change in the inpainted region, for example replacing a red
wall with a blue one, the algorithm will fight you.
You have a couple of options. The first is to increase the values of
the requested steps (`-sXXX`), strength (`-f0.XX`), and/or
condition-free guidance (`-CXX.X`). If this is not working for you, a
more extreme step is to provide the `--inpaint_replace 0.X` (`-r0.X`)
option. This value ranges from 0.0 to 1.0. The higher it is the less
attention the algorithm will pay to the data underneath the masked
region. At high values this will enable you to replace colored regions
entirely, but beware that the masked region may not blend in with the
surrounding unmasked regions as well.
---
## Recipe for GIMP
@ -35,10 +63,10 @@ We are hoping to get rid of the need for this workaround in an upcoming release.
[GIMP](https://www.gimp.org/) is a popular Linux photoediting tool.
1. Open image in GIMP.
2. Layer --> Transparency --> Add Alpha Channel
3. Use lasoo tool to select region to mask
4. Choose Select --> Float to create a floating selection
5. Open the Layers toolbar (++ctrl+l++) and select "Floating Selection"
2. Layer->Transparency->Add Alpha Channel
3. Use lasso tool to select region to mask
4. Choose Select -> Float to create a floating selection
5. Open the Layers toolbar (^L) and select "Floating Selection"
6. Set opacity to a value between 0% and 99%
7. Export as PNG
8. In the export dialogue, Make sure the "Save colour values from
@ -62,7 +90,7 @@ We are hoping to get rid of the need for this workaround in an upcoming release.
3. Because we'll be applying a mask over the area we want to preserve, you should now select the inverse by using the ++shift+ctrl+i++ shortcut, or right clicking and using the "Select Inverse" option.
4. You'll now create a mask by selecting the image layer, and Masking the selection. Make sure that you don't delete any of the undrlying image, or your inpainting results will be dramatically impacted.
4. You'll now create a mask by selecting the image layer, and Masking the selection. Make sure that you don't delete any of the underlying image, or your inpainting results will be dramatically impacted.
<figure markdown>
![step4](../assets/step4.png)

View File

@ -70,7 +70,7 @@ If you do not explicitly specify an upscaling_strength, it will default to 0.75.
### Face Restoration
`-G : <gfpgan_strength>`
`-G : <facetool_strength>`
This prompt argument controls the strength of the face restoration that is being
applied. Similar to upscaling, values between `0.5 to 0.8` are recommended.

View File

@ -51,7 +51,15 @@ While that is downloading, open Terminal and run the following commands one at a
brew install cmake protobuf rust
```
Then choose the kind of your Mac and install miniconda:
Then clone the InvokeAI repository:
```bash title="Clone the InvokeAI repository"
# Clone the Invoke AI repo
git clone https://github.com/invoke-ai/InvokeAI.git
cd InvokeAI
```
Choose the appropriate architecture for your system and install miniconda:
=== "M1 arm64"
@ -81,7 +89,7 @@ While that is downloading, open Terminal and run the following commands one at a
!!! todo "Clone the Invoke AI repo"
```bash
```bash
git clone https://github.com/invoke-ai/InvokeAI.git
cd InvokeAI
```
@ -202,7 +210,7 @@ conda update \
---
### "No module named cv2", torch, 'ldm', 'transformers', 'taming', etc
### "No module named cv2", torch, 'invokeai', 'transformers', 'taming', etc
There are several causes of these errors:

File diff suppressed because one or more lines are too long

483
frontend/dist/assets/index.ea68b5f5.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -6,7 +6,7 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>InvokeAI - A Stable Diffusion Toolkit</title>
<link rel="shortcut icon" type="icon" href="/assets/favicon.0d253ced.ico" />
<script type="module" crossorigin src="/assets/index.989a0ca2.js"></script>
<script type="module" crossorigin src="/assets/index.ea68b5f5.js"></script>
<link rel="stylesheet" href="/assets/index.58175ea1.css">
</head>

View File

@ -50,6 +50,7 @@ export const PARAMETERS: { [key: string]: string } = {
maskPath: 'Initial Image Mask',
shouldFitToWidthHeight: 'Fit Initial Image',
seamless: 'Seamless Tiling',
hiresFix: 'High Resolution Optimizations',
};
export const NUMPY_RAND_MIN = 0;

View File

@ -14,10 +14,13 @@ export enum Feature {
FACE_CORRECTION,
IMAGE_TO_IMAGE,
}
/** For each tooltip in the UI, the feature definitions & props below pull the relevant information into the tooltip.
*
* To-do: href & guideImage are placeholders and are not currently utilized, but will be updated (along with the tooltip UI) as feature and UI development progresses and we get a better idea of where things' "forever homes" will be.
*/
export const FEATURES: Record<Feature, FeatureHelpInfo> = {
[Feature.PROMPT]: {
text: 'This field will take all prompt text, including both content and stylistic terms. CLI Commands will not work in the prompt.',
text: 'This field will take all prompt text, including both content and stylistic terms. While weights can be included in the prompt, standard CLI Commands/parameters will not work.',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
@ -27,17 +30,16 @@ export const FEATURES: Record<Feature, FeatureHelpInfo> = {
guideImage: 'asset/path.gif',
},
[Feature.OTHER]: {
text: 'Additional Options',
href: 'link/to/docs/feature3.html',
text: 'These options will enable alternative processing modes for Invoke. Seamless tiling will work to generate repeating patterns in the output. High Resolution Optimization performs a two-step generation cycle, and should be used at higher resolutions when you desire a more coherent image/composition. ', href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.SEED]: {
text: 'Seed values provide an initial set of noise which guide the denoising process.',
text: 'Seed values provide an initial set of noise which guide the denoising process, and can be randomized or populated with a seed from a previous invocation. The Threshold feature can be used to mitigate undesirable outcomes at higher CFG values (try between 0-10), and Perlin can be used to add Perlin noise into the denoising process - Both serve to add variation to your outputs. ',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.VARIATIONS]: {
text: 'Try a variation with an amount of between 0 and 1 to change the output image for the set seed.',
text: 'Try a variation with an amount of between 0 and 1 to change the output image for the set seed - Interesting variations on the seed are found between 0.1 and 0.3.',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
@ -47,8 +49,8 @@ export const FEATURES: Record<Feature, FeatureHelpInfo> = {
guideImage: 'asset/path.gif',
},
[Feature.FACE_CORRECTION]: {
text: 'Using GFPGAN or CodeFormer, Face Correction will attempt to identify faces in outputs, and correct any defects/abnormalities. Higher values will apply a stronger corrective pressure on outputs.',
href: 'link/to/docs/feature2.html',
text: 'Using GFPGAN, Face Correction will attempt to identify faces in outputs, and correct any defects/abnormalities. Higher values will apply a stronger corrective pressure on outputs, resulting in more appealing faces (with less respect for accuracy of the original subject).',
href: 'link/to/docs/feature3.html',
guideImage: 'asset/path.gif',
},
[Feature.IMAGE_TO_IMAGE]: {

View File

@ -55,6 +55,7 @@ export declare type CommonGeneratedImageMetadata = {
width: number;
height: number;
seamless: boolean;
hires_fix: boolean;
extra: null | Record<string, never>; // Pending development of RFC #266
};

View File

@ -76,7 +76,7 @@ const makeSocketIOEmitters = (
const { gfpganStrength } = getState().options;
const gfpganParameters = {
gfpgan_strength: gfpganStrength,
facetool_strength: gfpganStrength,
};
socketio.emit('runPostprocessing', imageToProcess, {
type: 'gfpgan',

View File

@ -29,6 +29,7 @@ export const frontendToBackendParameters = (
sampler,
seed,
seamless,
hiresFix,
shouldUseInitImage,
img2imgStrength,
initialImagePath,
@ -59,6 +60,7 @@ export const frontendToBackendParameters = (
sampler_name: sampler,
seed,
seamless,
hires_fix: hiresFix,
progress_images: shouldDisplayInProgress,
};
@ -123,10 +125,11 @@ export const backendToFrontendParameters = (parameters: {
sampler_name,
seed,
seamless,
hires_fix,
progress_images,
variation_amount,
with_variations,
gfpgan_strength,
facetool_strength,
upscale,
init_img,
init_mask,
@ -151,9 +154,9 @@ export const backendToFrontendParameters = (parameters: {
}
}
if (gfpgan_strength > 0) {
if (facetool_strength > 0) {
options.shouldRunGFPGAN = true;
options.gfpganStrength = gfpgan_strength;
options.gfpganStrength = facetool_strength;
}
if (upscale) {
@ -185,6 +188,7 @@ export const backendToFrontendParameters = (parameters: {
options.sampler = sampler_name;
options.seed = seed;
options.seamless = seamless;
options.hiresFix = hires_fix;
}
return options;

View File

@ -16,11 +16,13 @@ import {
setCfgScale,
setGfpganStrength,
setHeight,
setHiresFix,
setImg2imgStrength,
setInitialImagePath,
setMaskPath,
setPrompt,
setSampler,
setSeamless,
setSeed,
setSeedWeights,
setShouldFitToWidthHeight,
@ -116,6 +118,7 @@ const ImageMetadataViewer = memo(
steps,
cfg_scale,
seamless,
hires_fix,
width,
height,
strength,
@ -214,7 +217,14 @@ const ImageMetadataViewer = memo(
<MetadataItem
label="Seamless"
value={seamless}
onClick={() => dispatch(setWidth(seamless))}
onClick={() => dispatch(setSeamless(seamless))}
/>
)}
{hires_fix && (
<MetadataItem
label="High Resolution Optimization"
value={hires_fix}
onClick={() => dispatch(setHiresFix(hires_fix))}
/>
)}
{width && (

View File

@ -0,0 +1,32 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setHiresFix } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
/**
 * High-resolution optimization option.
 *
 * Renders a single switch bound to `state.options.hiresFix`; toggling it
 * dispatches `setHiresFix` with the switch's new checked state.
 */
const HiresOptions = () => {
  const dispatch = useAppDispatch();

  const hiresFix = useAppSelector((state: RootState) => state.options.hiresFix);

  // Mirror the checkbox state into the options slice.
  const handleChangeHiresFix = (e: ChangeEvent<HTMLInputElement>) =>
    dispatch(setHiresFix(e.target.checked));

  return (
    <Flex gap={2} direction={'column'}>
      <IAISwitch
        label="High Res Optimization"
        fontSize={'md'}
        isChecked={hiresFix}
        onChange={handleChangeHiresFix}
      />
    </Flex>
  );
};
};
export default HiresOptions;

View File

@ -1,29 +1,14 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setSeamless } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
/**
* Image output options. Includes width, height, seamless tiling.
*/
import HiresOptions from './HiresOptions';
import SeamlessOptions from './SeamlessOptions';
const OutputOptions = () => {
const dispatch = useAppDispatch();
const seamless = useAppSelector((state: RootState) => state.options.seamless);
const handleChangeSeamless = (e: ChangeEvent<HTMLInputElement>) =>
dispatch(setSeamless(e.target.checked));
return (
<Flex gap={2} direction={'column'}>
<IAISwitch
label="Seamless tiling"
fontSize={'md'}
isChecked={seamless}
onChange={handleChangeSeamless}
/>
<SeamlessOptions />
<HiresOptions />
</Flex>
);
};

View File

@ -0,0 +1,28 @@
import { Flex } from '@chakra-ui/react';
import { RootState } from '../../app/store';
import { useAppDispatch, useAppSelector } from '../../app/store';
import { setSeamless } from './optionsSlice';
import { ChangeEvent } from 'react';
import IAISwitch from '../../common/components/IAISwitch';
/**
 * Seamless-tiling option: a single switch that reflects
 * `state.options.seamless` and dispatches `setSeamless` when toggled.
 */
const SeamlessOptions = () => {
  const dispatch = useAppDispatch();

  const seamless = useAppSelector(
    (rootState: RootState) => rootState.options.seamless
  );

  // Forward the switch's checked flag to the options slice.
  function handleChangeSeamless(event: ChangeEvent<HTMLInputElement>) {
    return dispatch(setSeamless(event.target.checked));
  }

  return (
    <Flex gap={2} direction={'column'}>
      <IAISwitch
        label="Seamless tiling"
        fontSize={'md'}
        isChecked={seamless}
        onChange={handleChangeSeamless}
      />
    </Flex>
  );
};
};
export default SeamlessOptions;

View File

@ -25,6 +25,7 @@ export interface OptionsState {
initialImagePath: string | null;
maskPath: string;
seamless: boolean;
hiresFix: boolean;
shouldFitToWidthHeight: boolean;
shouldGenerateVariations: boolean;
variationAmount: number;
@ -50,6 +51,7 @@ const initialOptionsState: OptionsState = {
perlin: 0,
seed: 0,
seamless: false,
hiresFix: false,
shouldUseInitImage: false,
img2imgStrength: 0.75,
initialImagePath: null,
@ -138,6 +140,9 @@ export const optionsSlice = createSlice({
setSeamless: (state, action: PayloadAction<boolean>) => {
state.seamless = action.payload;
},
setHiresFix: (state, action: PayloadAction<boolean>) => {
state.hiresFix = action.payload;
},
setShouldFitToWidthHeight: (state, action: PayloadAction<boolean>) => {
state.shouldFitToWidthHeight = action.payload;
},
@ -180,6 +185,7 @@ export const optionsSlice = createSlice({
threshold,
perlin,
seamless,
hires_fix,
width,
height,
strength,
@ -256,6 +262,7 @@ export const optionsSlice = createSlice({
if (perlin) state.perlin = perlin;
if (typeof perlin === 'undefined') state.perlin = 0;
if (typeof seamless === 'boolean') state.seamless = seamless;
if (typeof hires_fix === 'boolean') state.hiresFix = hires_fix;
if (width) state.width = width;
if (height) state.height = height;
},
@ -301,6 +308,7 @@ export const {
setSampler,
setSeed,
setSeamless,
setHiresFix,
setImg2imgStrength,
setGfpganStrength,
setUpscalingLevel,

View File

@ -33,6 +33,25 @@ from ldm.invoke.args import metadata_from_png
from ldm.invoke.image_util import InitImageResizer
from ldm.invoke.devices import choose_torch_device, choose_precision
from ldm.invoke.conditioning import get_uc_and_c
from ldm.invoke.model_cache import ModelCache
def fix_func(orig):
    """
    Return a replacement for the torch function `orig`.

    When the MPS backend is available, the replacement forces the call to
    run with device="cpu" and then moves the result to the device the
    caller asked for (defaulting to "mps"). Otherwise `orig` is returned
    unchanged.
    """
    if not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
        return orig

    def new_func(*args, **kw):
        # remember where the caller wanted the result before overriding it
        target = kw.get("device", "mps")
        kw["device"]="cpu"
        result = orig(*args, **kw)
        return result.to(target)

    return new_func
torch.rand = fix_func(torch.rand)
torch.rand_like = fix_func(torch.rand_like)
torch.randn = fix_func(torch.randn)
torch.randn_like = fix_func(torch.randn_like)
torch.randint = fix_func(torch.randint)
torch.randint_like = fix_func(torch.randint_like)
torch.bernoulli = fix_func(torch.bernoulli)
torch.multinomial = fix_func(torch.multinomial)
def fix_func(orig):
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
@ -141,12 +160,11 @@ class Generate:
esrgan=None,
free_gpu_mem=False,
):
models = OmegaConf.load(conf)
mconfig = models[model]
self.weights = mconfig.weights if weights is None else weights
self.config = mconfig.config if config is None else config
self.height = mconfig.height
self.width = mconfig.width
mconfig = OmegaConf.load(conf)
self.model_name = model
self.height = None
self.width = None
self.model_cache = None
self.iterations = 1
self.steps = 50
self.cfg_scale = 7.5
@ -155,8 +173,10 @@ class Generate:
self.precision = precision
self.strength = 0.75
self.seamless = False
self.hires_fix = False
self.embedding_path = embedding_path
self.model = None # empty for now
self.model_hash = None
self.sampler = None
self.device = None
self.session_peakmem = None
@ -167,11 +187,13 @@ class Generate:
self.codeformer = codeformer
self.esrgan = esrgan
self.free_gpu_mem = free_gpu_mem
self.size_matters = True # used to warn once about large image sizes and VRAM
# Note that in previous versions, there was an option to pass the
# device to Generate(). However the device was then ignored, so
# it wasn't actually doing anything. This logic could be reinstated.
device_type = choose_torch_device()
print(f'>> Using device_type {device_type}')
self.device = torch.device(device_type)
if full_precision:
if self.precision != 'auto':
@ -182,6 +204,9 @@ class Generate:
if self.precision == 'auto':
self.precision = choose_precision(self.device)
# model caching system for fast switching
self.model_cache = ModelCache(mconfig,self.device,self.precision)
# for VRAM usage statistics
# (_has_cuda is a method: it must be called, otherwise the bound method
# itself is tested and the condition is always true)
self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda() else None
transformers.logging.set_verbosity_error()
@ -249,10 +274,12 @@ class Generate:
embiggen_tiles = None,
# these are specific to GFPGAN/ESRGAN
facetool = None,
gfpgan_strength = 0,
facetool_strength = 0,
codeformer_fidelity = None,
save_original = False,
upscale = None,
# this is specific to inpainting and causes more extreme inpainting
inpaint_replace = 0.0,
# Set this True to handle KeyboardInterrupt internally
catch_interrupts = False,
hires_fix = False,
@ -269,9 +296,10 @@ class Generate:
height // height of image, in multiples of 64 (512)
cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
seamless // whether the generated image should tile
hires_fix // whether the Hires Fix should be applied during generation
init_img // path to an initial image
strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely
facetool_strength // strength for GFPGAN/CodeFormer. 0.0 preserves image exactly, 1.0 replaces it completely
ddim_eta // image randomness (eta=0.0 means the same seed always produces the same image)
step_callback // a function or method that will be called each step
image_callback // a function or method that will be called each time an image is generated
@ -302,6 +330,7 @@ class Generate:
width = width or self.width
height = height or self.height
seamless = seamless or self.seamless
hires_fix = hires_fix or self.hires_fix
cfg_scale = cfg_scale or self.cfg_scale
ddim_eta = ddim_eta or self.ddim_eta
iterations = iterations or self.iterations
@ -312,7 +341,12 @@ class Generate:
with_variations = [] if with_variations is None else with_variations
# will instantiate the model or return it from cache
model = self.load_model()
model = self.set_model(self.model_name)
# self.width and self.height are set by set_model()
# to the width and height of the image training set
width = width or self.width
height = height or self.height
for m in model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
@ -344,6 +378,7 @@ class Generate:
f'variation weights must be in [0.0, 1.0]: got {[weight for _, weight in with_variations]}'
width, height, _ = self._resolution_check(width, height, log=True)
assert inpaint_replace >=0.0 and inpaint_replace <= 1.0,'inpaint_replace must be between 0.0 and 1.0'
if sampler_name and (sampler_name != self.sampler_name):
self.sampler_name = sampler_name
@ -371,6 +406,8 @@ class Generate:
height,
fit=fit,
)
# TODO: Hacky selection of operation to perform. Needs to be refactored.
if (init_image is not None) and (mask_image is not None):
generator = self._make_inpaint()
elif (embiggen != None or embiggen_tiles != None):
@ -385,6 +422,7 @@ class Generate:
generator.set_variation(
self.seed, variation_amount, with_variations
)
results = generator.generate(
prompt,
iterations=iterations,
@ -406,6 +444,7 @@ class Generate:
perlin=perlin,
embiggen=embiggen,
embiggen_tiles=embiggen_tiles,
inpaint_replace=inpaint_replace,
)
if init_color:
@ -413,11 +452,11 @@ class Generate:
reference_image_path = init_color,
image_callback = image_callback)
if upscale is not None or gfpgan_strength > 0:
if upscale is not None or facetool_strength > 0:
self.upscale_and_reconstruct(results,
upscale = upscale,
facetool = facetool,
strength = gfpgan_strength,
strength = facetool_strength,
codeformer_fidelity = codeformer_fidelity,
save_original = save_original,
image_callback = image_callback)
@ -460,7 +499,7 @@ class Generate:
self,
image_path,
tool = 'gfpgan', # one of 'upscale', 'gfpgan', 'codeformer', 'outpaint', or 'embiggen'
gfpgan_strength = 0.0,
facetool_strength = 0.0,
codeformer_fidelity = 0.75,
upscale = None,
out_direction = None,
@ -507,11 +546,11 @@ class Generate:
facetool = 'codeformer'
elif tool == 'upscale':
facetool = 'gfpgan' # but won't be run
gfpgan_strength = 0
facetool_strength = 0
return self.upscale_and_reconstruct(
[[image,seed]],
facetool = facetool,
strength = gfpgan_strength,
strength = facetool_strength,
codeformer_fidelity = codeformer_fidelity,
save_original = save_original,
upscale = upscale,
@ -602,8 +641,9 @@ class Generate:
# this returns a torch tensor
init_mask = self._create_init_mask(image, width, height, fit=fit)
if (image.width * image.height) > (self.width * self.height):
if (image.width * image.height) > (self.width * self.height) and self.size_matters:
print(">> This input is larger than your defaults. If you run out of memory, please use a smaller image.")
self.size_matters = False
init_image = self._create_init_image(image,width,height,fit=fit) # this returns a torch tensor
@ -653,29 +693,40 @@ class Generate:
return self.generators['inpaint']
def load_model(self):
"""Load and initialize the model from configuration variables passed at object creation time"""
if self.model is None:
seed_everything(random.randrange(0, np.iinfo(np.uint32).max))
try:
model = self._load_model_from_config(self.config, self.weights)
if self.embedding_path is not None:
model.embedding_manager.load(
self.embedding_path, self.precision == 'float32' or self.precision == 'autocast'
)
self.model = model.to(self.device)
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
self.model.cond_stage_model.device = self.device
except AttributeError as e:
print(f'>> Error loading model. {str(e)}', file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
raise SystemExit from e
'''
preload model identified in self.model_name
'''
self.set_model(self.model_name)
self._set_sampler()
def set_model(self,model_name):
"""
Given the name of a model defined in models.yaml, will load and initialize it
and return the model object. Previously-used models will be cached.
"""
if self.model_name == model_name and self.model is not None:
return self.model
for m in self.model.modules():
if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
m._orig_padding_mode = m.padding_mode
model_data = self.model_cache.get_model(model_name)
if model_data is None or len(model_data) == 0:
print(f'** Model switch failed **')
return self.model
self.model = model_data['model']
self.width = model_data['width']
self.height= model_data['height']
self.model_hash = model_data['hash']
# uncache generators so they pick up new models
self.generators = {}
seed_everything(random.randrange(0, np.iinfo(np.uint32).max))
# load the textual-inversion embeddings into the freshly selected model;
# there is no local `model` in this method, so go through self.model
if self.embedding_path is not None:
    self.model.embedding_manager.load(
        self.embedding_path, self.precision == 'float32' or self.precision == 'autocast'
    )
self._set_sampler()
self.model_name = model_name
return self.model
def correct_colors(self,
@ -779,53 +830,6 @@ class Generate:
print(msg)
# Be warned: config is the path to the model config file, not the invoke conf file!
# Also note that we can get config and weights from self, so why do we need to
# pass them as args?
def _load_model_from_config(self, config, weights):
print(f'>> Loading model from {weights}')
# for usage statistics
device_type = choose_torch_device()
if device_type == 'cuda':
torch.cuda.reset_peak_memory_stats()
tic = time.time()
# this does the work
c = OmegaConf.load(config)
with open(weights,'rb') as f:
weight_bytes = f.read()
self.model_hash = self._cached_sha256(weights,weight_bytes)
pl_sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu')
del weight_bytes
sd = pl_sd['state_dict']
model = instantiate_from_config(c.model)
m, u = model.load_state_dict(sd, strict=False)
if self.precision == 'float16':
print('>> Using faster float16 precision')
model.to(torch.float16)
else:
print('>> Using more accurate float32 precision')
model.to(self.device)
model.eval()
# usage statistics
toc = time.time()
print(
f'>> Model loaded in', '%4.2fs' % (toc - tic)
)
if self._has_cuda():
print(
'>> Max VRAM used to load the model:',
'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
'\n>> Current VRAM usage:'
'%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
)
return model
def _load_img(self, img, width, height)->Image:
if isinstance(img, Image.Image):
image = img
@ -969,26 +973,6 @@ class Generate:
def _has_cuda(self):
return self.device.type == 'cuda'
def _cached_sha256(self,path,data):
dirname = os.path.dirname(path)
basename = os.path.basename(path)
base, _ = os.path.splitext(basename)
hashpath = os.path.join(dirname,base+'.sha256')
if os.path.exists(hashpath) and os.path.getmtime(path) <= os.path.getmtime(hashpath):
with open(hashpath) as f:
hash = f.read()
return hash
print(f'>> Calculating sha256 hash of weights file')
tic = time.time()
sha = hashlib.sha256()
sha.update(data)
hash = sha.hexdigest()
toc = time.time()
print(f'>> sha256 = {hash}','(%4.2fs)' % (toc - tic))
with open(hashpath,'w') as f:
f.write(hash)
return hash
def write_intermediate_images(self,modulus,path):
counter = -1
if not os.path.exists(path):

View File

@ -239,12 +239,17 @@ class Args(object):
switches.append(f'--init_color {a["init_color"]}')
if a['strength'] and a['strength']>0:
switches.append(f'-f {a["strength"]}')
# --inpaint_replace is declared with type=float, so the reconstructed
# command must carry the value along with the switch
if a['inpaint_replace']:
    switches.append(f'--inpaint_replace {a["inpaint_replace"]}')
else:
switches.append(f'-A {a["sampler_name"]}')
# gfpgan-specific parameters
if a['gfpgan_strength']:
switches.append(f'-G {a["gfpgan_strength"]}')
# facetool-specific parameters, only print if running facetool
if a['facetool_strength']:
switches.append(f'-G {a["facetool_strength"]}')
switches.append(f'-ft {a["facetool"]}')
if a["facetool"] == "codeformer":
switches.append(f'-cf {a["codeformer_fidelity"]}')
if a['outcrop']:
switches.append(f'-c {" ".join([str(u) for u in a["outcrop"]])}')
@ -262,11 +267,12 @@ class Args(object):
# outpainting parameters
if a['out_direction']:
switches.append(f'-D {" ".join([str(u) for u in a["out_direction"]])}')
# LS: slight semantic drift which needs addressing in the future:
# 1. Variations come out of the stored metadata as a packed string with the keyword "variations"
# 2. However, they come out of the CLI (and probably web) with the keyword "with_variations" and
# in broken-out form. Variation (1) should be changed to comply with (2)
if a['with_variations']:
if a['with_variations'] and len(a['with_variations'])>0:
formatted_variations = ','.join(f'{seed}:{weight}' for seed, weight in (a["with_variations"]))
switches.append(f'-V {formatted_variations}')
if 'variations' in a and len(a['variations'])>0:
@ -372,6 +378,14 @@ class Args(object):
default='stable-diffusion-1.4',
help='Indicates which diffusion model to load. (currently "stable-diffusion-1.4" (default) or "laion400m")',
)
model_group.add_argument(
    '--png_compression','-z',
    type=int,
    default=6,
    # range() excludes its stop value; 10 is required to accept level 9
    choices=range(0,10),
    dest='png_compression',
    help='level of PNG compression, from 0 (none) to 9 (maximum). Default is 6.'
)
model_group.add_argument(
'--sampler',
'-A',
@ -643,6 +657,14 @@ class Args(object):
dest='save_intermediates',
help='Save every nth intermediate image into an "intermediates" directory within the output directory'
)
render_group.add_argument(
'--png_compression','-z',
type=int,
default=6,
choices=range(0,10),
dest='png_compression',
help='level of PNG compression, from 0 (none) to 9 (maximum). Default is 6.'
)
img2img_group.add_argument(
'-I',
'--init_img',
@ -690,6 +712,13 @@ class Args(object):
metavar=('direction','pixels'),
help='Outcrop the image with one or more direction/pixel pairs: -c top 64 bottom 128 left 64 right 64',
)
img2img_group.add_argument(
'-r',
'--inpaint_replace',
type=float,
default=0.0,
help='when inpainting, adjust how aggressively to replace the part of the picture under the mask, from 0.0 (a gentle merge) to 1.0 (replace entirely)',
)
postprocessing_group.add_argument(
'-ft',
'--facetool',
@ -699,6 +728,7 @@ class Args(object):
)
postprocessing_group.add_argument(
'-G',
'--facetool_strength',
'--gfpgan_strength',
type=float,
help='The strength at which to apply the face restoration to the result.',
@ -795,7 +825,8 @@ def metadata_dumps(opt,
# remove any image keys not mentioned in RFC #266
rfc266_img_fields = ['type','postprocessing','sampler','prompt','seed','variations','steps',
'cfg_scale','threshold','perlin','step_number','width','height','extra','strength']
'cfg_scale','threshold','perlin','step_number','width','height','extra','strength',
'init_img','init_mask']
rfc_dict ={}
@ -816,11 +847,15 @@ def metadata_dumps(opt,
# 'variations' should always exist and be an array, empty or consisting of {'seed': seed, 'weight': weight} pairs
rfc_dict['variations'] = [{'seed':x[0],'weight':x[1]} for x in opt.with_variations] if opt.with_variations else []
# if variations are present then we need to replace 'seed' with 'orig_seed'
if hasattr(opt,'first_seed'):
rfc_dict['seed'] = opt.first_seed
if opt.init_img:
rfc_dict['type'] = 'img2img'
rfc_dict['strength_steps'] = rfc_dict.pop('strength')
rfc_dict['orig_hash'] = calculate_init_img_hash(opt.init_img)
rfc_dict['sampler'] = 'ddim' # TODO: FIX ME WHEN IMG2IMG SUPPORTS ALL SAMPLERS
rfc_dict['type'] = 'img2img'
rfc_dict['strength_steps'] = rfc_dict.pop('strength')
rfc_dict['orig_hash'] = calculate_init_img_hash(opt.init_img)
rfc_dict['inpaint_replace'] = opt.inpaint_replace
else:
rfc_dict['type'] = 'txt2img'
rfc_dict.pop('strength')

View File

@ -5,6 +5,7 @@ including img2img, txt2img, and inpaint
import torch
import numpy as np
import random
import os
from tqdm import tqdm, trange
from PIL import Image
from einops import rearrange, repeat
@ -168,3 +169,14 @@ class Generator():
return v2
# this is a handy routine for debugging use. Given a generated sample,
# convert it into a PNG image and store it at the indicated path
def save_sample(self, sample, filepath):
    """
    Debugging aid: convert `sample` with sample_to_image() and write it
    to `filepath` as a PNG, creating the parent directory if it is missing.
    """
    image = self.sample_to_image(sample)

    dirname = os.path.dirname(filepath)
    if not dirname:
        # bare filename: save into the current directory
        dirname = '.'

    if not os.path.exists(dirname):
        print(f'** creating directory {dirname}')
        os.makedirs(dirname, exist_ok=True)

    image.save(filepath,'PNG')

View File

@ -18,7 +18,7 @@ class Inpaint(Img2Img):
@torch.no_grad()
def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
conditioning,init_image,mask_image,strength,
step_callback=None,**kwargs):
step_callback=None,inpaint_replace=False,**kwargs):
"""
Returns a function returning an image derived from the prompt and
the initial image + mask. Return value depends on the seed at
@ -58,6 +58,14 @@ class Inpaint(Img2Img):
noise=x_T
)
# to replace masked area with latent noise, weighted by inpaint_replace strength
if inpaint_replace > 0.0:
print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}')
l_noise = self.get_noise(kwargs['width'],kwargs['height'])
inverted_mask = 1.0-mask_image # there will be 1s where the mask is
masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise
z_enc = z_enc * mask_image + masked_region
# decode it
samples = sampler.decode(
z_enc,

281
ldm/invoke/model_cache.py Normal file
View File

@ -0,0 +1,281 @@
'''
Manage a cache of Stable Diffusion model files for fast switching.
They are moved between GPU and CPU as necessary. If CPU memory falls
below a preset minimum, the least recently used model will be
cleared and loaded from disk when next needed.
'''
import torch
import os
import io
import time
import gc
import hashlib
import psutil
import transformers
from sys import getrefcount
from omegaconf import OmegaConf
from omegaconf.errors import ConfigAttributeError
from ldm.util import instantiate_from_config
GIGS=2**30
AVG_MODEL_SIZE=2.1*GIGS
DEFAULT_MIN_AVAIL=2*GIGS
class ModelCache(object):
def __init__(self, config:OmegaConf, device_type:str, precision:str, min_avail_mem=DEFAULT_MIN_AVAIL):
    '''
    Create an empty model cache.

    config        -- the models configuration, indexed by model name
    device_type   -- handed to torch.device() to build the target device
    precision     -- precision string consulted when a model is loaded
    min_avail_mem -- amount of system (CPU) memory to keep free; the
                     default is DEFAULT_MIN_AVAIL (2 * 2**30)
    '''
    # prevent nasty-looking CLIP log message
    transformers.logging.set_verbosity_error()

    # bookkeeping for cached models; nothing is loaded yet
    self.models = {}
    self.stack = [] # this is an LRU FIFO
    self.current_model = None

    # settings supplied by the caller
    self.config = config
    self.precision = precision
    self.min_avail_mem = min_avail_mem
    self.device = torch.device(device_type)
def get_model(self, model_name:str):
'''
Given a model named identified in models.yaml, return
the model object. If in RAM will load into GPU VRAM.
If on disk, will load from there.
'''
if model_name not in self.config:
print(f'** "{model_name}" is not a known model name. Please check your models.yaml file')
return None
if self.current_model != model_name:
self.unload_model(self.current_model)
if model_name in self.models:
requested_model = self.models[model_name]['model']
print(f'>> Retrieving model {model_name} from system RAM cache')
self.models[model_name]['model'] = self._model_from_cpu(requested_model)
width = self.models[model_name]['width']
height = self.models[model_name]['height']
hash = self.models[model_name]['hash']
else:
self._check_memory()
try:
requested_model, width, height, hash = self._load_model(model_name)
self.models[model_name] = {}
self.models[model_name]['model'] = requested_model
self.models[model_name]['width'] = width
self.models[model_name]['height'] = height
self.models[model_name]['hash'] = hash
except Exception as e:
print(f'** model {model_name} could not be loaded: {str(e)}')
print(f'** restoring {self.current_model}')
return self.get_model(self.current_model)
self.current_model = model_name
self._push_newest_model(model_name)
return {
'model':requested_model,
'width':width,
'height':height,
'hash': hash
}
def list_models(self) -> dict:
'''
Return a dict of models in the format:
{ model_name1: {'status': ('active'|'cached'|'not loaded'),
'description': description,
},
model_name2: { etc }
'''
result = {}
for name in self.config:
try:
description = self.config[name].description
except ConfigAttributeError:
description = '<no description>'
if self.current_model == name:
status = 'active'
elif name in self.models:
status = 'cached'
else:
status = 'not loaded'
result[name]={}
result[name]['status']=status
result[name]['description']=description
return result
def print_models(self):
'''
Print a table of models, their descriptions, and load status
'''
models = self.list_models()
for name in models:
line = f'{name:25s} {models[name]["status"]:>10s} {models[name]["description"]}'
if models[name]['status'] == 'active':
print(f'\033[1m{line}\033[0m')
else:
print(line)
def add_model(self, model_name:str, model_attributes:dict, clobber=False) ->str:
'''
Update the named model with a dictionary of attributes. Will fail with an
assertion error if the name already exists. Pass clobber=True to overwrite.
On a successful update, the config will be changed in memory and a YAML
string will be returned.
'''
omega = self.config
# check that all the required fields are present
for field in ('description','weights','height','width','config'):
assert field in model_attributes, f'required field {field} is missing'
assert (clobber or model_name not in omega), f'attempt to overwrite existing model definition "{model_name}"'
config = omega[model_name] if model_name in omega else {}
for field in model_attributes:
config[field] = model_attributes[field]
omega[model_name] = config
return OmegaConf.to_yaml(omega)
def _check_memory(self):
    '''
    Drop the least recently used model from self.models when loading
    another model of AVG_MODEL_SIZE would leave less than
    self.min_avail_mem. Nothing is dropped when _pop_oldest_model()
    returns None.
    '''
    # second field of psutil.virtual_memory() -- presumably the "available" figure; confirm against psutil docs
    avail_memory = psutil.virtual_memory()[1]
    if AVG_MODEL_SIZE + self.min_avail_mem <= avail_memory:
        return

    evicted = self._pop_oldest_model()
    if evicted is None:
        return

    del self.models[evicted]
    gc.collect()
def _load_model(self, model_name:str):
"""Load and initialize the model from configuration variables passed at object creation time"""
if model_name not in self.config:
print(f'"{model_name}" is not a known model name. Please check your models.yaml file')
return None
mconfig = self.config[model_name]
config = mconfig.config
weights = mconfig.weights
width = mconfig.width
height = mconfig.height
print(f'>> Loading {model_name} from {weights}')
# for usage statistics
if self._has_cuda():
torch.cuda.reset_peak_memory_stats()
torch.cuda.empty_cache()
tic = time.time()
# this does the work
c = OmegaConf.load(config)
with open(weights,'rb') as f:
weight_bytes = f.read()
model_hash = self._cached_sha256(weights,weight_bytes)
pl_sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu')
del weight_bytes
sd = pl_sd['state_dict']
model = instantiate_from_config(c.model)
m, u = model.load_state_dict(sd, strict=False)
if self.precision == 'float16':
print(' | Using faster float16 precision')
model.to(torch.float16)
else:
print(' | Using more accurate float32 precision')
model.to(self.device)
# model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here
model.cond_stage_model.device = self.device
model.eval()
for m in model.modules():
if isinstance(m, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
m._orig_padding_mode = m.padding_mode
# usage statistics
toc = time.time()
print(f'>> Model loaded in', '%4.2fs' % (toc - tic))
if self._has_cuda():
print(
'>> Max VRAM used to load the model:',
'%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9),
'\n>> Current VRAM usage:'
'%4.2fG' % (torch.cuda.memory_allocated() / 1e9),
)
return model, width, height, model_hash
def unload_model(self, model_name:str):
    '''
    Move the named model to the CPU via _model_to_cpu() and store the
    result back in self.models, then run the garbage collector and,
    when _has_cuda() is true, empty the CUDA cache.
    Does nothing when model_name is not in self.models.
    '''
    if model_name in self.models:
        print(f'>> Caching model {model_name} in system RAM')
        record = self.models[model_name]
        record['model'] = self._model_to_cpu(record['model'])
        gc.collect()
        if self._has_cuda():
            torch.cuda.empty_cache()
def _model_to_cpu(self,model):
if self.device != 'cpu':
model.cond_stage_model.device = 'cpu'
model.first_stage_model.to('cpu')
model.cond_stage_model.to('cpu')
model.model.to('cpu')
return model.to('cpu')
else:
return model
def _model_from_cpu(self,model):
if self.device != 'cpu':
model.to(self.device)
model.first_stage_model.to(self.device)
model.cond_stage_model.to(self.device)
model.cond_stage_model.device = self.device
return model
def _pop_oldest_model(self):
'''
Remove the first element of the FIFO, which ought
to be the least recently accessed model. Do not
pop the last one, because it is in active use!
'''
if len(self.stack) > 1:
return self.stack.pop(0)
def _push_newest_model(self,model_name:str):
'''
Maintain a simple FIFO. First element is always the
least recent, and last element is always the most recent.
'''
try:
self.stack.remove(model_name)
except ValueError:
pass
self.stack.append(model_name)
def _has_cuda(self):
return self.device.type == 'cuda'
def _cached_sha256(self,path,data):
dirname = os.path.dirname(path)
basename = os.path.basename(path)
base, _ = os.path.splitext(basename)
hashpath = os.path.join(dirname,base+'.sha256')
if os.path.exists(hashpath) and os.path.getmtime(path) <= os.path.getmtime(hashpath):
with open(hashpath) as f:
hash = f.read()
return hash
print(f'>> Calculating sha256 hash of weights file')
tic = time.time()
sha = hashlib.sha256()
sha.update(data)
hash = sha.hexdigest()
toc = time.time()
print(f'>> sha256 = {hash}','(%4.2fs)' % (toc - tic))
with open(hashpath,'w') as f:
f.write(hash)
return hash

View File

@ -33,13 +33,13 @@ class PngWriter:
# saves image named _image_ to outdir/name, writing metadata from prompt
# returns full path of output
def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None):
def save_image_and_prompt_to_png(self, image, dream_prompt, name, metadata=None, compress_level=6):
path = os.path.join(self.outdir, name)
info = PngImagePlugin.PngInfo()
info.add_text('Dream', dream_prompt)
if metadata:
info.add_text('sd-metadata', json.dumps(metadata))
image.save(path, 'PNG', pnginfo=info)
image.save(path, 'PNG', pnginfo=info, compress_level=compress_level)
return path
def retrieve_metadata(self,img_basename):

View File

@ -21,6 +21,8 @@ except (ImportError,ModuleNotFoundError):
readline_available = False
IMG_EXTENSIONS = ('.png','.jpg','.jpeg','.PNG','.JPG','.JPEG','.gif','.GIF')
WEIGHT_EXTENSIONS = ('.ckpt','.bae')
CONFIG_EXTENSIONS = ('.yaml','.yml')
COMMANDS = (
'--steps','-s',
'--seed','-S',
@ -42,13 +44,25 @@ COMMANDS = (
'--embedding_path',
'--device',
'--grid','-g',
'--gfpgan_strength','-G',
'--facetool','-ft',
'--facetool_strength','-G',
'--codeformer_fidelity','-cf',
'--upscale','-U',
'-save_orig','--save_original',
'--skip_normalize','-x',
'--log_tokenization','-t',
'--hires_fix',
'--inpaint_replace','-r',
'--png_compression','-z',
'!fix','!fetch','!history','!search','!clear',
'!models','!switch','!import_model','!edit_model'
)
MODEL_COMMANDS = (
'!switch',
'!edit_model',
)
WEIGHT_COMMANDS = (
'!import_model',
)
IMG_PATH_COMMANDS = (
'--outdir[=\s]',
@ -61,16 +75,19 @@ IMG_FILE_COMMANDS=(
'--init_color[=\s]',
'--embedding_path[=\s]',
)
path_regexp = '('+'|'.join(IMG_PATH_COMMANDS+IMG_FILE_COMMANDS) + ')\s*\S*$'
path_regexp = '('+'|'.join(IMG_PATH_COMMANDS+IMG_FILE_COMMANDS) + ')\s*\S*$'
weight_regexp = '('+'|'.join(WEIGHT_COMMANDS) + ')\s*\S*$'
class Completer(object):
def __init__(self, options):
def __init__(self, options, models=[]):
self.options = sorted(options)
self.models = sorted(models)
self.seeds = set()
self.matches = list()
self.default_dir = None
self.linebuffer = None
self.auto_history_active = True
self.extensions = None
return
def complete(self, text, state):
@ -81,7 +98,13 @@ class Completer(object):
buffer = readline.get_line_buffer()
if state == 0:
if re.search(path_regexp,buffer):
# extensions defined, so go directly into path completion mode
if self.extensions is not None:
self.matches = self._path_completions(text, state, self.extensions)
# looking for an image file
elif re.search(path_regexp,buffer):
do_shortcut = re.search('^'+'|'.join(IMG_FILE_COMMANDS),buffer)
self.matches = self._path_completions(text, state, IMG_EXTENSIONS,shortcut_ok=do_shortcut)
@ -89,6 +112,13 @@ class Completer(object):
elif re.search('(-S\s*|--seed[=\s])\d*$',buffer):
self.matches= self._seed_completions(text,state)
# looking for a model
elif re.match('^'+'|'.join(MODEL_COMMANDS),buffer):
self.matches= self._model_completions(text, state)
elif re.search(weight_regexp,buffer):
self.matches = self._path_completions(text, state, WEIGHT_EXTENSIONS)
# This is the first time for this text, so build a match list.
elif text:
self.matches = [
@ -105,6 +135,13 @@ class Completer(object):
response = None
return response
def complete_extensions(self, extensions:list):
    '''
    Store the file extensions used for forced path completion.
    While the stored value is not None, complete() goes directly
    into file path completion restricted to these extensions;
    pass None to return to normal completion.
    '''
    self.extensions = extensions
def add_history(self,line):
'''
Pass thru to readline
@ -189,6 +226,21 @@ class Completer(object):
matches.sort()
return matches
def _model_completions(self, text, state):
m = re.search('(!switch\s+)(\w*)',text)
if m:
switch = m.groups()[0]
partial = m.groups()[1]
else:
switch = ''
partial = text
matches = list()
for s in self.models:
if s.startswith(partial):
matches.append(switch+s)
matches.sort()
return matches
def _pre_input_hook(self):
if self.linebuffer:
readline.insert_text(self.linebuffer)
@ -267,9 +319,9 @@ class DummyCompleter(Completer):
def set_line(self,line):
print(f'# {line}')
def get_completer(opt:Args)->Completer:
def get_completer(opt:Args, models=[])->Completer:
if readline_available:
completer = Completer(COMMANDS)
completer = Completer(COMMANDS,models)
readline.set_completer(
completer.complete

View File

@ -31,12 +31,13 @@ def build_opt(post_data, seed, gfpgan_model_exists):
setattr(opt, 'embiggen', None)
setattr(opt, 'embiggen_tiles', None)
setattr(opt, 'gfpgan_strength', float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0)
setattr(opt, 'facetool_strength', float(post_data['facetool_strength']) if gfpgan_model_exists else 0)
setattr(opt, 'upscale', [int(post_data['upscale_level']), float(post_data['upscale_strength'])] if post_data['upscale_level'] != '' else None)
setattr(opt, 'progress_images', 'progress_images' in post_data)
setattr(opt, 'seed', None if int(post_data['seed']) == -1 else int(post_data['seed']))
setattr(opt, 'threshold', float(post_data['threshold']))
setattr(opt, 'perlin', float(post_data['perlin']))
setattr(opt, 'hires_fix', 'hires_fix' in post_data)
setattr(opt, 'variation_amount', float(post_data['variation_amount']) if int(post_data['seed']) != -1 else 0)
setattr(opt, 'with_variations', [])
setattr(opt, 'embiggen', None)
@ -196,7 +197,7 @@ class DreamServer(BaseHTTPRequestHandler):
) + '\n',"utf-8"))
# control state of the "postprocessing..." message
upscaling_requested = opt.upscale or opt.gfpgan_strength > 0
upscaling_requested = opt.upscale or opt.facetool_strength > 0
nonlocal images_generated # NB: Is this bad python style? It is typical usage in a perl closure.
nonlocal images_upscaled # NB: Is this bad python style? It is typical usage in a perl closure.
if upscaled:

View File

@ -106,7 +106,7 @@ class DDPM(pl.LightningModule):
], 'currently only supporting "eps" and "x0"'
self.parameterization = parameterization
print(
f'{self.__class__.__name__}: Running in {self.parameterization}-prediction mode'
f' | {self.__class__.__name__}: Running in {self.parameterization}-prediction mode'
)
self.cond_stage_model = None
self.clip_denoised = clip_denoised
@ -1353,7 +1353,7 @@ class LatentDiffusion(DDPM):
num_downs = self.first_stage_model.encoder.num_resolutions - 1
rescale_latent = 2 ** (num_downs)
# get top left postions of patches as conforming for the bbbox tokenizer, therefore we
# get top left positions of patches as conforming for the bbbox tokenizer, therefore we
# need to rescale the tl patch coordinates to be in between (0,1)
tl_patch_coordinates = [
(

View File

@ -49,9 +49,15 @@ class Upsample(nn.Module):
padding=1)
def forward(self, x):
    # Nearest-neighbour upsample of x by a factor of 2, followed by self.conv
    # when self.with_conv is set.
    #
    # When the MPS backend is available and the product of the four dimensions
    # of x is an exact multiple of 2**27, x is moved to the cpu for the
    # computation and the result is moved back to mps afterwards.
    # NOTE(review): self.conv is not moved along with x in that case -- confirm
    # the convolution accepts a cpu tensor there.
    on_mps = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
    cpu_m1_cond = bool(
        on_mps
        and x.size()[0] * x.size()[1] * x.size()[2] * x.size()[3] % 2**27 == 0
    )

    if cpu_m1_cond:
        x = x.to('cpu')   # send to cpu

    x = torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
    if self.with_conv:
        x = self.conv(x)

    # return to mps when the input was diverted to the cpu
    return x.to('mps') if cpu_m1_cond else x
@ -117,6 +123,14 @@ class ResnetBlock(nn.Module):
padding=0)
def forward(self, x, temb):
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
x_size = x.size()
if (x_size[0] * x_size[1] * x_size[2] * x_size[3]) % 2**29 == 0:
self.to('cpu')
x = x.to('cpu')
else:
self.to('mps')
x = x.to('mps')
h = self.norm1(x)
h = silu(h)
h = self.conv1(h)
@ -245,7 +259,7 @@ class AttnBlock(nn.Module):
def make_attn(in_channels, attn_type="vanilla"):
assert attn_type in ["vanilla", "linear", "none"], f'attn_type {attn_type} unknown'
print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
print(f" | Making attention of type '{attn_type}' with {in_channels} in_channels")
if attn_type == "vanilla":
return AttnBlock(in_channels)
elif attn_type == "none":
@ -521,7 +535,7 @@ class Decoder(nn.Module):
block_in = ch*ch_mult[self.num_resolutions-1]
curr_res = resolution // 2**(self.num_resolutions-1)
self.z_shape = (1,z_channels,curr_res,curr_res)
print("Working with z of shape {} = {} dimensions.".format(
print(" | Working with z of shape {} = {} dimensions.".format(
self.z_shape, np.prod(self.z_shape)))
# z to block_in

View File

@ -64,7 +64,8 @@ def make_ddim_timesteps(
):
if ddim_discr_method == 'uniform':
c = num_ddpm_timesteps // num_ddim_timesteps
ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))
# ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c)))
ddim_timesteps = (np.arange(0, num_ddim_timesteps) * c).astype(int)
elif ddim_discr_method == 'quad':
ddim_timesteps = (
(
@ -81,8 +82,8 @@ def make_ddim_timesteps(
# assert ddim_timesteps.shape[0] == num_ddim_timesteps
# add one to get the final alpha values right (the ones from first scale to data during sampling)
# steps_out = ddim_timesteps + 1
steps_out = ddim_timesteps
steps_out = ddim_timesteps + 1
# steps_out = ddim_timesteps
if verbose:
print(f'Selected timesteps for ddim sampler: {steps_out}')

View File

@ -75,7 +75,7 @@ def count_params(model, verbose=False):
total_params = sum(p.numel() for p in model.parameters())
if verbose:
print(
f'{model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.'
f' | {model.__class__.__name__} has {total_params * 1.e-6:.2f} M params.'
)
return total_params

View File

@ -6,7 +6,7 @@
"id": "ycYWcsEKc6w7"
},
"source": [
"# Stable Diffusion AI Notebook (Release 1.14)\n",
"# Stable Diffusion AI Notebook (Release 2.0.0)\n",
"\n",
"<img src=\"https://user-images.githubusercontent.com/60411196/186547976-d9de378a-9de8-4201-9c25-c057a9c59bad.jpeg\" alt=\"stable-diffusion-ai\" width=\"170px\"/> <br>\n",
"#### Instructions:\n",
@ -58,8 +58,8 @@
"from os.path import exists\n",
"\n",
"!git clone --quiet https://github.com/invoke-ai/InvokeAI.git # Original repo\n",
"%cd /content/stable-diffusion/\n",
"!git checkout --quiet tags/release-1.14.1"
"%cd /content/InvokeAI/\n",
"!git checkout --quiet tags/v2.0.0"
]
},
{
@ -79,6 +79,7 @@
"!pip install colab-xterm\n",
"!pip install -r requirements-lin-win-colab-CUDA.txt\n",
"!pip install clean-fid torchtext\n",
"!pip install transformers\n",
"gc.collect()"
]
},
@ -106,7 +107,7 @@
"source": [
"#@title 5. Load small ML models required\n",
"import gc\n",
"%cd /content/stable-diffusion/\n",
"%cd /content/InvokeAI/\n",
"!python scripts/preload_models.py\n",
"gc.collect()"
]
@ -171,18 +172,18 @@
"import os \n",
"\n",
"# Folder creation if it doesn't exist\n",
"if exists(\"/content/stable-diffusion/models/ldm/stable-diffusion-v1\"):\n",
"if exists(\"/content/InvokeAI/models/ldm/stable-diffusion-v1\"):\n",
" print(\"❗ Dir stable-diffusion-v1 already exists\")\n",
"else:\n",
" %mkdir /content/stable-diffusion/models/ldm/stable-diffusion-v1\n",
" %mkdir /content/InvokeAI/models/ldm/stable-diffusion-v1\n",
" print(\"✅ Dir stable-diffusion-v1 created\")\n",
"\n",
"# Symbolic link if it doesn't exist\n",
"if exists(\"/content/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt\"):\n",
"if exists(\"/content/InvokeAI/models/ldm/stable-diffusion-v1/model.ckpt\"):\n",
" print(\"❗ Symlink already created\")\n",
"else: \n",
" src = model_path\n",
" dst = '/content/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt'\n",
" dst = '/content/InvokeAI/models/ldm/stable-diffusion-v1/model.ckpt'\n",
" os.symlink(src, dst) \n",
" print(\"✅ Symbolic link created successfully\")"
]
@ -207,7 +208,7 @@
"source": [
"#@title 9. Run Terminal and Execute Dream bot\n",
"#@markdown <font color=\"blue\">Steps:</font> <br>\n",
"#@markdown 1. Execute command `python scripts/dream.py` to run dream bot.<br>\n",
"#@markdown 1. Execute command `python scripts/invoke.py` to run InvokeAI.<br>\n",
"#@markdown 2. After initialized you'll see `Dream>` line.<br>\n",
"#@markdown 3. Example text: `Astronaut floating in a distant galaxy` <br>\n",
"#@markdown 4. To quit Dream bot use: `q` command.<br>\n",
@ -233,7 +234,7 @@
"%matplotlib inline\n",
"\n",
"images = []\n",
"for img_path in sorted(glob.glob('/content/stable-diffusion/outputs/img-samples/*.png'), reverse=True):\n",
"for img_path in sorted(glob.glob('/content/InvokeAI/outputs/img-samples/*.png'), reverse=True):\n",
" images.append(mpimg.imread(img_path))\n",
"\n",
"images = images[:15] \n",

View File

@ -9,6 +9,7 @@ import copy
import warnings
import time
import traceback
import yaml
sys.path.append('.') # corrects a weird problem on Macs
from ldm.invoke.readline import get_completer
from ldm.invoke.args import Args, metadata_dumps, metadata_from_png, dream_cmd_from_png
@ -16,8 +17,6 @@ from ldm.invoke.pngwriter import PngWriter, retrieve_metadata, write_metadata
from ldm.invoke.image_util import make_grid
from ldm.invoke.log import write_log
from omegaconf import OmegaConf
from backend.invoke_ai_web_server import InvokeAIWebServer
def main():
"""Initialize command-line parsers and the diffusion model"""
@ -33,7 +32,7 @@ def main():
print('--weights argument has been deprecated. Please edit ./configs/models.yaml, and select the weights using --model instead.')
sys.exit(-1)
print('* Initializing, be patient...\n')
print('* Initializing, be patient...')
from ldm.generate import Generate
# these two lines prevent a horrible warning message from appearing
@ -42,45 +41,7 @@ def main():
transformers.logging.set_verbosity_error()
# Loading Face Restoration and ESRGAN Modules
try:
gfpgan, codeformer, esrgan = None, None, None
if opt.restore or opt.esrgan:
from ldm.invoke.restoration import Restoration
restoration = Restoration()
if opt.restore:
gfpgan, codeformer = restoration.load_face_restore_models(opt.gfpgan_dir, opt.gfpgan_model_path)
else:
print('>> Face restoration disabled')
if opt.esrgan:
esrgan = restoration.load_esrgan(opt.esrgan_bg_tile)
else:
print('>> Upscaling disabled')
else:
print('>> Face restoration and upscaling disabled')
except (ModuleNotFoundError, ImportError):
print(traceback.format_exc(), file=sys.stderr)
print('>> You may need to install the ESRGAN and/or GFPGAN modules')
# creating a simple text2image object with a handful of
# defaults passed on the command line.
# additional parameters will be added (or overriden) during
# the user input loop
try:
gen = Generate(
conf = opt.conf,
model = opt.model,
sampler_name = opt.sampler_name,
embedding_path = opt.embedding_path,
full_precision = opt.full_precision,
precision = opt.precision,
gfpgan=gfpgan,
codeformer=codeformer,
esrgan=esrgan,
free_gpu_mem=opt.free_gpu_mem,
)
except (FileNotFoundError, IOError, KeyError) as e:
print(f'{e}. Aborting.')
sys.exit(-1)
gfpgan,codeformer,esrgan = load_face_restoration(opt)
# make sure the output directory exists
if not os.path.exists(opt.outdir):
@ -100,6 +61,24 @@ def main():
print(f'{e}. Aborting.')
sys.exit(-1)
# creating a Generate object:
try:
gen = Generate(
conf = opt.conf,
model = opt.model,
sampler_name = opt.sampler_name,
embedding_path = opt.embedding_path,
full_precision = opt.full_precision,
precision = opt.precision,
gfpgan=gfpgan,
codeformer=codeformer,
esrgan=esrgan,
free_gpu_mem=opt.free_gpu_mem,
)
except (FileNotFoundError, IOError, KeyError) as e:
print(f'{e}. Aborting.')
sys.exit(-1)
if opt.seamless:
print(">> changed to seamless tiling mode")
@ -116,7 +95,10 @@ def main():
"\n* Initialization done! Awaiting your command (-h for help, 'q' to quit)"
)
main_loop(gen, opt, infile)
try:
main_loop(gen, opt, infile)
except KeyboardInterrupt:
print("\ngoodbye!")
# TODO: main_loop() has gotten busy. Needs to be refactored.
def main_loop(gen, opt, infile):
@ -124,12 +106,13 @@ def main_loop(gen, opt, infile):
done = False
path_filter = re.compile(r'[<>:"/\\|?*]')
last_results = list()
model_config = OmegaConf.load(opt.conf)[opt.model]
model_config = OmegaConf.load(opt.conf)
# The readline completer reads history from the .dream_history file located in the
# output directory specified at the time of script launch. We do not currently support
# changing the history file midstream when the output directory is changed.
completer = get_completer(opt)
completer = get_completer(opt, models=list(model_config.keys()))
completer.set_default_dir(opt.outdir)
output_cntr = completer.get_current_history_length()+1
# os.pathconf is not available on Windows
@ -141,11 +124,9 @@ def main_loop(gen, opt, infile):
name_max = 255
while not done:
operation = 'generate' # default operation, alternative is 'postprocess'
if completer:
completer.set_default_dir(opt.outdir)
operation = 'generate'
try:
command = get_next_command(infile)
except EOFError:
@ -164,41 +145,10 @@ def main_loop(gen, opt, infile):
break
if command.startswith('!'):
subcommand = command[1:]
command, operation = do_command(command, gen, opt, completer)
if subcommand.startswith('dream'): # in case a stored prompt still contains the !dream command
command = command.replace('!dream ','',1)
elif subcommand.startswith('fix'):
command = command.replace('!fix ','',1)
operation = 'postprocess'
elif subcommand.startswith('fetch'):
file_path = command.replace('!fetch ','',1)
retrieve_dream_command(opt,file_path,completer)
continue
elif subcommand.startswith('history'):
completer.show_history()
continue
elif subcommand.startswith('search'):
search_str = command.replace('!search ','',1)
completer.show_history(search_str)
continue
elif subcommand.startswith('clear'):
completer.clear_history()
continue
elif re.match('^(\d+)',subcommand):
command_no = re.match('^(\d+)',subcommand).groups()[0]
command = completer.get_line(int(command_no))
completer.set_line(command)
continue
else: # not a recognized subcommand, so give the --help text
command = '-h'
if operation is None:
continue
if opt.parse_cmd(command) is None:
continue
@ -218,9 +168,9 @@ def main_loop(gen, opt, infile):
# width and height are set by model if not specified
if not opt.width:
opt.width = model_config.width
opt.width = gen.width
if not opt.height:
opt.height = model_config.height
opt.height = gen.height
# retrieve previous value of init image if requested
if opt.init_img is not None and re.match('^-\\d+$', opt.init_img):
@ -323,6 +273,7 @@ def main_loop(gen, opt, infile):
model_hash = gen.model_hash,
),
name = filename,
compress_level = opt.png_compression,
)
# update rfc metadata
@ -394,13 +345,162 @@ def main_loop(gen, opt, infile):
print('goodbye!')
def do_command(command:str, gen, opt:Args, completer) -> tuple:
    """
    Interpret a command line that starts with '!'.

    Returns a tuple (command, operation):
      * operation is 'generate' (the default) or 'postprocess' when the
        returned command should be processed further by the caller;
      * operation is None when the command was handled here.
    An unrecognized command is replaced by '-h' so that the help text
    is shown.
    """
    operation = 'generate'   # default operation, alternative is 'postprocess'

    if command.startswith('!dream'):   # in case a stored prompt still contains the !dream command
        command = command.replace('!dream ','',1)

    elif command.startswith('!fix'):
        command = command.replace('!fix ','',1)
        operation = 'postprocess'

    elif command.startswith('!switch'):
        model_name = command.replace('!switch ','',1).strip()
        # a bare "!switch" leaves the command text itself behind; do not treat it as a model name
        if model_name and model_name != '!switch':
            gen.set_model(model_name)
        else:
            print('** please provide the name of a model')
        completer.add_history(command)
        operation = None

    elif command.startswith('!models'):
        gen.model_cache.print_models()
        operation = None

    elif command.startswith('!import'):
        path = shlex.split(command)
        if len(path) < 2:
            print('** please provide a path to a .ckpt or .vae model file')
        elif not os.path.exists(path[1]):
            print(f'** {path[1]}: file not found')
        else:
            add_weights_to_config(path[1], gen, opt, completer)
        completer.add_history(command)
        operation = None

    elif command.startswith('!edit'):
        path = shlex.split(command)
        if len(path) < 2:
            print('** please provide the name of a model')
        else:
            edit_config(path[1], gen, opt, completer)
        completer.add_history(command)
        operation = None

    elif command.startswith('!fetch'):
        file_path = command.replace('!fetch ','',1)
        retrieve_dream_command(opt,file_path,completer)
        operation = None

    elif command.startswith('!history'):
        completer.show_history()
        operation = None

    elif command.startswith('!search'):
        search_str = command.replace('!search ','',1)
        completer.show_history(search_str)
        operation = None

    elif command.startswith('!clear'):
        completer.clear_history()
        operation = None

    else:
        # "!<number>" recalls that line of the history into the input buffer
        history_ref = re.match(r'^!(\d+)', command)
        if history_ref:
            command = completer.get_line(int(history_ref.group(1)))
            completer.set_line(command)
            operation = None
        else:   # not a recognized command, so give the --help text
            command = '-h'

    return command, operation
def add_weights_to_config(model_path:str, gen, opt, completer):
    """
    Interactively collect the settings for a new model whose weights
    file is model_path: short name, description, configuration file
    and default image width/height. Each prompt repeats until the
    answer is acceptable. The result is handed to write_config_file();
    when that succeeds, the new model is made current with
    gen.set_model().

    KeyboardInterrupt and EOFError raised while prompting propagate to
    the caller instead of being swallowed by the retry loops.
    """
    print(f'>> Model import in process. Please enter the values needed to configure this model:')
    print()

    new_config = {}
    new_config['weights'] = model_path

    done = False
    while not done:
        model_name = input('Short name for this model: ')
        if not re.match(r'^[\w._-]+$',model_name):
            print('** model name must contain only words, digits and the characters [._-] **')
        else:
            done = True
    new_config['description'] = input('Description of this model: ')

    # force path completion on YAML files while asking for the config file,
    # and always switch it off again, even if the prompt is interrupted
    completer.complete_extensions(('.yaml','.yml'))
    completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml'
    try:
        done = False
        while not done:
            new_config['config'] = input('Configuration file for this model: ')
            done = os.path.exists(new_config['config'])
    finally:
        completer.complete_extensions(None)

    for field in ('width','height'):
        done = False
        while not done:
            completer.linebuffer = '512'
            # only a malformed number triggers a retry; Ctrl-C / end of input are not caught
            try:
                value = int(input(f'Default image {field}: '))
            except ValueError:
                value = None
            if value is not None and 64 <= value <= 2048:
                new_config[field] = value
                done = True
            else:
                print('** Please enter a valid integer between 64 and 2048')

    if write_config_file(opt.conf, gen, model_name, new_config):
        gen.set_model(model_name)
def edit_config(model_name:str, gen, opt, completer):
    """
    Interactively edit the description, weights, config, width and
    height of an existing model. Each prompt is pre-filled with the
    current value when there is one. The result is handed to
    write_config_file() with clobber=True; when that succeeds, the
    model is made current with gen.set_model().

    Prints a message and returns without prompting when model_name is
    not in the model cache's configuration.
    """
    config = gen.model_cache.config

    if model_name not in config:
        print(f'** Unknown model {model_name}')
        return

    print(f'\n>> Editing model {model_name} from configuration file {opt.conf}')

    conf = config[model_name]
    new_config = {}
    # force path completion while editing, and always switch it off again,
    # even if a prompt is interrupted
    completer.complete_extensions(('.yaml','.yml','.ckpt','.vae'))
    try:
        for field in ('description', 'weights', 'config', 'width','height'):
            while True:
                completer.linebuffer = str(conf[field]) if field in conf else ''
                new_value = input(f'{field}: ')
                if field not in ('width','height'):
                    break
                # width and height must be integers; ask again rather than abort the edit
                try:
                    new_value = int(new_value)
                    break
                except ValueError:
                    print('** Please enter a valid integer')
            new_config[field] = new_value
    finally:
        completer.complete_extensions(None)

    if write_config_file(opt.conf, gen, model_name, new_config, clobber=True):
        gen.set_model(model_name)
def write_config_file(conf_path, gen, model_name, new_config, clobber=False):
    """
    Show the proposed configuration for model_name, ask for
    confirmation, register it with the model cache and rewrite the
    configuration file at conf_path.

    Returns True when the file was rewritten. Returns False when the
    user does not answer 'y'/'Y', or when the model cache rejects the
    new definition with an AssertionError.
    """
    op = 'modify' if clobber else 'import'
    print('\n>> New configuration:')
    print(yaml.dump({model_name:new_config}))
    if input(f'OK to {op} [n]? ') not in ('y','Y'):
        return False

    try:
        yaml_str = gen.model_cache.add_model(model_name, new_config, clobber)
    except AssertionError as e:
        print(f'** configuration failed: {str(e)}')
        return False

    # write to a temporary file in the same directory, then swap it into place;
    # os.replace overwrites an existing destination on every platform, whereas
    # os.rename fails on Windows when the destination already exists
    tmpfile = os.path.join(os.path.dirname(conf_path),'new_config.tmp')
    with open(tmpfile, 'w') as outfile:
        outfile.write(yaml_str)
    os.replace(tmpfile,conf_path)
    return True
def do_postprocess (gen, opt, callback):
file_path = opt.prompt # treat the prompt as the file pathname
if os.path.dirname(file_path) == '': #basename given
file_path = os.path.join(opt.outdir,file_path)
tool=None
if opt.gfpgan_strength > 0:
if opt.facetool_strength > 0:
tool = opt.facetool
elif opt.embiggen:
tool = 'embiggen'
@ -416,7 +516,7 @@ def do_postprocess (gen, opt, callback):
gen.apply_postprocessor(
image_path = file_path,
tool = tool,
gfpgan_strength = opt.gfpgan_strength,
facetool_strength = opt.facetool_strength,
codeformer_fidelity = opt.codeformer_fidelity,
save_original = opt.save_original,
upscale = opt.upscale,
@ -511,6 +611,7 @@ def get_next_command(infile=None) -> str: # command string
def invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan):
print('\n* --web was specified, starting web server...')
from backend.invoke_ai_web_server import InvokeAIWebServer
# Change working directory to the stable-diffusion directory
os.chdir(
os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
@ -549,6 +650,27 @@ def split_variations(variations_string) -> list:
else:
return parts
def load_face_restoration(opt):
    """
    Load the face restoration and upscaling helpers requested by opt.

    Returns a tuple (gfpgan, codeformer, esrgan). An entry is None
    when the corresponding feature is not requested (opt.restore /
    opt.esrgan), or when the restoration module could not be imported,
    in which case the traceback is printed to stderr.
    """
    gfpgan = codeformer = esrgan = None
    try:
        if not (opt.restore or opt.esrgan):
            print('>> Face restoration and upscaling disabled')
        else:
            # imported lazily so the module is only needed when a feature is requested
            from ldm.invoke.restoration import Restoration
            restoration = Restoration()

            if opt.restore:
                gfpgan, codeformer = restoration.load_face_restore_models(opt.gfpgan_dir, opt.gfpgan_model_path)
            else:
                print('>> Face restoration disabled')

            if opt.esrgan:
                esrgan = restoration.load_esrgan(opt.esrgan_bg_tile)
            else:
                print('>> Upscaling disabled')
    except (ModuleNotFoundError, ImportError):
        print(traceback.format_exc(), file=sys.stderr)
        print('>> You may need to install the ESRGAN and/or GFPGAN modules')
    return gfpgan,codeformer,esrgan
def make_step_callback(gen, opt, prefix):
destination = os.path.join(opt.outdir,'intermediates',prefix)
os.makedirs(destination,exist_ok=True)

View File

@ -35,13 +35,14 @@ class DreamBase():
perlin: float = 0.0
sampler_name: string = 'klms'
seamless: bool = False
hires_fix: bool = False
model: str = None # The model to use (currently unused)
embeddings = None # The embeddings to use (currently unused)
progress_images: bool = False
# GFPGAN
enable_gfpgan: bool
gfpgan_strength: float = 0
facetool_strength: float = 0
# Upscale
enable_upscale: bool
@ -91,12 +92,13 @@ class DreamBase():
# model: str = None # The model to use (currently unused)
# embeddings = None # The embeddings to use (currently unused)
self.seamless = 'seamless' in j
self.hires_fix = 'hires_fix' in j
self.progress_images = 'progress_images' in j
# GFPGAN
self.enable_gfpgan = 'enable_gfpgan' in j and bool(j.get('enable_gfpgan'))
if self.enable_gfpgan:
self.gfpgan_strength = float(j.get('gfpgan_strength'))
self.facetool_strength = float(j.get('facetool_strength'))
# Upscale
self.enable_upscale = 'enable_upscale' in j and bool(j.get('enable_upscale'))

View File

@ -334,11 +334,11 @@ class GeneratorService:
# TODO: Support no generation (just upscaling/gfpgan)
upscale = None if not jobRequest.enable_upscale else jobRequest.upscale
gfpgan_strength = 0 if not jobRequest.enable_gfpgan else jobRequest.gfpgan_strength
facetool_strength = 0 if not jobRequest.enable_gfpgan else jobRequest.facetool_strength
if not jobRequest.enable_generate:
# If not generating, check if we're upscaling or running gfpgan
if not upscale and not gfpgan_strength:
if not upscale and not facetool_strength:
# Invalid settings (TODO: Add message to help user)
raise CanceledException()
@ -347,7 +347,7 @@ class GeneratorService:
self.__model.upscale_and_reconstruct(
image_list = [[image,0]],
upscale = upscale,
strength = gfpgan_strength,
strength = facetool_strength,
save_original = False,
image_callback = lambda image, seed, upscaled=False: self.__on_image_result(jobRequest, image, seed, upscaled))
@ -371,10 +371,11 @@ class GeneratorService:
steps = jobRequest.steps,
variation_amount = jobRequest.variation_amount,
with_variations = jobRequest.with_variations,
gfpgan_strength = gfpgan_strength,
facetool_strength = facetool_strength,
upscale = upscale,
sampler_name = jobRequest.sampler_name,
seamless = jobRequest.seamless,
hires_fix = jobRequest.hires_fix,
embiggen = jobRequest.embiggen,
embiggen_tiles = jobRequest.embiggen_tiles,
step_callback = lambda sample, step: self.__on_progress(jobRequest, sample, step),

View File

@ -144,8 +144,8 @@
<input type="checkbox" name="enable_gfpgan" id="enable_gfpgan">
<label for="enable_gfpgan">Enable gfpgan</label>
</legend>
<label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GPFGAN Strength:</label>
<input value="0.8" min="0" max="1" type="number" id="gfpgan_strength" name="gfpgan_strength" step="0.05">
<label title="Strength of the gfpgan (face fixing) algorithm." for="facetool_strength">GFPGAN Strength:</label>
<input value="0.8" min="0" max="1" type="number" id="facetool_strength" name="facetool_strength" step="0.05">
</fieldset>
<fieldset id="upscale">
<legend>

View File

@ -100,8 +100,8 @@
</fieldset>
<fieldset id="gfpgan">
<div class="section-header">Post-processing options</div>
<label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GPFGAN Strength (0 to disable):</label>
<input value="0.0" min="0" max="1" type="number" id="gfpgan_strength" name="gfpgan_strength" step="0.1">
<label title="Strength of the gfpgan (face fixing) algorithm." for="facetool_strength">GFPGAN Strength (0 to disable):</label>
<input value="0.0" min="0" max="1" type="number" id="facetool_strength" name="facetool_strength" step="0.1">
<label title="Upscaling to perform using ESRGAN." for="upscale_level">Upscaling Level</label>
<select id="upscale_level" name="upscale_level" value="">
<option value="" selected>None</option>